Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
2bard committed Feb 17, 2019
0 parents commit f3e8a64
Show file tree
Hide file tree
Showing 7 changed files with 382 additions and 0 deletions.
27 changes: 27 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
group '2bard'
version '0.1'

apply plugin: 'java'

sourceCompatibility = 1.8

repositories {
mavenCentral()
}

dependencies {
compile 'org.codehaus.groovy:groovy-all:2.3.11'
compile 'commons-io:commons-io:2.6'
compile 'commons-logging:commons-logging:1.2'
compile 'commons-net:commons-net:3.6'
compile 'net.sourceforge.htmlunit:htmlunit-core-js:2.33'
compile 'net.sourceforge.htmlunit:htmlunit-cssparser:1.2.0'
compile 'net.sourceforge.htmlunit:neko-htmlunit:2.33'
compile 'org.apache.commons:commons-lang3:3.8'
compile 'org.apache.commons:commons-text:1.4'
compile 'org.apache.httpcomponents:httpmime:4.5.6'
compile 'org.eclipse.jetty.websocket:websocket-client:9.4.12.v20180830'
compile 'xalan:xalan:2.7.2'
compile group: 'org.seleniumhq.selenium', name: 'selenium-server', version: '3.141.59'

}
Binary file added gradle/wrapper/gradle-wrapper.jar
Binary file not shown.
6 changes: 6 additions & 0 deletions gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#Fri Feb 15 23:03:54 GMT 2019
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-4.0-all.zip
172 changes: 172 additions & 0 deletions gradlew
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
#!/usr/bin/env sh

##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS=""

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
echo "$*"
}

die () {
echo
echo "$*"
echo
exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar

# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`

# We build the pattern for arguments to be converted via cygpath
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option

if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=$((i+1))
done
case $i in
(0) set -- ;;
(1) set -- "$args0" ;;
(2) set -- "$args0" "$args1" ;;
(3) set -- "$args0" "$args1" "$args2" ;;
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi

# Escape application args
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
APP_ARGS=$(save "$@")

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
cd "$(dirname "$0")"
fi

exec "$JAVACMD" "$@"
84 changes: 84 additions & 0 deletions gradlew.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto init

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:init
@rem Get command-line arguments, handling Windows variants

if not "%OS%" == "Windows_NT" goto win9xME_args

:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2

:win9xME_args_slurp
if "x%~1" == "x" goto execute

set CMD_LINE_ARGS=%*

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar

@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega
2 changes: 2 additions & 0 deletions settings.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
rootProject.name = 'sounddownloader'

91 changes: 91 additions & 0 deletions src/main/java/SoundDownloader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import java.io.File;
import java.net.URL;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.FileUtils;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;


public class SoundDownloader {

public static final String BASE_URL = "http://bbcsfx.acropolis.org.uk/?page=%s";
public static final String FILE_EXTENSION = "wav";
public static final int TOTAL_PAGES = 641;

public static final int ITEMS_PER_PAGE = 25;
public static final int WAIT_TIME = 10;

//Connection and read timeout values for downloading files
public static final int CONNECT_TIMEOUT = 10000;
public static final int READ_TIMEOUT = 10000;

public static void main(String[] args) {
WebDriver webDriver = new ChromeDriver();
try {

int currentPage = 1;

if(args.length > 0){
currentPage = Integer.parseInt(args[0]);
}

if(currentPage <= 0 || currentPage > TOTAL_PAGES){
throw new Exception("Start page must be above zero and below " + (TOTAL_PAGES + 1));
}

int endPage = (args.length > 1) ? Integer.parseInt(args[1]) : TOTAL_PAGES;

if(endPage <= 0 || endPage > TOTAL_PAGES){
throw new Exception("End page must be above zero and below " + (TOTAL_PAGES + 1));
} else if (endPage < currentPage){
throw new Exception("End page must be less than start page");
}

String pageUrl = String.format(BASE_URL, new String[]{Integer.toString(currentPage)});
webDriver.get(pageUrl);

String fileDescription;
String category;
String fileUrl;

while(currentPage <= endPage){
pageUrl = String.format(BASE_URL, new String[]{Integer.toString(currentPage)});
System.out.println("Downloading page : " + pageUrl);
webDriver.manage().timeouts().implicitlyWait(WAIT_TIME, TimeUnit.SECONDS);

for(int i = 1 ; i <= ITEMS_PER_PAGE; i++){
fileDescription = ((ChromeDriver) webDriver).findElementsByXPath("//*[@id=\"example\"]/tbody/tr[" + i + "]/td[1]").get(0).getText();
category = ((ChromeDriver) webDriver).findElementsByXPath("//*[@id=\"example\"]/tbody/tr[" + i + "]/td[2]").get(0).getText();
fileUrl = ((ChromeDriver) webDriver).findElementByXPath("//*[@id=\"example\"]/tbody/tr[" + i + "]/td[5]/a").getAttribute("href");

//This is strange - need to think of something smarter here
fileDescription = fileDescription.replace("/","~");

if(fileDescription.contains(".") && fileDescription.lastIndexOf(".") == fileDescription.length() - 1){
downloadFile(fileUrl, category + " - " + fileDescription + FILE_EXTENSION);
} else {
downloadFile(fileUrl, category + " - " + fileDescription + "." + FILE_EXTENSION);
}
}

((ChromeDriver) webDriver).findElementById("example_next").click();
currentPage++;
}
} catch( NumberFormatException e){
System.out.println("Input error. Please provide arguments larger above zero and below " + (TOTAL_PAGES + 1));
} catch (Exception e) {
e.printStackTrace();
System.out.println("Error: " + e.getMessage());
}
}

private static void downloadFile(String url, String filename) throws Exception{
FileUtils.copyURLToFile(
new URL(url),
new File(filename),
CONNECT_TIMEOUT,
READ_TIMEOUT);
System.out.println("Downloaded file: " + filename);
}

}

0 comments on commit f3e8a64

Please sign in to comment.