diff --git a/dev/run-tests.py b/dev/run-tests.py
index 0a178e655924d..38859da5486f5 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -24,25 +24,21 @@
 import subprocess
 from collections import namedtuple
 
-SPARK_PROJ_ROOT = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..")
-USER_HOME_DIR = os.environ.get("HOME")
+SPARK_HOME = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..")
+USER_HOME = os.environ.get("HOME")
 
-SBT_MAVEN_PROFILE_ARGS_ENV = "SBT_MAVEN_PROFILES_ARGS"
-AMPLAB_JENKINS_BUILD_TOOL = os.environ.get("AMPLAB_JENKINS_BUILD_TOOL")
-AMPLAB_JENKINS = os.environ.get("AMPLAB_JENKINS")
-
-SBT_OUTPUT_FILTER = re.compile("^.*[info].*Resolving" + "|" +
-                               "^.*[warn].*Merging" + "|" +
-                               "^.*[info].*Including")
+#SBT_MAVEN_PROFILE_ARGS_ENV = "SBT_MAVEN_PROFILES_ARGS"
+#AMPLAB_JENKINS_BUILD_TOOL = os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "sbt")
+AMPLAB_JENKINS = os.environ.get("AMPLAB_JENKINS")  # still read by run_python_tests()
 
 
 def get_error_codes(err_code_file):
     """Function to retrieve all block numbers from the `run-tests-codes.sh`
-    file to maintain backwards compatibility with the `run-tests-jenkins` 
+    file to maintain backwards compatibility with the `run-tests-jenkins`
     script"""
-    
+
     with open(err_code_file, 'r') as f:
-        err_codes = [e.split()[1].strip().split('=') 
+        err_codes = [e.split()[1].strip().split('=')
                      for e in f if e.startswith("readonly")]
         return dict(err_codes)
 
 
@@ -63,13 +59,6 @@ def rm_r(path):
         os.remove(path)
 
 
-def lineno():
-    """Returns the current line number in our program
-    - from: http://stackoverflow.com/a/3056059"""
-
-    return inspect.currentframe().f_back.f_lineno
-
-
 def run_cmd(cmd):
     """Given a command as a list of arguments will attempt to execute the
     command and, on failure, print an error message"""
@@ -82,32 +71,6 @@
         exit_from_command_with_retcode(e.cmd, e.returncode)
 
 
-def set_sbt_maven_profile_args():
-    """Properly sets the SBT environment variable arguments with additional
-    checks to determine if this is running on an Amplab Jenkins machine"""
-
-    # base environment values for SBT_MAVEN_PROFILE_ARGS_ENV which will be appended on
-    sbt_maven_profile_args_base = ["-Pkinesis-asl"]
-
-    sbt_maven_profile_arg_dict = {
-        "hadoop1.0" : ["-Phadoop-1", "-Dhadoop.version=1.0.4"],
-        "hadoop2.0" : ["-Phadoop-1", "-Dhadoop.version=2.0.0-mr1-cdh4.1.1"],
-        "hadoop2.2" : ["-Pyarn", "-Phadoop-2.2"],
-        "hadoop2.3" : ["-Pyarn", "-Phadoop-2.3", "-Dhadoop.version=2.3.0"],
-    }
-
-    # set the SBT maven build profile argument environment variable and ensure
-    # we build against the right version of Hadoop
-    if os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE"):
-        os.environ[SBT_MAVEN_PROFILE_ARGS_ENV] = \
-            " ".join(sbt_maven_profile_arg_dict.get(ajbp, [])
-                     + sbt_maven_profile_args_base)
-    else:
-        os.environ[SBT_MAVEN_PROFILE_ARGS_ENV] = \
-            " ".join(sbt_maven_profile_arg_dict.get("hadoop2.3", [])
-                     + sbt_maven_profile_args_base)
-
-
 def is_exe(path):
     """Check if a given path is an executable file
     - from: http://stackoverflow.com/a/377028"""
@@ -134,7 +97,12 @@
 
 
 def determine_java_executable():
-    """Will return the *best* path possible for a 'java' executable or `None`"""
+    """Will return the path of the java executable that will be used by Spark's
+    tests or `None`"""
+
+    # Any changes in the way that Spark's build detects java must be reflected
+    # here. Currently the build looks for $JAVA_HOME/bin/java then falls back to
+    # the `java` executable on the path
 
     java_home = os.environ.get("JAVA_HOME")
 
@@ -144,21 +112,21 @@ def determine_java_executable():
     return java_exe if java_exe else which("java")
 
 
+JavaVersion = namedtuple('JavaVersion', ['major', 'minor', 'patch', 'update'])
+
+
 def determine_java_version(java_exe):
     """Given a valid java executable will return its version in
     named tuple format with accessors '.major', '.minor', '.patch', '.update'"""
 
-    raw_output = subprocess.check_output([java_exe, "-version"], 
+    raw_output = subprocess.check_output([java_exe, "-version"],
                                          stderr=subprocess.STDOUT)
-    raw_version_str = raw_output.split('\n')[0]  # eg 'java version "1.8.0_25"' 
-    version_str = raw_version_str.split()[-1].strip('"')  # eg '1.8.0_25' 
-    version, update = version_str.split('_')  # eg ['1.8.0', '25'] 
-
-    JavaVersion = namedtuple('JavaVersion',
-                             ['major', 'minor', 'patch', 'update'])
+    raw_version_str = raw_output.split('\n')[0]  # eg 'java version "1.8.0_25"'
+    version_str = raw_version_str.split()[-1].strip('"')  # eg '1.8.0_25'
+    version, update = version_str.split('_')  # eg ['1.8.0', '25']
 
     # map over the values and convert them to integers
-    version_info = map(lambda x: int(x), version.split('.') + [update])
+    version_info = [int(x) for x in version.split('.') + [update]]
 
     return JavaVersion(major=version_info[0],
                        minor=version_info[1],
@@ -166,56 +134,6 @@
                        update=version_info[3])
 
 
-def multi_starts_with(orig_str, *prefixes):
-    """Takes a string and an abritrary number of prefixes then checks the
-    original string for any of the possible prefixes passed in"""
-
-    for s in prefixes:
-        if orig_str.startswith(s):
-            return True
-    return False
-
-
-def determine_test_suite():
-    """This function current acts to determine if SQL tests need to be run in
-    addition to the core test suite *or* if _only_ SQL tests need to be run
-    as the git logs show that to be the only thing touched. In the future
-    this function will act more generically to help further segregate the
-    test suite runner (hence the function name).
-    @return a set of unique test names"""
-    test_suite = list()
-
-    if AMPLAB_JENKINS:
-        run_cmd(['git', 'fetch', 'origin', 'master:master'])
-
-        raw_output = subprocess.check_output(['git', 'diff', '--name-only', 'master'])
-        # remove any empty strings
-        changed_files = [f for f in raw_output.split('\n') if f]
-
-        # find any sql files
-        sql_files = [f for f in changed_files
-                     if multi_starts_with(f,
-                                          "sql/",
-                                          "bin/spark-sql",
-                                          "sbin/start-thriftserver.sh")]
-
-        non_sql_files = set(changed_files).difference(set(sql_files))
-
-        if non_sql_files:
-            test_suite.append("CORE")
-        if sql_files:
-            print "[info] Detected changes in SQL. Will run Hive test suite."
-            test_suite.append("SQL")
-            if not non_sql_files:
-                print "[info] Detected no changes except in SQL. Will only run SQL tests."
-        return set(test_suite)
-    else:
-        # we aren't in the Amplab environment so merely run all tests
-        test_suite.append("CORE")
-        test_suite.append("SQL")
-        return set(test_suite)
-
-
 def set_title_and_block(title, err_block):
     os.environ["CURRENT_BLOCK"] = error_codes[err_block]
     line_str = '=' * 72
@@ -254,6 +172,10 @@ def exec_sbt(sbt_args=[]):
     sbt_cmd = ["./build/sbt"] + sbt_args
 
+    sbt_output_filter = re.compile("^.*[info].*Resolving" + "|" +
+                                   "^.*[warn].*Merging" + "|" +
+                                   "^.*[info].*Including")
+
     # NOTE: echo "q" is needed because sbt on encountering a build file
     # with failure (either resolution or compilation) prompts the user for
     # input either q, r, etc to quit or retry. This echo is there to make it
@@ -264,42 +186,90 @@
                                 stdout=subprocess.PIPE)
     echo_proc.wait()
     for line in iter(sbt_proc.stdout.readline, ''):
-        if not SBT_OUTPUT_FILTER.match(line):
-            print line,
+        if not sbt_output_filter.match(line):
+            print line,
     retcode = sbt_proc.wait()
 
     if retcode > 0:
         exit_from_command_with_retcode(sbt_cmd, retcode)
 
 
-def build_apache_spark():
-    """Will first build Spark with Hive v0.12.0 to ensure the build is
-    successful and, after, will build Spark again against Hive v0.13.1 as the
-    tests are based off that"""
+def get_hadoop_profiles(hadoop_version):
+    """Return a list of profiles indicating which Hadoop version to use from a Hadoop version tag."""
+
+    #amplab_jenkins_build_profile = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE")
+
+    sbt_maven_hadoop_profiles = {
+        "hadoop1.0": ["-Phadoop-1", "-Dhadoop.version=1.0.4"],
+        "hadoop2.0": ["-Phadoop-1", "-Dhadoop.version=2.0.0-mr1-cdh4.1.1"],
+        "hadoop2.2": ["-Pyarn", "-Phadoop-2.2"],
+        "hadoop2.3": ["-Pyarn", "-Phadoop-2.3", "-Dhadoop.version=2.3.0"],
+    }
+
+    try:
+        hadoop_profiles = sbt_maven_hadoop_profiles[hadoop_version]
+    except KeyError:
+        print "[error] Could not find", hadoop_version, "in the list. Valid options",
+        print "are 'hadoop1.0', 'hadoop2.0', 'hadoop2.2', and 'hadoop2.3'."
+        sys.exit(int(os.environ.get("CURRENT_BLOCK", 255)))
+
+    return hadoop_profiles
+
+
+def get_build_profiles(hadoop_version="hadoop2.3",
+                       base_profiles=True,
+                       hive_profiles=False):
+    """Returns a list of build profiles looked up from the passed in Hadoop profile key,
+    with the option of adding on the base and Hive profiles."""
+
+    # note: the flag lists are named so they do not shadow the boolean parameters above
+    base_profile_flags = ["-Pkinesis-asl"]
+    hive_profile_flags = ["-Phive", "-Phive-thriftserver"]
+    build_profiles = get_hadoop_profiles(hadoop_version)
+
+    # first, check and add the base profiles
+    if base_profiles:
+        build_profiles = build_profiles + base_profile_flags
+    # second, check and add the hive profiles
+    if hive_profiles:
+        build_profiles = build_profiles + hive_profile_flags
+
+    return build_profiles
+
+
+def build_spark_maven(hadoop_version):
+    build_profiles = get_build_profiles(hadoop_version, hive_profiles=True)
+    mvn_goals = ["clean", "package", "-DskipTests"]
+    profiles_and_goals = build_profiles + mvn_goals
+
+    print "[info] Building Spark (w/Hive 0.13.1) with these arguments:",
+    print " ".join(profiles_and_goals)
+
+    exec_maven(profiles_and_goals)
+
+
+def build_spark_sbt(hadoop_version):
+    build_profiles = get_build_profiles(hadoop_version, hive_profiles=True)
+    sbt_goals = ["package",
+                 "assembly/assembly",
+                 "streaming-kafka-assembly/assembly"]
+    profiles_and_goals = build_profiles + sbt_goals
+
+    print "[info] Building Spark (w/Hive 0.13.1) with these arguments:",
+    print " ".join(profiles_and_goals)
+
+    exec_sbt(profiles_and_goals)
+
+
+def build_apache_spark(build_tool, hadoop_version):
+    """Will build Spark against Hive v0.13.1 given the passed in build tool (either `sbt` or
+    `maven`). Defaults to using `sbt`."""
 
     set_title_and_block("Building Spark", "BLOCK_BUILD")
 
-    sbt_maven_profile_args = os.environ.get(SBT_MAVEN_PROFILE_ARGS_ENV).split()
-    hive_profile_args = sbt_maven_profile_args + ["-Phive",
-                                                  "-Phive-thriftserver"]
-    # set the default maven args
-    base_mvn_args = ["clean", "package", "-DskipTests"]
-    # set the necessary sbt goals
-    sbt_hive_goals = ["package",
-                      "assembly/assembly",
-                      "streaming-kafka-assembly/assembly"]
-
-    # Then build with default Hive version (0.13.1) because tests are based on
-    # this version
-    print "[info] Compile with Hive 0.13.1"
     rm_r("lib_managed")
-    print "[info] Building Spark with these arguments:",
-    print " ".join(hive_profile_args)
 
-    if AMPLAB_JENKINS_BUILD_TOOL == "maven":
-        exec_maven(hive_profile_args + base_mvn_args)
+    if build_tool == "maven":
+        build_spark_maven(hadoop_version)
     else:
-        exec_sbt(hive_profile_args + sbt_hive_goals)
+        build_spark_sbt(hadoop_version)
 
 
 def detect_binary_inop_with_mima():
@@ -308,49 +278,98 @@
     run_cmd(["./dev/mima"])
 
 
-def run_scala_tests(test_suite=[]):
-    """Function to properly execute all tests pass in, as a list, from the
-    `determine_test_suite` function"""
-    set_title_and_block("Running Spark unit tests", "BLOCK_SPARK_UNIT_TESTS")
+def determine_test_modules(test_env):
+    """This function currently acts to determine if SQL tests need to be run in
+    addition to the core test suite *or* if _only_ SQL tests need to be run
+    as the git logs show that to be the only thing touched. In the future
+    this function will act more generically to help further segregate the
+    test suite runner (hence the function name).
+    @return a set of unique test names"""
+    test_suite = list()
 
-    # ensure the test_suite is a set
-    if not isinstance(test_suite, set):
-        test_suite = set(test_suite)
+    if test_env == "amplab_jenkins":
+        target_branch = os.environ.get("ghprbTargetBranch")
+        run_cmd(['git', 'fetch', 'origin', target_branch+":"+target_branch])
 
-    # if the Spark SQL tests are enabled, run the tests with the Hive profiles
-    # enabled.
-    if "SQL" in test_suite:
-        sbt_maven_profile_args = \
-            os.environ.get(SBT_MAVEN_PROFILE_ARGS_ENV).split()
-        os.environ[SBT_MAVEN_PROFILE_ARGS_ENV] = \
-            " ".join(sbt_maven_profile_args + ["-Phive", "-Phive-thriftserver"])
-
-    # if we only have changes in SQL build a custom test string
-    if "SQL" in test_suite and "CORE" not in test_suite:
-        sbt_maven_test_args = ["catalyst/test",
-                               "sql/test",
-                               "hive/test",
-                               "hive-thriftserver/test",
-                               "mllib/test"]
+        raw_output = subprocess.check_output(['git', 'diff', '--name-only', target_branch])
+        # remove any empty strings
+        changed_files = [f for f in raw_output.split('\n') if f]
+
+        # find any sql files
+        sql_files = [f for f in changed_files
+                     if any(f.startswith(p) for p in ["sql/",
+                                                      "bin/spark-sql",
+                                                      "sbin/start-thriftserver.sh"])]
+
+        non_sql_files = set(changed_files).difference(set(sql_files))
+
+        if non_sql_files:
+            test_suite.append("CORE")
+        if sql_files:
+            print "[info] Detected changes in SQL. Will run Hive test suite."
+            test_suite.append("SQL")
+            if not non_sql_files:
+                print "[info] Detected no changes except in SQL. Will only run SQL tests."
+        return set(test_suite)
     else:
-        sbt_maven_test_args = ["test"]
+        # we aren't in the Amplab environment so simply run all tests
+        test_suite.append("CORE")
+        test_suite.append("SQL")
+        return set(test_suite)
+
+
+def run_scala_tests_maven(test_profiles):
+    mvn_test_goals = ["test", "--fail-at-end"]
+    profiles_and_goals = test_profiles + mvn_test_goals
+
+    print "[info] Running Spark tests with these arguments:",
+    print " ".join(profiles_and_goals)
+
+    exec_maven(profiles_and_goals)
 
-    # get the latest sbt maven profile arguments
-    sbt_maven_profile_args = os.environ.get(SBT_MAVEN_PROFILE_ARGS_ENV).split()
+
+def run_scala_tests_sbt(test_modules, test_profiles):
+    # if we only have changes in SQL build a custom test list
+    if "SQL" in test_modules and "CORE" not in test_modules:
+        sbt_test_goals = ["catalyst/test",
+                          "sql/test",
+                          "hive/test",
+                          "hive-thriftserver/test",
+                          "mllib/test"]
+    else:
+        sbt_test_goals = ["test"]
+
+    profiles_and_goals = test_profiles + sbt_test_goals
 
     print "[info] Running Spark tests with these arguments:",
-    print " ".join(sbt_maven_profile_args),
-    print " ".join(sbt_maven_test_args)
+    print " ".join(profiles_and_goals)
+
+    exec_sbt(profiles_and_goals)
+
+
+def run_scala_tests(build_tool, hadoop_version, test_modules):
+    """Function to properly execute all tests passed in as a set from the
+    `determine_test_modules` function"""
+    set_title_and_block("Running Spark unit tests", "BLOCK_SPARK_UNIT_TESTS")
+
+    test_modules = set(test_modules)
 
-    if AMPLAB_JENKINS_BUILD_TOOL == "maven":
-        exec_maven(["test"] + sbt_maven_profile_args + ["--fail-at-end"])
+    # if the Spark SQL tests are enabled, run the tests with the Hive profiles
+    # enabled.
+    if "SQL" in test_modules:
+        test_profiles = get_build_profiles(hadoop_version, hive_profiles=True)
+    else:
+        test_profiles = get_build_profiles(hadoop_version)
+
+    if build_tool == "maven":
+        run_scala_tests_maven(test_profiles)
     else:
-        exec_sbt(sbt_maven_profile_args + sbt_maven_test_args)
+        run_scala_tests_sbt(test_modules, test_profiles)
 
 
-def run_python_tests(test_suite=[]):
+def run_python_tests():
     set_title_and_block("Running PySpark tests", "BLOCK_PYSPARK_UNIT_TESTS")
-    
+
     # Add path for Python3 in Jenkins if we're calling from a Jenkins machine
     if AMPLAB_JENKINS:
         os.environ["PATH"] = os.environ.get("PATH")+":/home/anaconda/envs/py3k/bin"
@@ -358,7 +377,7 @@ def run_python_tests(test_suite=[]):
     run_cmd(["./python/run-tests"])
 
 
-def run_sparkr_tests(test_suite=[]):
+def run_sparkr_tests():
     set_title_and_block("Running SparkR tests", "BLOCK_SPARKR_UNIT_TESTS")
 
     if which("R"):
@@ -367,29 +386,28 @@ def run_sparkr_tests(test_suite=[]):
     else:
         print "Ignoring SparkR tests as R was not found in PATH"
 
-if __name__ == "__main__":
+
+def main():
+    global error_codes  # set_title_and_block() reads the error codes at module scope
     # Ensure the user home directory (HOME) is valid and is an absolute directory
-    if not USER_HOME_DIR or not os.path.isabs(USER_HOME_DIR):
+    if not USER_HOME or not os.path.isabs(USER_HOME):
         print "[error] Cannot determine your home directory as an absolute path;",
         print "ensure the $HOME environment variable is set properly."
         sys.exit(1)
 
-    os.chdir(SPARK_PROJ_ROOT)
+    os.chdir(SPARK_HOME)
 
-    rm_r("./work")
-    rm_r(os.path.join(USER_HOME_DIR, ".ivy2/local/org.apache.spark"))
-    rm_r(os.path.join(USER_HOME_DIR, ".ivy2/cache/org.apache.spark"))
+    rm_r(os.path.join(SPARK_HOME, "work"))
+    rm_r(os.path.join(USER_HOME, ".ivy2/local/org.apache.spark"))
+    rm_r(os.path.join(USER_HOME, ".ivy2/cache/org.apache.spark"))
 
     error_codes = get_error_codes("./dev/run-tests-codes.sh")
 
     os.environ["CURRENT_BLOCK"] = error_codes["BLOCK_GENERAL"]
 
-    set_sbt_maven_profile_args()
-
     java_exe = determine_java_executable()
 
     if not java_exe:
-        print "[error] Cannot find a version of `java` on the system; please", 
+        print "[error] Cannot find a version of `java` on the system; please",
         print "install one and retry."
         sys.exit(2)
@@ -398,20 +416,36 @@ def run_sparkr_tests(test_suite=[]):
     if java_version.minor < 8:
         print "[warn] Java 8 tests will not run because JDK version is < 1.8."
 
-    test_suite = determine_test_suite()
+    if os.environ.get("AMPLAB_JENKINS"):
+        # if we're on the Amplab Jenkins build servers, set up variables
+        # to reflect the environment settings
+        build_tool = os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "sbt")
+        hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.3")
+        test_env = "amplab_jenkins"
+    else:
+        # else we're running locally and can use local settings
+        build_tool = "sbt"
+        hadoop_version = "hadoop2.3"
+        test_env = "local"
 
+    # license checks
     run_apache_rat_checks()
-
-    run_scala_style_checks()
+    # style checks
+    run_scala_style_checks()
    run_python_style_checks()
-    build_apache_spark()
+    # spark build
+    build_apache_spark(build_tool, hadoop_version)
 
+    # backwards compatibility checks
     detect_binary_inop_with_mima()
-    run_scala_tests(test_suite)
-
+    # test suites
+    test_modules = determine_test_modules(test_env)
+    run_scala_tests(build_tool, hadoop_version, test_modules)
     run_python_tests()
-
    run_sparkr_tests()
+
+
+if __name__ == "__main__":
+    main()
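
As a quick sanity check of the profile composition introduced above, here is a minimal, self-contained sketch (not part of the patch) of how the Hadoop, base, and Hive profiles combine into build arguments. The `HADOOP_PROFILES` table mirrors `sbt_maven_hadoop_profiles` from the patch, while `compose_profiles` is a hypothetical stand-in for `get_build_profiles`; the flag ordering shown is an assumption for illustration only.

```python
# Standalone sketch of the profile composition used by the patched run-tests.py.
# HADOOP_PROFILES mirrors sbt_maven_hadoop_profiles in the patch; compose_profiles()
# is an illustrative stand-in for get_build_profiles(), not the patch's exact code.

HADOOP_PROFILES = {
    "hadoop1.0": ["-Phadoop-1", "-Dhadoop.version=1.0.4"],
    "hadoop2.0": ["-Phadoop-1", "-Dhadoop.version=2.0.0-mr1-cdh4.1.1"],
    "hadoop2.2": ["-Pyarn", "-Phadoop-2.2"],
    "hadoop2.3": ["-Pyarn", "-Phadoop-2.3", "-Dhadoop.version=2.3.0"],
}


def compose_profiles(hadoop_version="hadoop2.3", with_hive=False):
    # Start from the Hadoop profiles, then layer the base (and optionally Hive) profiles on top.
    profiles = list(HADOOP_PROFILES[hadoop_version])
    profiles += ["-Pkinesis-asl"]
    if with_hive:
        profiles += ["-Phive", "-Phive-thriftserver"]
    return profiles


# e.g. the sbt invocation assembled by build_spark_sbt() would then look roughly like:
#   ./build/sbt -Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0 -Pkinesis-asl \
#       -Phive -Phive-thriftserver package assembly/assembly streaming-kafka-assembly/assembly
print(" ".join(compose_profiles("hadoop2.3", with_hive=True)))
```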