diff --git a/.github/scripts/analyze_dependencies_script.sh b/.github/scripts/analyze_dependencies_script.sh index c92d90030f71..1212dbd1cd2b 100755 --- a/.github/scripts/analyze_dependencies_script.sh +++ b/.github/scripts/analyze_dependencies_script.sh @@ -15,7 +15,7 @@ #!bin/bash -${MVN} ${MAVEN_SKIP} dependency:analyze -DoutputXML=true -DignoreNonCompile=true -DfailOnWarning=true ${HADOOP_PROFILE} || +${MVN} ${MAVEN_SKIP} dependency:analyze -DoutputXML=true -DignoreNonCompile=true -DfailOnWarning=true || { echo " The dependency analysis has found a dependency that is either: diff --git a/.github/scripts/license_checks_script.sh b/.github/scripts/license_checks_script.sh index 410ac60375fa..163214f82988 100755 --- a/.github/scripts/license_checks_script.sh +++ b/.github/scripts/license_checks_script.sh @@ -20,7 +20,7 @@ set -e ./.github/scripts/setup_generate_license.sh ${MVN} apache-rat:check -Prat --fail-at-end \ -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn \ --Drat.consoleOutput=true ${HADOOP_PROFILE} +-Drat.consoleOutput=true # Generate dependency reports and checks they are valid. mkdir -p target distribution/bin/generate-license-dependency-reports.py . target --clean-maven-artifact-transfer --parallel 2 diff --git a/.github/workflows/cron-job-its.yml b/.github/workflows/cron-job-its.yml index 3752b6c60eae..65471ad81b05 100644 --- a/.github/workflows/cron-job-its.yml +++ b/.github/workflows/cron-job-its.yml @@ -111,8 +111,6 @@ jobs: name: security vulnerabilities strategy: fail-fast: false - matrix: - HADOOP_PROFILE: [ '', '-Phadoop2' ] runs-on: ubuntu-latest steps: - name: Checkout branch @@ -129,10 +127,8 @@ jobs: run: mvn clean install dependency:go-offline -P dist -P skip-static-checks,skip-tests -Dmaven.javadoc.skip=true -Dcyclonedx.skip=true -Dweb.console.skip=true - name: security vulnerabilities check - env: - HADOOP_PROFILE: ${{ matrix.HADOOP_PROFILE }} run: | - mvn dependency-check:purge dependency-check:check ${HADOOP_PROFILE} || { echo " + mvn dependency-check:purge dependency-check:check || { echo " The OWASP dependency check has found security vulnerabilities. Please use a newer version of the dependency that does not have vulnerabilities. 
To see a report run `mvn dependency-check:check` diff --git a/.github/workflows/static-checks.yml b/.github/workflows/static-checks.yml index 8fc05d35bd87..9cdb1d3caef7 100644 --- a/.github/workflows/static-checks.yml +++ b/.github/workflows/static-checks.yml @@ -95,13 +95,6 @@ jobs: run: | ./.github/scripts/analyze_dependencies_script.sh - - name: analyze dependencies for hadoop2 - if: ${{ matrix.java == 'jdk8' }} - env: - HADOOP_PROFILE: -Phadoop2 - run: | - ./.github/scripts/analyze_dependencies_script.sh - - name: animal sniffer checks if: ${{ matrix.java == 'jdk8' }} run: ${MVN} animal-sniffer:check --fail-at-end diff --git a/distribution/pom.xml b/distribution/pom.xml index 1c7f2ca56c49..ecc00a9155d2 100644 --- a/distribution/pom.xml +++ b/distribution/pom.xml @@ -115,191 +115,6 @@ - - dist-hadoop2 - - false - - tar - - - - - - org.codehaus.mojo - exec-maven-plugin - - - generate-readme - initialize - - exec - - - ${project.basedir}/bin/build-textfile-readme.sh - - ${project.basedir}/../ - ${project.parent.version} - - - - - generate-binary-license - initialize - - exec - - - ${project.basedir}/bin/generate-binary-license.py - - ${project.parent.basedir}/licenses/APACHE2 - ${project.parent.basedir}/licenses.yaml - ${project.parent.basedir}/LICENSE.BINARY - - - - - generate-binary-notice - initialize - - exec - - - ${project.basedir}/bin/generate-binary-notice.py - - ${project.parent.basedir}/NOTICE - ${project.parent.basedir}/licenses.yaml - ${project.parent.basedir}/NOTICE.BINARY - - - - - pull-deps - package - - exec - - - ${project.parent.basedir}/examples/bin/run-java - - -classpath - - -Ddruid.extensions.loadList=[] - -Ddruid.extensions.directory=${project.build.directory}/extensions - - - -Ddruid.extensions.hadoopDependenciesDir=${project.build.directory}/hadoop-dependencies - - -Dhadoop2.enabled=true - org.apache.druid.cli.Main - tools - pull-deps - --clean - --defaultVersion - ${project.parent.version} - -l - ${settings.localRepository} - -h - org.apache.hadoop:hadoop-client:${hadoop.compile.version} - -c - org.apache.druid.extensions:druid-avro-extensions - -c - org.apache.druid.extensions:druid-azure-extensions - -c - org.apache.druid.extensions:druid-bloom-filter - -c - org.apache.druid.extensions:druid-datasketches - -c - org.apache.druid.extensions:druid-hdfs-storage - -c - org.apache.druid.extensions:druid-histogram - -c - org.apache.druid.extensions:druid-kafka-extraction-namespace - -c - org.apache.druid.extensions:druid-kafka-indexing-service - -c - org.apache.druid.extensions:druid-kinesis-indexing-service - -c - org.apache.druid.extensions:druid-lookups-cached-global - -c - org.apache.druid.extensions:druid-lookups-cached-single - -c - org.apache.druid.extensions:druid-multi-stage-query - -c - org.apache.druid.extensions:druid-protobuf-extensions - -c - org.apache.druid.extensions:mysql-metadata-storage - -c - org.apache.druid.extensions:druid-orc-extensions - -c - org.apache.druid.extensions:druid-parquet-extensions - -c - org.apache.druid.extensions:postgresql-metadata-storage - -c - org.apache.druid.extensions:druid-kerberos - -c - org.apache.druid.extensions:druid-s3-extensions - -c - org.apache.druid.extensions:druid-aws-rds-extensions - -c - org.apache.druid.extensions:druid-ec2-extensions - -c - org.apache.druid.extensions:druid-google-extensions - -c - org.apache.druid.extensions:druid-stats - -c - org.apache.druid.extensions:simple-client-sslcontext - -c - org.apache.druid.extensions:druid-basic-security - -c - org.apache.druid.extensions:druid-pac4j - -c 
- org.apache.druid.extensions:druid-ranger-security - -c - org.apache.druid.extensions:druid-kubernetes-extensions - -c - org.apache.druid.extensions:druid-catalog - ${druid.distribution.pulldeps.opts} - - - - - - - org.apache.maven.plugins - maven-assembly-plugin - - - distro-assembly - package - - single - - - apache-druid-${project.parent.version} - posix - - src/assembly/assembly.xml - - - - - - - org.codehaus.mojo - license-maven-plugin - - - download-licenses - - download-licenses - - - - - - - dist diff --git a/docs/configuration/index.md b/docs/configuration/index.md index 6a0d65ae1d5f..753045a92a90 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -1538,7 +1538,7 @@ Additional peon configs include: |`druid.indexer.task.baseDir`|Base temporary working directory.|`System.getProperty("java.io.tmpdir")`| |`druid.indexer.task.baseTaskDir`|Base temporary working directory for tasks.|`${druid.indexer.task.baseDir}/persistent/task`| |`druid.indexer.task.batchProcessingMode`| Batch ingestion tasks have three operating modes to control construction and tracking for intermediary segments: `OPEN_SEGMENTS`, `CLOSED_SEGMENTS`, and `CLOSED_SEGMENT_SINKS`. `OPEN_SEGMENTS` uses the streaming ingestion code path and performs a `mmap` on intermediary segments to build a timeline to make these segments available to realtime queries. Batch ingestion doesn't require intermediary segments, so the default mode, `CLOSED_SEGMENTS`, eliminates `mmap` of intermediary segments. `CLOSED_SEGMENTS` mode still tracks the entire set of segments in heap. The `CLOSED_SEGMENTS_SINKS` mode is the most aggressive configuration and should have the smallest memory footprint. It eliminates in-memory tracking and `mmap` of intermediary segments produced during segment creation. `CLOSED_SEGMENTS_SINKS` mode isn't as well tested as other modes so is currently considered experimental. You can use `OPEN_SEGMENTS` mode if problems occur with the 2 newer modes. |`CLOSED_SEGMENTS`| -|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|org.apache.hadoop:hadoop-client:2.8.5| +|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|`org.apache.hadoop:hadoop-client-api:3.3.6`, `org.apache.hadoop:hadoop-client-runtime:3.3.6`| |`druid.indexer.task.defaultRowFlushBoundary`|Highest row count before persisting to disk. 
Used for indexing generating tasks.|75000| |`druid.indexer.task.directoryLockTimeout`|Wait this long for zombie peons to exit before giving up on their replacements.|PT10M| |`druid.indexer.task.gracefulShutdownTimeout`|Wait this long on middleManager restart for restorable tasks to gracefully exit.|PT5M| @@ -1609,7 +1609,7 @@ then the value from the configuration below is used: |`druid.worker.numConcurrentMerges`|Maximum number of segment persist or merge operations that can run concurrently across all tasks.|`druid.worker.capacity` / 2, rounded down| |`druid.indexer.task.baseDir`|Base temporary working directory.|`System.getProperty("java.io.tmpdir")`| |`druid.indexer.task.baseTaskDir`|Base temporary working directory for tasks.|`${druid.indexer.task.baseDir}/persistent/tasks`| -|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|org.apache.hadoop:hadoop-client:2.8.5| +|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|`org.apache.hadoop:hadoop-client-api:3.3.6`, `org.apache.hadoop:hadoop-client-runtime:3.3.6`| |`druid.indexer.task.gracefulShutdownTimeout`|Wait this long on Indexer restart for restorable tasks to gracefully exit.|PT5M| |`druid.indexer.task.hadoopWorkingPath`|Temporary working directory for Hadoop tasks.|`/tmp/druid-indexing`| |`druid.indexer.task.restoreTasksOnRestart`|If true, the Indexer will attempt to stop tasks gracefully on shutdown and restore them on restart.|false| diff --git a/docs/development/extensions-core/hdfs.md b/docs/development/extensions-core/hdfs.md index b5264db0fb68..32ef6133a9d4 100644 --- a/docs/development/extensions-core/hdfs.md +++ b/docs/development/extensions-core/hdfs.md @@ -112,7 +112,7 @@ example properties. Please follow the instructions at [https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/gcs/INSTALL.md](https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/gcs/INSTALL.md) for more details. For more configurations, [GCS core default](https://github.com/GoogleCloudDataproc/hadoop-connectors/blob/v2.0.0/gcs/conf/gcs-core-default.xml) -and [GCS core template](https://github.com/GoogleCloudPlatform/bdutil/blob/master/conf/hadoop2/gcs-core-template.xml). +and [GCS core template](https://github.com/GoogleCloudDataproc/hadoop-connectors/blob/master/gcs/src/test/resources/core-site.xml). ```xml @@ -147,8 +147,6 @@ and [GCS core template](https://github.com/GoogleCloudPlatform/bdutil/blob/maste ``` -Tested with Druid 0.17.0, Hadoop 2.8.5 and gcs-connector jar 2.0.0-hadoop2. - ## Reading data from HDFS or Cloud Storage ### Native batch ingestion diff --git a/docs/ingestion/hadoop.md b/docs/ingestion/hadoop.md index cb7f03083318..c34fdb921172 100644 --- a/docs/ingestion/hadoop.md +++ b/docs/ingestion/hadoop.md @@ -180,7 +180,7 @@ Once you install the GCS Connector jar in all MiddleManager and Indexer processe your Google Cloud Storage paths in the inputSpec with the below job properties. For more configurations, see the [instructions to configure Hadoop](https://github.com/GoogleCloudPlatform/bigdata-interop/blob/master/gcs/INSTALL.md#configure-hadoop), [GCS core default](https://github.com/GoogleCloudDataproc/hadoop-connectors/blob/v2.0.0/gcs/conf/gcs-core-default.xml) -and [GCS core template](https://github.com/GoogleCloudPlatform/bdutil/blob/master/conf/hadoop2/gcs-core-template.xml). 
+and [GCS core template](https://github.com/GoogleCloudDataproc/hadoop-connectors/blob/master/gcs/src/test/resources/core-site.xml). ``` "paths" : "gs://billy-bucket/the/data/is/here/data.gz,gs://billy-bucket/the/data/is/here/moredata.gz,gs://billy-bucket/the/data/is/here/evenmoredata.gz" diff --git a/docs/operations/other-hadoop.md b/docs/operations/other-hadoop.md index 14a141a1958e..f5e5839a907c 100644 --- a/docs/operations/other-hadoop.md +++ b/docs/operations/other-hadoop.md @@ -89,7 +89,7 @@ classloader. 2. Batch ingestion uses jars from `hadoop-dependencies/` to submit Map/Reduce jobs (location customizable via the `druid.extensions.hadoopDependenciesDir` runtime property; see [Configuration](../configuration/index.md#extensions)). -`hadoop-client:2.8.5` is the default version of the Hadoop client bundled with Druid for both purposes. This works with +The default version of the Hadoop client bundled with Druid is `3.3.6`. This works with many Hadoop distributions (the version does not necessarily need to match), but if you run into issues, you can instead have Druid load libraries that exactly match your distribution. To do this, either copy the jars from your Hadoop cluster, or use the `pull-deps` tool to download the jars from a Maven repository. diff --git a/docs/tutorials/tutorial-batch-hadoop.md b/docs/tutorials/tutorial-batch-hadoop.md index 065dc76c1e0b..ec3c2014dd9f 100644 --- a/docs/tutorials/tutorial-batch-hadoop.md +++ b/docs/tutorials/tutorial-batch-hadoop.md @@ -38,18 +38,18 @@ Once the Docker install is complete, please proceed to the next steps in the tut ## Build the Hadoop docker image -For this tutorial, we've provided a Dockerfile for a Hadoop 2.8.5 cluster, which we'll use to run the batch indexing task. +For this tutorial, we've provided a Dockerfile for a Hadoop 3.3.6 cluster, which we'll use to run the batch indexing task. This Dockerfile and related files are located at `quickstart/tutorial/hadoop/docker`. -From the apache-druid-{{DRUIDVERSION}} package root, run the following commands to build a Docker image named "druid-hadoop-demo" with version tag "2.8.5": +From the apache-druid-{{DRUIDVERSION}} package root, run the following commands to build a Docker image named "druid-hadoop-demo" with version tag "3.3.6": ```bash cd quickstart/tutorial/hadoop/docker -docker build -t druid-hadoop-demo:2.8.5 . +docker build -t druid-hadoop-demo:3.3.6 . ``` -This will start building the Hadoop image. Once the image build is done, you should see the message `Successfully tagged druid-hadoop-demo:2.8.5` printed to the console. +This will start building the Hadoop image. Once the image build is done, you should see the message `Successfully tagged druid-hadoop-demo:3.3.6` printed to the console. ## Setup the Hadoop docker cluster @@ -77,7 +77,7 @@ On the host machine, add the following entry to `/etc/hosts`: Once the `/tmp/shared` folder has been created and the `etc/hosts` entry has been added, run the following command to start the Hadoop container. 
```bash -docker run -it -h druid-hadoop-demo --name druid-hadoop-demo -p 2049:2049 -p 2122:2122 -p 8020:8020 -p 8021:8021 -p 8030:8030 -p 8031:8031 -p 8032:8032 -p 8033:8033 -p 8040:8040 -p 8042:8042 -p 8088:8088 -p 8443:8443 -p 9000:9000 -p 10020:10020 -p 19888:19888 -p 34455:34455 -p 49707:49707 -p 50010:50010 -p 50020:50020 -p 50030:50030 -p 50060:50060 -p 50070:50070 -p 50075:50075 -p 50090:50090 -p 51111:51111 -v /tmp/shared:/shared druid-hadoop-demo:2.8.5 /etc/bootstrap.sh -bash +docker run -it -h druid-hadoop-demo --name druid-hadoop-demo -p 2049:2049 -p 2122:2122 -p 8020:8020 -p 8021:8021 -p 8030:8030 -p 8031:8031 -p 8032:8032 -p 8033:8033 -p 8040:8040 -p 8042:8042 -p 8088:8088 -p 8443:8443 -p 9000:9000 -p 10020:10020 -p 19888:19888 -p 34455:34455 -p 49707:49707 -p 50010:50010 -p 50020:50020 -p 50030:50030 -p 50060:50060 -p 50070:50070 -p 50075:50075 -p 50090:50090 -p 51111:51111 -v /tmp/shared:/shared druid-hadoop-demo:3.3.6 /etc/bootstrap.sh -bash ``` Once the container is started, your terminal will attach to a bash shell running inside the container: @@ -125,6 +125,7 @@ cd /usr/local/hadoop/bin ./hdfs dfs -mkdir /druid ./hdfs dfs -mkdir /druid/segments ./hdfs dfs -mkdir /quickstart +./hdfs dfs -mkdir /user ./hdfs dfs -chmod 777 /druid ./hdfs dfs -chmod 777 /druid/segments ./hdfs dfs -chmod 777 /quickstart @@ -205,10 +206,10 @@ We've included a sample of Wikipedia edits from September 12, 2015 to get you st To load this data into Druid, you can submit an *ingestion task* pointing to the file. We've included a task that loads the `wikiticker-2015-09-12-sampled.json.gz` file included in the archive. -Let's submit the `wikipedia-index-hadoop.json` task: +Let's submit the `wikipedia-index-hadoop3.json` task: ```bash -bin/post-index-task --file quickstart/tutorial/wikipedia-index-hadoop.json --url http://localhost:8081 +bin/post-index-task --file quickstart/tutorial/wikipedia-index-hadoop3.json --url http://localhost:8081 ``` ## Querying your data diff --git a/examples/quickstart/tutorial/hadoop/docker/Dockerfile b/examples/quickstart/tutorial/hadoop/docker/Dockerfile index fd71f5942fad..7739e2955456 100644 --- a/examples/quickstart/tutorial/hadoop/docker/Dockerfile +++ b/examples/quickstart/tutorial/hadoop/docker/Dockerfile @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# Creates pseudo distributed hadoop 2.8.5 with java 8 +# Creates pseudo distributed hadoop 3.3.6 with java 8 FROM centos:7 USER root @@ -56,32 +56,40 @@ ENV PATH $PATH:$JAVA_HOME/bin # hadoop ARG APACHE_ARCHIVE_MIRROR_HOST=https://archive.apache.org -RUN curl -s ${APACHE_ARCHIVE_MIRROR_HOST}/dist/hadoop/core/hadoop-2.8.5/hadoop-2.8.5.tar.gz | tar -xz -C /usr/local/ -RUN cd /usr/local && ln -s ./hadoop-2.8.5 hadoop +RUN curl -s ${APACHE_ARCHIVE_MIRROR_HOST}/dist/hadoop/core/hadoop-3.3.6/hadoop-3.3.6.tar.gz | tar -xz -C /usr/local/ +RUN cd /usr/local && ln -s ./hadoop-3.3.6 hadoop -ENV HADOOP_PREFIX /usr/local/hadoop +ENV HADOOP_HOME /usr/local/hadoop ENV HADOOP_COMMON_HOME /usr/local/hadoop ENV HADOOP_HDFS_HOME /usr/local/hadoop ENV HADOOP_MAPRED_HOME /usr/local/hadoop ENV HADOOP_YARN_HOME /usr/local/hadoop ENV HADOOP_CONF_DIR /usr/local/hadoop/etc/hadoop -ENV YARN_CONF_DIR $HADOOP_PREFIX/etc/hadoop +ENV YARN_CONF_DIR $HADOOP_HOME/etc/hadoop -RUN sed -i '/^export JAVA_HOME/ s:.*:export JAVA_HOME=/usr/lib/jvm/zulu8\nexport HADOOP_PREFIX=/usr/local/hadoop\nexport HADOOP_HOME=/usr/local/hadoop\n:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh -RUN sed -i '/^export HADOOP_CONF_DIR/ s:.*:export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop/:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh +# in hadoop 3 the example file is nearly empty so we can just append stuff +RUN sed -i '$ a export JAVA_HOME=/usr/lib/jvm/zulu8' $HADOOP_HOME/etc/hadoop/hadoop-env.sh +RUN sed -i '$ a export HADOOP_HOME=/usr/local/hadoop' $HADOOP_HOME/etc/hadoop/hadoop-env.sh +RUN sed -i '$ a export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop/' $HADOOP_HOME/etc/hadoop/hadoop-env.sh +RUN sed -i '$ a export HDFS_NAMENODE_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh +RUN sed -i '$ a export HDFS_DATANODE_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh +RUN sed -i '$ a export HDFS_SECONDARYNAMENODE_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh +RUN sed -i '$ a export YARN_RESOURCEMANAGER_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh +RUN sed -i '$ a export YARN_NODEMANAGER_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh -RUN mkdir $HADOOP_PREFIX/input -RUN cp $HADOOP_PREFIX/etc/hadoop/*.xml $HADOOP_PREFIX/input +RUN cat $HADOOP_HOME/etc/hadoop/hadoop-env.sh + +RUN mkdir $HADOOP_HOME/input +RUN cp $HADOOP_HOME/etc/hadoop/*.xml $HADOOP_HOME/input # pseudo distributed -ADD core-site.xml.template $HADOOP_PREFIX/etc/hadoop/core-site.xml.template +ADD core-site.xml.template $HADOOP_HOME/etc/hadoop/core-site.xml.template RUN sed s/HOSTNAME/localhost/ /usr/local/hadoop/etc/hadoop/core-site.xml.template > /usr/local/hadoop/etc/hadoop/core-site.xml -ADD hdfs-site.xml $HADOOP_PREFIX/etc/hadoop/hdfs-site.xml - -ADD mapred-site.xml $HADOOP_PREFIX/etc/hadoop/mapred-site.xml -ADD yarn-site.xml $HADOOP_PREFIX/etc/hadoop/yarn-site.xml +ADD hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml +ADD mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml +ADD yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml -RUN $HADOOP_PREFIX/bin/hdfs namenode -format +RUN $HADOOP_HOME/bin/hdfs namenode -format ADD ssh_config /root/.ssh/config RUN chmod 600 /root/.ssh/config @@ -120,16 +128,16 @@ RUN echo -e \ /usr/local/bin/start_sshd && \ chmod a+x /usr/local/bin/start_sshd -RUN start_sshd && $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh && $HADOOP_PREFIX/sbin/start-dfs.sh && $HADOOP_PREFIX/bin/hdfs dfs -mkdir -p /user/root -RUN start_sshd && $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh && $HADOOP_PREFIX/sbin/start-dfs.sh && $HADOOP_PREFIX/bin/hdfs dfs -put 
$HADOOP_PREFIX/etc/hadoop/ input +RUN start_sshd && $HADOOP_HOME/etc/hadoop/hadoop-env.sh && $HADOOP_HOME/sbin/start-dfs.sh +RUN start_sshd && $HADOOP_HOME/etc/hadoop/hadoop-env.sh && $HADOOP_HOME/sbin/start-dfs.sh CMD ["/etc/bootstrap.sh", "-d"] # Hdfs ports -EXPOSE 50010 50020 50070 50075 50090 8020 9000 +EXPOSE 8020 9000 9820 9864 9865 9866 9867 9868 9869 9870 9871 50010 50020 50070 50075 50090 # Mapred ports EXPOSE 10020 19888 #Yarn ports EXPOSE 8030 8031 8032 8033 8040 8042 8088 #Other ports -EXPOSE 49707 2122 +EXPOSE 2122 49707 \ No newline at end of file diff --git a/examples/quickstart/tutorial/hadoop/docker/bootstrap.sh b/examples/quickstart/tutorial/hadoop/docker/bootstrap.sh old mode 100755 new mode 100644 index 053662b95245..d1fa493d4ea6 --- a/examples/quickstart/tutorial/hadoop/docker/bootstrap.sh +++ b/examples/quickstart/tutorial/hadoop/docker/bootstrap.sh @@ -15,23 +15,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -: ${HADOOP_PREFIX:=/usr/local/hadoop} +: ${HADOOP_HOME:=/usr/local/hadoop} -$HADOOP_PREFIX/etc/hadoop/hadoop-env.sh +$HADOOP_HOME/etc/hadoop/hadoop-env.sh rm /tmp/*.pid - # installing libraries if any - (resource urls added comma separated to the ACP system variable) -cd $HADOOP_PREFIX/share/hadoop/common ; for cp in ${ACP//,/ }; do echo == $cp; curl -LO $cp ; done; cd - +cd $HADOOP_HOME/share/hadoop/common ; for cp in ${ACP//,/ }; do echo == $cp; curl -LO $cp ; done; cd - # altering the core-site configuration sed s/HOSTNAME/$HOSTNAME/ /usr/local/hadoop/etc/hadoop/core-site.xml.template > /usr/local/hadoop/etc/hadoop/core-site.xml start_sshd -$HADOOP_PREFIX/sbin/start-dfs.sh -$HADOOP_PREFIX/sbin/start-yarn.sh -$HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh start historyserver +$HADOOP_HOME/sbin/start-dfs.sh +$HADOOP_HOME/sbin/start-yarn.sh +$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver if [[ $1 == "-d" ]]; then while true; do sleep 1000; done diff --git a/examples/quickstart/tutorial/hadoop3/docker/Dockerfile b/examples/quickstart/tutorial/hadoop3/docker/Dockerfile deleted file mode 100644 index c0475651ee7f..000000000000 --- a/examples/quickstart/tutorial/hadoop3/docker/Dockerfile +++ /dev/null @@ -1,143 +0,0 @@ -# Based on the SequenceIQ hadoop-docker project hosted at -# https://github.com/sequenceiq/hadoop-docker, and modified at -# the Apache Software Foundation (ASF). -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Creates pseudo distributed hadoop 3.3.1 with java 8 -FROM centos:7 - -USER root - -# install dev tools -RUN yum clean all \ - && rpm --rebuilddb \ - && yum install -y curl which tar sudo openssh-server openssh-clients rsync yum-plugin-ovl\ - && yum clean all \ - && yum update -y libselinux \ - && yum update -y nss \ - && yum clean all -# update libselinux. see https://github.com/sequenceiq/hadoop-docker/issues/14 -# update nss. 
see https://unix.stackexchange.com/questions/280548/curl-doesnt-connect-to-https-while-wget-does-nss-error-12286 - -# passwordless ssh -RUN ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_dsa_key -RUN ssh-keygen -q -N "" -t rsa -f /etc/ssh/ssh_host_rsa_key -RUN ssh-keygen -q -N "" -t rsa -f /root/.ssh/id_rsa -RUN cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys - -# -# Pull Zulu OpenJDK binaries from official repository: -# - -ARG ZULU_REPO_VER=1.0.0-1 - -RUN rpm --import http://repos.azulsystems.com/RPM-GPG-KEY-azulsystems && \ - curl -sLO https://cdn.azul.com/zulu/bin/zulu-repo-${ZULU_REPO_VER}.noarch.rpm && \ - rpm -ivh zulu-repo-${ZULU_REPO_VER}.noarch.rpm && \ - yum -q -y update && \ - yum -q -y upgrade && \ - yum -q -y install zulu8-jdk && \ - yum clean all && \ - rm -rf /var/cache/yum zulu-repo_${ZULU_REPO_VER}.noarch.rpm - -ENV JAVA_HOME=/usr/lib/jvm/zulu8 -ENV PATH $PATH:$JAVA_HOME/bin - -# hadoop -ARG APACHE_ARCHIVE_MIRROR_HOST=https://archive.apache.org -RUN curl -s ${APACHE_ARCHIVE_MIRROR_HOST}/dist/hadoop/core/hadoop-3.3.1/hadoop-3.3.1.tar.gz | tar -xz -C /usr/local/ -RUN cd /usr/local && ln -s ./hadoop-3.3.1 hadoop - -ENV HADOOP_HOME /usr/local/hadoop -ENV HADOOP_COMMON_HOME /usr/local/hadoop -ENV HADOOP_HDFS_HOME /usr/local/hadoop -ENV HADOOP_MAPRED_HOME /usr/local/hadoop -ENV HADOOP_YARN_HOME /usr/local/hadoop -ENV HADOOP_CONF_DIR /usr/local/hadoop/etc/hadoop -ENV YARN_CONF_DIR $HADOOP_HOME/etc/hadoop - -# in hadoop 3 the example file is nearly empty so we can just append stuff -RUN sed -i '$ a export JAVA_HOME=/usr/lib/jvm/zulu8' $HADOOP_HOME/etc/hadoop/hadoop-env.sh -RUN sed -i '$ a export HADOOP_HOME=/usr/local/hadoop' $HADOOP_HOME/etc/hadoop/hadoop-env.sh -RUN sed -i '$ a export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop/' $HADOOP_HOME/etc/hadoop/hadoop-env.sh -RUN sed -i '$ a export HDFS_NAMENODE_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh -RUN sed -i '$ a export HDFS_DATANODE_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh -RUN sed -i '$ a export HDFS_SECONDARYNAMENODE_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh -RUN sed -i '$ a export YARN_RESOURCEMANAGER_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh -RUN sed -i '$ a export YARN_NODEMANAGER_USER=root' $HADOOP_HOME/etc/hadoop/hadoop-env.sh - -RUN cat $HADOOP_HOME/etc/hadoop/hadoop-env.sh - -RUN mkdir $HADOOP_HOME/input -RUN cp $HADOOP_HOME/etc/hadoop/*.xml $HADOOP_HOME/input - -# pseudo distributed -ADD core-site.xml.template $HADOOP_HOME/etc/hadoop/core-site.xml.template -RUN sed s/HOSTNAME/localhost/ /usr/local/hadoop/etc/hadoop/core-site.xml.template > /usr/local/hadoop/etc/hadoop/core-site.xml -ADD hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml -ADD mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml -ADD yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml - -RUN $HADOOP_HOME/bin/hdfs namenode -format - -ADD ssh_config /root/.ssh/config -RUN chmod 600 /root/.ssh/config -RUN chown root:root /root/.ssh/config - -# # installing supervisord -# RUN yum install -y python-setuptools -# RUN easy_install pip -# RUN curl https://bitbucket.org/pypa/setuptools/raw/bootstrap/ez_setup.py -o - | python -# RUN pip install supervisor -# -# ADD supervisord.conf /etc/supervisord.conf - -ADD bootstrap.sh /etc/bootstrap.sh -RUN chown root:root /etc/bootstrap.sh -RUN chmod 700 /etc/bootstrap.sh - -ENV BOOTSTRAP /etc/bootstrap.sh - -# workingaround docker.io build error -RUN ls -la /usr/local/hadoop/etc/hadoop/*-env.sh -RUN chmod +x /usr/local/hadoop/etc/hadoop/*-env.sh -RUN ls -la 
/usr/local/hadoop/etc/hadoop/*-env.sh - -# Copy additional .jars to classpath -RUN cp /usr/local/hadoop/share/hadoop/tools/lib/*.jar /usr/local/hadoop/share/hadoop/common/lib/ - -# fix the 254 error code -RUN sed -i "/^[^#]*UsePAM/ s/.*/#&/" /etc/ssh/sshd_config -RUN echo "UsePAM no" >> /etc/ssh/sshd_config -RUN echo "Port 2122" >> /etc/ssh/sshd_config - -# script for plain sshd start -RUN echo -e \ - '#!/bin/bash\n/usr/sbin/sshd\ntimeout 10 bash -c "until printf \"\" 2>>/dev/null >>/dev/tcp/127.0.0.1/2122; do sleep 0.5; done"' > \ - /usr/local/bin/start_sshd && \ - chmod a+x /usr/local/bin/start_sshd - -RUN start_sshd && $HADOOP_HOME/etc/hadoop/hadoop-env.sh && $HADOOP_HOME/sbin/start-dfs.sh -RUN start_sshd && $HADOOP_HOME/etc/hadoop/hadoop-env.sh && $HADOOP_HOME/sbin/start-dfs.sh - -CMD ["/etc/bootstrap.sh", "-d"] - -# Hdfs ports -EXPOSE 8020 9000 9820 9864 9865 9866 9867 9868 9869 9870 9871 50010 50020 50070 50075 50090 -# Mapred ports -EXPOSE 10020 19888 -#Yarn ports -EXPOSE 8030 8031 8032 8033 8040 8042 8088 -#Other ports -EXPOSE 2122 49707 \ No newline at end of file diff --git a/examples/quickstart/tutorial/hadoop3/docker/bootstrap.sh b/examples/quickstart/tutorial/hadoop3/docker/bootstrap.sh deleted file mode 100644 index d1fa493d4ea6..000000000000 --- a/examples/quickstart/tutorial/hadoop3/docker/bootstrap.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -: ${HADOOP_HOME:=/usr/local/hadoop} - -$HADOOP_HOME/etc/hadoop/hadoop-env.sh - -rm /tmp/*.pid -# installing libraries if any - (resource urls added comma separated to the ACP system variable) -cd $HADOOP_HOME/share/hadoop/common ; for cp in ${ACP//,/ }; do echo == $cp; curl -LO $cp ; done; cd - - -# altering the core-site configuration -sed s/HOSTNAME/$HOSTNAME/ /usr/local/hadoop/etc/hadoop/core-site.xml.template > /usr/local/hadoop/etc/hadoop/core-site.xml - - -start_sshd -$HADOOP_HOME/sbin/start-dfs.sh -$HADOOP_HOME/sbin/start-yarn.sh -$HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver - -if [[ $1 == "-d" ]]; then - while true; do sleep 1000; done -fi - -if [[ $1 == "-bash" ]]; then - /bin/bash -fi diff --git a/examples/quickstart/tutorial/hadoop3/docker/core-site.xml.template b/examples/quickstart/tutorial/hadoop3/docker/core-site.xml.template deleted file mode 100644 index 256de4f0988f..000000000000 --- a/examples/quickstart/tutorial/hadoop3/docker/core-site.xml.template +++ /dev/null @@ -1,24 +0,0 @@ - - - - - fs.defaultFS - hdfs://HOSTNAME:9000 - - diff --git a/examples/quickstart/tutorial/hadoop3/docker/hdfs-site.xml b/examples/quickstart/tutorial/hadoop3/docker/hdfs-site.xml deleted file mode 100644 index 18758b97ff3c..000000000000 --- a/examples/quickstart/tutorial/hadoop3/docker/hdfs-site.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - - dfs.replication - 1 - - - dfs.client.use.datanode.hostname - true - - - dfs.datanode.use.datanode.hostname - true - - diff --git a/examples/quickstart/tutorial/hadoop3/docker/mapred-site.xml b/examples/quickstart/tutorial/hadoop3/docker/mapred-site.xml deleted file mode 100644 index 96aa1fa9ec8d..000000000000 --- a/examples/quickstart/tutorial/hadoop3/docker/mapred-site.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - - - mapreduce.framework.name - yarn - - diff --git a/examples/quickstart/tutorial/hadoop3/docker/ssh_config b/examples/quickstart/tutorial/hadoop3/docker/ssh_config deleted file mode 100644 index e9d0b917b533..000000000000 --- a/examples/quickstart/tutorial/hadoop3/docker/ssh_config +++ /dev/null @@ -1,20 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -Host * - UserKnownHostsFile /dev/null - StrictHostKeyChecking no - LogLevel quiet - Port 2122 diff --git a/examples/quickstart/tutorial/hadoop3/docker/yarn-site.xml b/examples/quickstart/tutorial/hadoop3/docker/yarn-site.xml deleted file mode 100644 index 68a0ecfa60c8..000000000000 --- a/examples/quickstart/tutorial/hadoop3/docker/yarn-site.xml +++ /dev/null @@ -1,65 +0,0 @@ - - - - - yarn.nodemanager.aux-services - mapreduce_shuffle - - - - yarn.application.classpath - /usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/* - - - - - Number of seconds after an application finishes before the nodemanager's - DeletionService will delete the application's localized file directory - and log directory. - - To diagnose Yarn application problems, set this property's value large - enough (for example, to 600 = 10 minutes) to permit examination of these - directories. After changing the property's value, you must restart the - nodemanager in order for it to have an effect. - - The roots of Yarn applications' work directories is configurable with - the yarn.nodemanager.local-dirs property (see below), and the roots - of the Yarn applications' log directories is configurable with the - yarn.nodemanager.log-dirs property (see also below). - - yarn.nodemanager.delete.debug-delay-sec - 600 - - - - yarn.log-aggregation-enable - true - - - - yarn.log-aggregation.retain-seconds - 900000 - - - - yarn.nodemanager.vmem-check-enabled - false - - - diff --git a/examples/quickstart/tutorial/wikipedia-index-hadoop.json b/examples/quickstart/tutorial/wikipedia-index-hadoop.json deleted file mode 100644 index c727fcfcc38b..000000000000 --- a/examples/quickstart/tutorial/wikipedia-index-hadoop.json +++ /dev/null @@ -1,79 +0,0 @@ -{ - "type" : "index_hadoop", - "spec" : { - "dataSchema" : { - "dataSource" : "wikipedia", - "parser" : { - "type" : "hadoopyString", - "parseSpec" : { - "format" : "json", - "dimensionsSpec" : { - "dimensions" : [ - "channel", - "cityName", - "comment", - "countryIsoCode", - "countryName", - "isAnonymous", - "isMinor", - "isNew", - "isRobot", - "isUnpatrolled", - "metroCode", - "namespace", - "page", - "regionIsoCode", - "regionName", - "user", - { "name": "added", "type": "long" }, - { "name": "deleted", "type": "long" }, - { "name": "delta", "type": "long" } - ] - }, - "timestampSpec" : { - "format" : "auto", - "column" : "time" - } - } - }, - "metricsSpec" : [], - "granularitySpec" : { - "type" : "uniform", - "segmentGranularity" : "day", - "queryGranularity" : "none", - "intervals" : ["2015-09-12/2015-09-13"], - "rollup" : false - } - }, - "ioConfig" : { - "type" : "hadoop", - "inputSpec" : { - "type" : "static", - "paths" : "/quickstart/wikiticker-2015-09-12-sampled.json.gz" - } - }, - "tuningConfig" : { - "type" : "hadoop", - "partitionsSpec" : { - "type" : "hashed", - "targetPartitionSize" : 5000000 - }, - "forceExtendableShardSpecs" : true, - "jobProperties" : { - "fs.default.name" : "hdfs://druid-hadoop-demo:9000", - "fs.defaultFS" : "hdfs://druid-hadoop-demo:9000", - "dfs.datanode.address" : "druid-hadoop-demo", - "dfs.client.use.datanode.hostname" : "true", - "dfs.datanode.use.datanode.hostname" : "true", - "yarn.resourcemanager.hostname" : "druid-hadoop-demo", - 
"yarn.nodemanager.vmem-check-enabled" : "false", - "mapreduce.map.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", - "mapreduce.job.user.classpath.first" : "true", - "mapreduce.reduce.java.opts" : "-Duser.timezone=UTC -Dfile.encoding=UTF-8", - "mapreduce.map.memory.mb" : 1024, - "mapreduce.reduce.memory.mb" : 1024 - } - } - }, - "hadoopDependencyCoordinates": ["org.apache.hadoop:hadoop-client:2.8.5"] -} diff --git a/extensions-contrib/thrift-extensions/pom.xml b/extensions-contrib/thrift-extensions/pom.xml index 387fb6bb9074..e58a7f24fce4 100644 --- a/extensions-contrib/thrift-extensions/pom.xml +++ b/extensions-contrib/thrift-extensions/pom.xml @@ -133,34 +133,6 @@ - - hadoop2 - - - hadoop2.enabled - true - - - - - org.apache.hadoop - hadoop-common - provided - - - org.apache.hadoop - hadoop-client - ${hadoop.compile.version} - provided - - - org.apache.avro - avro - - - - - hadoop3 diff --git a/extensions-core/avro-extensions/pom.xml b/extensions-core/avro-extensions/pom.xml index 548760d6564f..eeb40f0f47d6 100644 --- a/extensions-core/avro-extensions/pom.xml +++ b/extensions-core/avro-extensions/pom.xml @@ -268,51 +268,6 @@ - - hadoop2 - - - hadoop2.enabled - true - - - - - org.apache.hadoop - hadoop-client - ${hadoop.compile.version} - provided - - - org.apache.avro - avro - - - - - org.apache.hadoop - hadoop-mapreduce-client-core - provided - - - org.slf4j - slf4j-api - - - - - org.apache.hadoop - hadoop-common - provided - - - org.slf4j - slf4j-api - - - - - hadoop3 diff --git a/extensions-core/druid-ranger-security/pom.xml b/extensions-core/druid-ranger-security/pom.xml index dad57134fd0d..0b120acb7512 100644 --- a/extensions-core/druid-ranger-security/pom.xml +++ b/extensions-core/druid-ranger-security/pom.xml @@ -169,238 +169,6 @@ - - hadoop2 - - - hadoop2.enabled - true - - - - - org.apache.hadoop - hadoop-client - ${hadoop.compile.version} - runtime - - - org.apache.avro - avro - - - commons-cli - commons-cli - - - log4j - log4j - - - commons-codec - commons-codec - - - commons-logging - commons-logging - - - commons-io - commons-io - - - commons-lang - commons-lang - - - org.apache.httpcomponents - httpclient - - - org.apache.httpcomponents - httpcore - - - org.apache.zookeeper - zookeeper - - - org.slf4j - slf4j-api - - - org.slf4j - slf4j-log4j12 - - - javax.ws.rs - jsr311-api - - - com.google.code.findbugs - jsr305 - - - org.mortbay.jetty - jetty-util - - - org.apache.hadoop - hadoop-annotations - - - javax.activation - activation - - - com.google.protobuf - protobuf-java - - - com.sun.jersey - jersey-core - - - org.apache.curator - curator-client - - - org.apache.curator - curator-framework - - - org.apache.curator - curator-recipes - - - org.apache.commons - commons-math3 - - - com.google.guava - guava - - - - commons-beanutils - commons-beanutils-core - - - - - org.apache.hadoop - hadoop-common - compile - - - commons-cli - commons-cli - - - log4j - log4j - - - commons-codec - commons-codec - - - commons-logging - commons-logging - - - commons-io - commons-io - - - commons-lang - commons-lang - - - org.apache.httpcomponents - httpclient - - - org.apache.httpcomponents - httpcore - - - org.apache.zookeeper - zookeeper - - - org.slf4j - slf4j-api - - - org.slf4j - slf4j-log4j12 - - - javax.ws.rs - jsr311-api - - - com.google.code.findbugs - jsr305 - - - org.mortbay.jetty - jetty-util - - - com.google.protobuf - protobuf-java - - - com.sun.jersey - jersey-core - - - org.apache.curator - curator-client - - - org.apache.commons - commons-math3 - - - com.google.guava - 
guava - - - org.apache.avro - avro - - - net.java.dev.jets3t - jets3t - - - com.sun.jersey - jersey-json - - - com.jcraft - jsch - - - org.mortbay.jetty - jetty - - - com.sun.jersey - jersey-server - - - - commons-beanutils - commons-beanutils-core - - - - - hadoop3 diff --git a/extensions-core/hdfs-storage/pom.xml b/extensions-core/hdfs-storage/pom.xml index 50f002167562..face5fba9883 100644 --- a/extensions-core/hdfs-storage/pom.xml +++ b/extensions-core/hdfs-storage/pom.xml @@ -140,295 +140,6 @@ - - hadoop2 - - - hadoop2.enabled - true - - - - - org.apache.hadoop - hadoop-client - ${hadoop.compile.version} - runtime - - - org.apache.avro - avro - - - commons-cli - commons-cli - - - log4j - log4j - - - commons-codec - commons-codec - - - commons-logging - commons-logging - - - commons-io - commons-io - - - commons-lang - commons-lang - - - org.apache.httpcomponents - httpclient - - - org.apache.httpcomponents - httpcore - - - org.apache.zookeeper - zookeeper - - - org.slf4j - slf4j-api - - - org.slf4j - slf4j-log4j12 - - - javax.ws.rs - jsr311-api - - - com.google.code.findbugs - jsr305 - - - org.mortbay.jetty - jetty-util - - - org.apache.hadoop - hadoop-annotations - - - javax.activation - activation - - - com.google.protobuf - protobuf-java - - - com.sun.jersey - jersey-core - - - org.apache.curator - curator-client - - - org.apache.curator - curator-framework - - - org.apache.curator - curator-recipes - - - org.apache.commons - commons-math3 - - - com.google.guava - guava - - - - commons-beanutils - commons-beanutils-core - - - - - org.apache.hadoop - hadoop-common - compile - - - commons-cli - commons-cli - - - log4j - log4j - - - commons-codec - commons-codec - - - commons-logging - commons-logging - - - commons-io - commons-io - - - commons-lang - commons-lang - - - org.apache.httpcomponents - httpclient - - - org.apache.httpcomponents - httpcore - - - org.apache.zookeeper - zookeeper - - - org.slf4j - slf4j-api - - - org.slf4j - slf4j-log4j12 - - - javax.ws.rs - jsr311-api - - - com.google.code.findbugs - jsr305 - - - org.mortbay.jetty - jetty-util - - - com.google.protobuf - protobuf-java - - - com.sun.jersey - jersey-core - - - org.apache.curator - curator-client - - - org.apache.commons - commons-math3 - - - com.google.guava - guava - - - org.apache.avro - avro - - - net.java.dev.jets3t - jets3t - - - com.sun.jersey - jersey-json - - - com.jcraft - jsch - - - org.mortbay.jetty - jetty - - - com.sun.jersey - jersey-server - - - - commons-beanutils - commons-beanutils-core - - - - - org.apache.hadoop - hadoop-mapreduce-client-core - compile - - - javax.servlet - servlet-api - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - com.google.inject.extensions - guice-servlet - - - com.google.protobuf - protobuf-java - - - io.netty - netty - - - log4j - log4j - - - org.apache.avro - avro - - - org.apache.hadoop - hadoop-annotations - - - org.slf4j - slf4j-api - - - org.slf4j - slf4j-log4j12 - - - - - org.apache.hadoop - hadoop-hdfs-client - runtime - - - - hadoop3 diff --git a/extensions-core/orc-extensions/pom.xml b/extensions-core/orc-extensions/pom.xml index 09156eabf969..a2710b311e9f 100644 --- a/extensions-core/orc-extensions/pom.xml +++ b/extensions-core/orc-extensions/pom.xml @@ -232,265 +232,6 @@ - - hadoop2 - - - hadoop2.enabled - true - - - - - org.apache.hadoop - hadoop-mapreduce-client-core - compile - - - aopalliance - aopalliance - - - org.apache.avro - avro - - - org.apache.commons - commons-compress - - - 
com.google.guava - guava - - - com.google.inject - guice - - - javax.servlet - servlet-api - - - com.google.inject.extensions - guice-servlet - - - com.fasterxml.jackson.core - jackson-annotations - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - javax.inject - javax - - - io.netty - netty - - - org.slf4j - slf4j-log4j12 - - - org.slf4j - slf4j-api - - - com.google.protobuf - protobuf-java - - - - - org.apache.hadoop - hadoop-hdfs-client - runtime - - - - org.apache.hadoop - hadoop-common - ${hadoop.compile.version} - compile - - - org.apache.yetus - audience-annotations - - - org.apache.directory.server - apacheds-kerberos-codec - - - org.apache.avro - avro - - - commons-beanutils - commons-beanutils-core - - - commons-cli - commons-cli - - - commons-codec - commons-codec - - - org.apache.commons - commons-compress - - - commons-io - commons-io - - - commons-lang - commons-lang - - - commons-collections - commons-collections - - - commons-logging - commons-logging - - - org.apache.commons - commons-math3 - - - commons-net - commons-net - - - org.apache.curator - curator-client - - - org.apache.curator - curator-recipes - - - org.apache.curator - curator-framework - - - com.google.code.gson - gson - - - com.google.guava - guava - - - org.apache.httpcomponents - httpclient - - - org.apache.httpcomponents - httpcore - - - com.fasterxml.jackson.core - jackson-annotations - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - com.sun.jersey - jersey-core - - - com.sun.jersey - jersey-server - - - com.sun.jersey - jersey-json - - - org.mortbay.jetty - jetty-util - - - org.mortbay.jetty - jetty-sslengine - - - org.mortbay.jetty - jetty - - - net.java.dev.jets3t - jets3t - - - com.google.code.findbugs - jsr305 - - - javax.ws.rs - jsr311-api - - - javax.servlet.jsp - jsp-api - - - com.jcraft - jsch - - - log4j - log4j - - - org.slf4j - slf4j-api - - - org.slf4j - slf4j-log4j12 - - - xmlenc - xmlenc - - - org.apache.zookeeper - zookeeper - - - com.nimbusds - nimbus-jose-jwt - - - - - hadoop3 diff --git a/extensions-core/parquet-extensions/pom.xml b/extensions-core/parquet-extensions/pom.xml index ea28dfebe61d..d5b882d60e49 100644 --- a/extensions-core/parquet-extensions/pom.xml +++ b/extensions-core/parquet-extensions/pom.xml @@ -176,257 +176,6 @@ - - hadoop2 - - - hadoop2.enabled - true - - - - - - org.apache.hadoop - hadoop-hdfs-client - runtime - - - - org.apache.hadoop - hadoop-mapreduce-client-core - compile - - - aopalliance - aopalliance - - - org.apache.commons - commons-compress - - - com.google.guava - guava - - - com.google.inject - guice - - - com.google.inject.extensions - guice-servlet - - - com.fasterxml.jackson.core - jackson-annotations - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - javax.inject - javax - - - io.netty - netty - - - slf4j-log4j12 - org.slf4j - - - org.slf4j - slf4j-api - - - protobuf-java - com.google.protobuf - - - - - org.apache.hadoop - hadoop-common - compile - - - org.apache.yetus - audience-annotations - - - commons-codec - commons-codec - - - org.apache.commons - commons-compress - - - commons-io - commons-io - - - commons-lang - commons-lang - - - org.apache.commons - commons-math3 - - - commons-net - commons-net - - - org.apache.curator - curator-client - - - org.apache.curator - curator-framework - - - org.apache.curator - curator-recipes - - - com.google.guava - guava - - - 
com.fasterxml.jackson.core - jackson-annotations - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-databind - - - com.sun.jersey - jersey-core - - - com.sun.jersey - jersey-server - - - javax.servlet.jsp - jsp-api - - - com.google.code.findbugs - jsr305 - - - javax.ws.rs - jsr311-api - - - org.apache.zookeeper - zookeeper - - - org.slf4j - slf4j-api - - - org.slf4j - slf4j-log4j12 - - - com.sun.jersey - jersey-json - - - log4j - log4j - - - org.mortbay.jetty - jetty-sslengine - - - org.mortbay.jetty - jetty-util - - - net.java.dev.jets3t - jets3t - - - org.mortbay.jetty - jetty - - - com.google.code.gson - gson - - - xmlenc - xmlenc - - - org.apache.httpcomponents - httpclient - - - com.jcraft - jsch - - - com.google.protobuf - protobuf-java - - - commons-collections - commons-collections - - - commons-logging - commons-logging - - - commons-cli - commons-cli - - - commons-digester - commons-digester - - - commons-beanutils - commons-beanutils-core - - - org.apache.directory.server - apacheds-kerberos-codec - - - com.nimbusds - nimbus-jose-jwt - - - - - hadoop3 diff --git a/indexing-hadoop/pom.xml b/indexing-hadoop/pom.xml index 355730d229fe..ac7e62fb8a8f 100644 --- a/indexing-hadoop/pom.xml +++ b/indexing-hadoop/pom.xml @@ -152,64 +152,6 @@ - - hadoop2 - - - hadoop2.enabled - true - - - - - org.apache.hadoop - hadoop-client - ${hadoop.compile.version} - provided - - - org.apache.avro - avro - - - javax.servlet - servlet-api - - - - - - - org.apache.hadoop - hadoop-common - provided - - - javax.servlet - servlet-api - - - - - org.apache.hadoop - hadoop-mapreduce-client-core - provided - - - javax.servlet - servlet-api - - - - - org.apache.hadoop - hadoop-common - ${hadoop.compile.version} - tests - test - - - hadoop3 diff --git a/indexing-service/pom.xml b/indexing-service/pom.xml index 4ba42aac022b..480c602a3462 100644 --- a/indexing-service/pom.xml +++ b/indexing-service/pom.xml @@ -271,53 +271,6 @@ - - hadoop2 - - - hadoop2.enabled - true - - - - - org.apache.hadoop - hadoop-mapreduce-client-core - provided - - - javax.servlet - servlet-api - - - - - org.apache.hadoop - hadoop-common - provided - - - org.apache.hadoop - hadoop-client - ${hadoop.compile.version} - provided - - - org.apache.avro - avro - - - - - org.apache.hadoop - hadoop-yarn-common - provided - - - - org.apache.hadoop:hadoop-client:${hadoop.compile.version} - - hadoop3 diff --git a/integration-tests/README.md b/integration-tests/README.md index c9b49fb57fd1..4dfaaa3f293d 100644 --- a/integration-tests/README.md +++ b/integration-tests/README.md @@ -219,7 +219,7 @@ The values shown above are for the default docker compose cluster. For other clu - docker-compose.druid-hadoop.yml - For starting Apache Hadoop 2.8.5 cluster with the same setup as the Druid tutorial. + For starting Apache Hadoop 3.3.6 cluster with the same setup as the Druid tutorial. 
```bash docker-compose -f docker-compose.druid-hadoop.yml up diff --git a/integration-tests/build_run_cluster.sh b/integration-tests/build_run_cluster.sh index 7da37342ab28..aea46b60a2ae 100755 --- a/integration-tests/build_run_cluster.sh +++ b/integration-tests/build_run_cluster.sh @@ -21,11 +21,6 @@ echo $DRUID_INTEGRATION_TEST_OVERRIDE_CONFIG_PATH export DIR=$(cd $(dirname $0) && pwd) export HADOOP_DOCKER_DIR=$DIR/../examples/quickstart/tutorial/hadoop/docker -if [ -n "${HADOOP_VERSION}" ] && [ "${HADOOP_VERSION:0:1}" == "3" ]; then - export HADOOP_DOCKER_DIR=$DIR/../examples/quickstart/tutorial/hadoop3/docker -fi - - export DOCKERDIR=$DIR/docker export SHARED_DIR=${HOME}/shared diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure b/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure index 2382cd120c17..599c6d364be4 100644 --- a/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure +++ b/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_azure @@ -30,8 +30,6 @@ druid_extensions_loadList=["druid-azure-extensions","druid-hdfs-storage"] # # Please replace with corresponding libs -# Sample hadoop 2 config -# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"] # # Sample hadoop 3 config # druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client-api:3.2.2", "org.apache.hadoop:hadoop-client-runtime:3.2.2", "org.apache.hadoop:hadoop-azure:3.2.2"] diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs b/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs index bfc2552d07ce..c1f1f7ff9b43 100644 --- a/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs +++ b/integration-tests/docker/environment-configs/override-examples/hadoop/azure_to_hdfs @@ -33,8 +33,6 @@ druid_azure_container= # # Please replace with corresponding libs -# Sample hadoop 2 config -# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"] # # Sample hadoop 3 config # druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client-api:3.2.2", "org.apache.hadoop:hadoop-client-runtime:3.2.2", "org.apache.hadoop:hadoop-azure:3.2.2"] diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs b/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs index 785e376595b1..c72fe93f34be 100644 --- a/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs +++ b/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_hdfs @@ -33,8 +33,6 @@ druid_extensions_loadList=["druid-s3-extensions","druid-hdfs-storage"] # # Please replace with corresponding libs -# Sample hadoop 2 config -# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"] # # Sample hadoop 3 config # druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client-api:3.2.2", "org.apache.hadoop:hadoop-client-runtime:3.2.2", "org.apache.hadoop:hadoop-azure:3.2.2"] diff --git a/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 b/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 index 7daf16a63b15..086566515462 100644 --- 
a/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 +++ b/integration-tests/docker/environment-configs/override-examples/hadoop/s3_to_s3 @@ -33,8 +33,6 @@ AWS_REGION= druid_extensions_loadList=["druid-s3-extensions","druid-hdfs-storage"] # # Please replace with corresponding libs -# Sample hadoop 2 config -# druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.5", "org.apache.hadoop:hadoop-aws:2.8.5"] # # Sample hadoop 3 config # druid_indexer_task_defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client-api:3.2.2", "org.apache.hadoop:hadoop-client-runtime:3.2.2", "org.apache.hadoop:hadoop-azure:3.2.2"] diff --git a/integration-tests/pom.xml b/integration-tests/pom.xml index 020b9079361c..afd96b9b1ca6 100644 --- a/integration-tests/pom.xml +++ b/integration-tests/pom.xml @@ -553,13 +553,6 @@ - - hadoop2 - - "org.apache.hadoop:hadoop-client:${hadoop.compile.version}", "org.apache.hadoop:hadoop-azure:${hadoop.compile.version}" - org.apache.hadoop.fs.s3native.NativeS3FileSystem - - integration-tests diff --git a/integration-tests/script/copy_resources_template.sh b/integration-tests/script/copy_resources_template.sh index 4e0b8fcb26f9..fade47c4e315 100755 --- a/integration-tests/script/copy_resources_template.sh +++ b/integration-tests/script/copy_resources_template.sh @@ -62,15 +62,10 @@ then mkdir -p $HADOOP_GCS_DIR ## We put same version in both commands but as we have an if, correct code path will always be executed as this is generated script. ## Remove if - if [ -n "${HADOOP_VERSION}" ] && [ "${HADOOP_VERSION:0:1}" == "3" ]; then - "$SHARED_DIR/docker/bin/run-java" -cp "$SHARED_DIR/docker/lib/*" -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" org.apache.druid.cli.Main tools pull-deps -h org.apache.hadoop:hadoop-client-api:${hadoop.compile.version} -h org.apache.hadoop:hadoop-client-runtime:${hadoop.compile.version} -h org.apache.hadoop:hadoop-aws:${hadoop.compile.version} -h org.apache.hadoop:hadoop-azure:${hadoop.compile.version} - curl https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop3-latest.jar --output $HADOOP_GCS_DIR/gcs-connector-hadoop3-latest.jar - cp $HADOOP_GCS_DIR/gcs-connector-hadoop3-latest.jar $DRUID_HDFS_EXT - else - "$SHARED_DIR/docker/bin/run-java" -cp "$SHARED_DIR/docker/lib/*" -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" org.apache.druid.cli.Main tools pull-deps -h org.apache.hadoop:hadoop-client:${hadoop.compile.version} -h org.apache.hadoop:hadoop-aws:${hadoop.compile.version} -h org.apache.hadoop:hadoop-azure:${hadoop.compile.version} - curl https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-latest.jar --output $HADOOP_GCS_DIR/gcs-connector-hadoop2-latest.jar - cp $HADOOP_GCS_DIR/gcs-connector-hadoop2-latest.jar $DRUID_HDFS_EXT - fi + "$SHARED_DIR/docker/bin/run-java" -cp "$SHARED_DIR/docker/lib/*" -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" org.apache.druid.cli.Main tools pull-deps -h org.apache.hadoop:hadoop-client-api:${hadoop.compile.version} -h org.apache.hadoop:hadoop-client-runtime:${hadoop.compile.version} -h org.apache.hadoop:hadoop-aws:${hadoop.compile.version} -h org.apache.hadoop:hadoop-azure:${hadoop.compile.version} + curl https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop3-latest.jar --output $HADOOP_GCS_DIR/gcs-connector-hadoop3-latest.jar + cp $HADOOP_GCS_DIR/gcs-connector-hadoop3-latest.jar $DRUID_HDFS_EXT + cp 
$HADOOP_AWS_DIR/hadoop-aws-${hadoop.compile.version}.jar $DRUID_HDFS_EXT cp $HADOOP_AZURE_DIR/hadoop-azure-${hadoop.compile.version}.jar $DRUID_HDFS_EXT fi diff --git a/licenses.yaml b/licenses.yaml index 90a3bc63604f..5ae4672c90f3 100644 --- a/licenses.yaml +++ b/licenses.yaml @@ -2486,461 +2486,6 @@ notices: --- -name: Apache Hadoop -license_category: binary -module: hadoop-client -license_name: Apache License version 2.0 -version: 2.8.5 -libraries: - - org.apache.hadoop: hadoop-annotations - - org.apache.hadoop: hadoop-auth - - org.apache.hadoop: hadoop-client - - org.apache.hadoop: hadoop-common - - org.apache.hadoop: hadoop-hdfs-client - - org.apache.hadoop: hadoop-mapreduce-client-app - - org.apache.hadoop: hadoop-mapreduce-client-common - - org.apache.hadoop: hadoop-mapreduce-client-core - - org.apache.hadoop: hadoop-mapreduce-client-jobclient - - org.apache.hadoop: hadoop-mapreduce-client-shuffle - - org.apache.hadoop: hadoop-yarn-api - - org.apache.hadoop: hadoop-yarn-client - - org.apache.hadoop: hadoop-yarn-common - - org.apache.hadoop: hadoop-yarn-server-common -notice: | - The binary distribution of this product bundles binaries of - org.iq80.leveldb:leveldb-api (https://github.com/dain/leveldb), which has the - following notices: - * Copyright 2011 Dain Sundstrom - * Copyright 2011 FuseSource Corp. http://fusesource.com - - The binary distribution of this product bundles binaries of - org.fusesource.hawtjni:hawtjni-runtime (https://github.com/fusesource/hawtjni), - which has the following notices: - * This product includes software developed by FuseSource Corp. - http://fusesource.com - * This product includes software developed at - Progress Software Corporation and/or its subsidiaries or affiliates. - * This product includes software developed by IBM Corporation and others. - - The binary distribution of this product bundles binaries of - AWS Java SDK 1.10.6, - which has the following notices: - * This software includes third party software subject to the following - copyrights: - XML parsing and utility functions from JetS3t - Copyright - 2006-2009 James Murty. - JSON parsing and utility functions from JSON.org - - Copyright 2002 JSON.org. - PKCS#1 PEM encoded private key parsing and utility - functions from oauth.googlecode.com - Copyright 1998-2010 AOL Inc. - - The binary distribution of this product bundles binaries of - Gson 2.2.4, - which has the following notices: - - The Netty Project - ================= - - Please visit the Netty web site for more information: - - * http://netty.io/ - - Copyright 2014 The Netty Project - - The Netty Project licenses this file to you under the Apache License, - version 2.0 (the "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at: - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - License for the specific language governing permissions and limitations - under the License. - - Also, please refer to each LICENSE..txt file, which is located in - the 'license' directory of the distribution file, for the license terms of the - components that this product depends on. 
- - ------------------------------------------------------------------------------- - This product contains the extensions to Java Collections Framework which has - been derived from the works by JSR-166 EG, Doug Lea, and Jason T. Greene: - - * LICENSE: - * license/LICENSE.jsr166y.txt (Public Domain) - * HOMEPAGE: - * http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/ - * http://viewvc.jboss.org/cgi-bin/viewvc.cgi/jbosscache/experimental/jsr166/ - - This product contains a modified version of Robert Harder's Public Domain - Base64 Encoder and Decoder, which can be obtained at: - - * LICENSE: - * license/LICENSE.base64.txt (Public Domain) - * HOMEPAGE: - * http://iharder.sourceforge.net/current/java/base64/ - - This product contains a modified portion of 'Webbit', an event based - WebSocket and HTTP server, which can be obtained at: - - * LICENSE: - * license/LICENSE.webbit.txt (BSD License) - * HOMEPAGE: - * https://github.com/joewalnes/webbit - - This product contains a modified portion of 'SLF4J', a simple logging - facade for Java, which can be obtained at: - - * LICENSE: - * license/LICENSE.slf4j.txt (MIT License) - * HOMEPAGE: - * http://www.slf4j.org/ - - This product contains a modified portion of 'ArrayDeque', written by Josh - Bloch of Google, Inc: - - * LICENSE: - * license/LICENSE.deque.txt (Public Domain) - - This product contains a modified portion of 'Apache Harmony', an open source - Java SE, which can be obtained at: - - * LICENSE: - * license/LICENSE.harmony.txt (Apache License 2.0) - * HOMEPAGE: - * http://archive.apache.org/dist/harmony/ - - This product contains a modified version of Roland Kuhn's ASL2 - AbstractNodeQueue, which is based on Dmitriy Vyukov's non-intrusive MPSC queue. - It can be obtained at: - - * LICENSE: - * license/LICENSE.abstractnodequeue.txt (Public Domain) - * HOMEPAGE: - * https://github.com/akka/akka/blob/wip-2.2.3-for-scala-2.11/akka-actor/src/main/java/akka/dispatch/AbstractNodeQueue.java - - This product contains a modified portion of 'jbzip2', a Java bzip2 compression - and decompression library written by Matthew J. Francis. It can be obtained at: - - * LICENSE: - * license/LICENSE.jbzip2.txt (MIT License) - * HOMEPAGE: - * https://code.google.com/p/jbzip2/ - - This product contains a modified portion of 'libdivsufsort', a C API library to construct - the suffix array and the Burrows-Wheeler transformed string for any input string of - a constant-size alphabet written by Yuta Mori. It can be obtained at: - - * LICENSE: - * license/LICENSE.libdivsufsort.txt (MIT License) - * HOMEPAGE: - * https://code.google.com/p/libdivsufsort/ - - This product contains a modified portion of Nitsan Wakart's 'JCTools', Java Concurrency Tools for the JVM, - which can be obtained at: - - * LICENSE: - * license/LICENSE.jctools.txt (ASL2 License) - * HOMEPAGE: - * https://github.com/JCTools/JCTools - - This product optionally depends on 'JZlib', a re-implementation of zlib in - pure Java, which can be obtained at: - - * LICENSE: - * license/LICENSE.jzlib.txt (BSD style License) - * HOMEPAGE: - * http://www.jcraft.com/jzlib/ - - This product optionally depends on 'Compress-LZF', a Java library for encoding and - decoding data in LZF format, written by Tatu Saloranta. It can be obtained at: - - * LICENSE: - * license/LICENSE.compress-lzf.txt (Apache License 2.0) - * HOMEPAGE: - * https://github.com/ning/compress - - This product optionally depends on 'lz4', a LZ4 Java compression - and decompression library written by Adrien Grand. 
It can be obtained at: - - * LICENSE: - * license/LICENSE.lz4.txt (Apache License 2.0) - * HOMEPAGE: - * https://github.com/jpountz/lz4-java - - This product optionally depends on 'lzma-java', a LZMA Java compression - and decompression library, which can be obtained at: - - * LICENSE: - * license/LICENSE.lzma-java.txt (Apache License 2.0) - * HOMEPAGE: - * https://github.com/jponge/lzma-java - - This product contains a modified portion of 'jfastlz', a Java port of FastLZ compression - and decompression library written by William Kinney. It can be obtained at: - - * LICENSE: - * license/LICENSE.jfastlz.txt (MIT License) - * HOMEPAGE: - * https://code.google.com/p/jfastlz/ - - This product contains a modified portion of and optionally depends on 'Protocol Buffers', Google's data - interchange format, which can be obtained at: - - * LICENSE: - * license/LICENSE.protobuf.txt (New BSD License) - * HOMEPAGE: - * http://code.google.com/p/protobuf/ - - This product optionally depends on 'Bouncy Castle Crypto APIs' to generate - a temporary self-signed X.509 certificate when the JVM does not provide the - equivalent functionality. It can be obtained at: - - * LICENSE: - * license/LICENSE.bouncycastle.txt (MIT License) - * HOMEPAGE: - * http://www.bouncycastle.org/ - - This product optionally depends on 'Snappy', a compression library produced - by Google Inc, which can be obtained at: - - * LICENSE: - * license/LICENSE.snappy.txt (New BSD License) - * HOMEPAGE: - * http://code.google.com/p/snappy/ - - This product optionally depends on 'JBoss Marshalling', an alternative Java - serialization API, which can be obtained at: - - * LICENSE: - * license/LICENSE.jboss-marshalling.txt (GNU LGPL 2.1) - * HOMEPAGE: - * http://www.jboss.org/jbossmarshalling - - This product optionally depends on 'Caliper', Google's micro- - benchmarking framework, which can be obtained at: - - * LICENSE: - * license/LICENSE.caliper.txt (Apache License 2.0) - * HOMEPAGE: - * http://code.google.com/p/caliper/ - - This product optionally depends on 'Apache Commons Logging', a logging - framework, which can be obtained at: - - * LICENSE: - * license/LICENSE.commons-logging.txt (Apache License 2.0) - * HOMEPAGE: - * http://commons.apache.org/logging/ - - This product optionally depends on 'Apache Log4J', a logging framework, which - can be obtained at: - - * LICENSE: - * license/LICENSE.log4j.txt (Apache License 2.0) - * HOMEPAGE: - * http://logging.apache.org/log4j/ - - This product optionally depends on 'Aalto XML', an ultra-high performance - non-blocking XML processor, which can be obtained at: - - * LICENSE: - * license/LICENSE.aalto-xml.txt (Apache License 2.0) - * HOMEPAGE: - * http://wiki.fasterxml.com/AaltoHome - - This product contains a modified version of 'HPACK', a Java implementation of - the HTTP/2 HPACK algorithm written by Twitter. 
It can be obtained at: - - * LICENSE: - * license/LICENSE.hpack.txt (Apache License 2.0) - * HOMEPAGE: - * https://github.com/twitter/hpack - - This product contains a modified portion of 'Apache Commons Lang', a Java library - provides utilities for the java.lang API, which can be obtained at: - - * LICENSE: - * license/LICENSE.commons-lang.txt (Apache License 2.0) - * HOMEPAGE: - * https://commons.apache.org/proper/commons-lang/ - - The binary distribution of this product bundles binaries of - Commons Codec 1.4, - which has the following notices: - * src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.javacontains test data from http://aspell.net/test/orig/batch0.tab.Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org) - =============================================================================== - The content of package org.apache.commons.codec.language.bm has been translated - from the original php source code available at http://stevemorse.org/phoneticinfo.htm - with permission from the original authors. - Original source copyright:Copyright (c) 2008 Alexander Beider & Stephen P. Morse. - - The binary distribution of this product bundles binaries of - Commons Lang 2.6, - which has the following notices: - * This product includes software from the Spring Framework,under the Apache License 2.0 (see: StringUtils.containsWhitespace()) - - The binary distribution of this product bundles binaries of - Apache Log4j 1.2.17, - which has the following notices: - * ResolverUtil.java - Copyright 2005-2006 Tim Fennell - Dumbster SMTP test server - Copyright 2004 Jason Paul Kitchen - TypeUtil.java - Copyright 2002-2012 Ramnivas Laddad, Juergen Hoeller, Chris Beams - - The binary distribution of this product bundles binaries of - Java Concurrency in Practice book annotations 1.0, - which has the following notices: - * Copyright (c) 2005 Brian Goetz and Tim Peierls Released under the Creative - Commons Attribution License (http://creativecommons.org/licenses/by/2.5) - Official home: http://www.jcip.net Any republication or derived work - distributed in source code form must include this copyright and license - notice. - - The binary distribution of this product bundles binaries of - Jetty 6.1.26, - which has the following notices: - * ============================================================== - Jetty Web Container - Copyright 1995-2016 Mort Bay Consulting Pty Ltd. - ============================================================== - - The Jetty Web Container is Copyright Mort Bay Consulting Pty Ltd - unless otherwise noted. - - Jetty is dual licensed under both - - * The Apache 2.0 License - http://www.apache.org/licenses/LICENSE-2.0.html - - and - - * The Eclipse Public 1.0 License - http://www.eclipse.org/legal/epl-v10.html - - Jetty may be distributed under either license. - - ------ - Eclipse - - The following artifacts are EPL. - * org.eclipse.jetty.orbit:org.eclipse.jdt.core - - The following artifacts are EPL and ASL2. - * org.eclipse.jetty.orbit:javax.security.auth.message - - - The following artifacts are EPL and CDDL 1.0. - * org.eclipse.jetty.orbit:javax.mail.glassfish - - - ------ - Oracle - - The following artifacts are CDDL + GPLv2 with classpath exception. 
- https://glassfish.dev.java.net/nonav/public/CDDL+GPL.html - - * javax.servlet:javax.servlet-api - * javax.annotation:javax.annotation-api - * javax.transaction:javax.transaction-api - * javax.websocket:javax.websocket-api - - ------ - Oracle OpenJDK - - If ALPN is used to negotiate HTTP/2 connections, then the following - artifacts may be included in the distribution or downloaded when ALPN - module is selected. - - * java.sun.security.ssl - - These artifacts replace/modify OpenJDK classes. The modififications - are hosted at github and both modified and original are under GPL v2 with - classpath exceptions. - http://openjdk.java.net/legal/gplv2+ce.html - - - ------ - OW2 - - The following artifacts are licensed by the OW2 Foundation according to the - terms of http://asm.ow2.org/license.html - - org.ow2.asm:asm-commons - org.ow2.asm:asm - - - ------ - Apache - - The following artifacts are ASL2 licensed. - - org.apache.taglibs:taglibs-standard-spec - org.apache.taglibs:taglibs-standard-impl - - - ------ - MortBay - - The following artifacts are ASL2 licensed. Based on selected classes from - following Apache Tomcat jars, all ASL2 licensed. - - org.mortbay.jasper:apache-jsp - org.apache.tomcat:tomcat-jasper - org.apache.tomcat:tomcat-juli - org.apache.tomcat:tomcat-jsp-api - org.apache.tomcat:tomcat-el-api - org.apache.tomcat:tomcat-jasper-el - org.apache.tomcat:tomcat-api - org.apache.tomcat:tomcat-util-scan - org.apache.tomcat:tomcat-util - - org.mortbay.jasper:apache-el - org.apache.tomcat:tomcat-jasper-el - org.apache.tomcat:tomcat-el-api - - - ------ - Mortbay - - The following artifacts are CDDL + GPLv2 with classpath exception. - - https://glassfish.dev.java.net/nonav/public/CDDL+GPL.html - - org.eclipse.jetty.toolchain:jetty-schemas - - ------ - Assorted - - The UnixCrypt.java code implements the one way cryptography used by - Unix systems for simple password protection. Copyright 1996 Aki Yoshida, - modified April 2001 by Iris Van den Broeke, Daniel Deville. - Permission to use, copy, modify and distribute UnixCrypt - for non-commercial or commercial purposes and without fee is - granted provided that the copyright notice appears in all copies./ - - The binary distribution of this product bundles binaries of - Snappy for Java 1.0.4.1, - which has the following notices: - * This product includes software developed by Google - Snappy: http://code.google.com/p/snappy/ (New BSD License) - - This product includes software developed by Apache - PureJavaCrc32C from apache-hadoop-common http://hadoop.apache.org/ - (Apache 2.0 license) - - This library containd statically linked libstdc++. 
This inclusion is allowed by - "GCC RUntime Library Exception" - http://gcc.gnu.org/onlinedocs/libstdc++/manual/license.html - - == Contributors == - * Tatu Saloranta - * Providing benchmark suite - * Alec Wysoker - * Performance and memory usage improvement - ---- - name: Apache Hadoop license_category: binary module: hadoop-client diff --git a/owasp-dependency-check-suppressions.xml b/owasp-dependency-check-suppressions.xml index 175459abe749..0685b126c424 100644 --- a/owasp-dependency-check-suppressions.xml +++ b/owasp-dependency-check-suppressions.xml @@ -62,25 +62,6 @@ CVE-2021-3563 - - - - ^pkg:maven/net\.minidev/json\-smart@.*$ - CVE-2021-27568 - CVE-2021-31684 - CVE-2022-45688 - CVE-2023-1370 - CVE-2022-45688 - - - - ^pkg:maven/net\.minidev/accessors\-smart@.*$ - CVE-2021-27568 - CVE-2022-45688 - - - ^pkg:maven/org\.apache\.hadoop/hadoop\-.*@.*$ - CVE-2018-11765 - CVE-2020-9492 - CVE-2022-25168 - CVE-2022-26612 - CVE-2018-8009 - CVE-2021-33036 - true - - hadoop2 - - - hadoop2.enabled - true - - - - 2.8.5 - 5.2.5.Final - - diff --git a/services/src/main/java/org/apache/druid/cli/PullDependencies.java b/services/src/main/java/org/apache/druid/cli/PullDependencies.java index fb16422855aa..6ea8626ba7e7 100644 --- a/services/src/main/java/org/apache/druid/cli/PullDependencies.java +++ b/services/src/main/java/org/apache/druid/cli/PullDependencies.java @@ -181,7 +181,7 @@ on the class path and propagated around the system. Most places TRY to make sure @Option( name = "--no-default-hadoop", - description = "Don't pull down the default hadoop coordinate, i.e., org.apache.hadoop:hadoop-client if hadoop2 or org.apache.hadoop:hadoop-client-runtime if hadoop3. If `-h` option is supplied, then default hadoop coordinate will not be downloaded." + description = "Don't pull down the default hadoop coordinate, i.e., org.apache.hadoop:hadoop-client-runtime if hadoop3. If `-h` option is supplied, then default hadoop coordinate will not be downloaded." ) public boolean noDefaultHadoop = false; diff --git a/website/.spelling b/website/.spelling index cc7a09882d92..26d7049ef44e 100644 --- a/website/.spelling +++ b/website/.spelling @@ -813,7 +813,6 @@ yaml Phadoop3 dist-hadoop3 hadoop3 -hadoop2 2.x.x 3.x.x ambari-metrics @@ -1035,7 +1034,6 @@ PT17S GCS StaticGoogleBlobStoreFirehose gcs-connector -hadoop2 hdfs Aotearoa Czechia
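With the hadoop2 default removed, `--no-default-hadoop` and `-h` are the two remaining ways to control the default coordinate in `pull-deps`. A minimal sketch, assuming the integration-test `run-java` wrapper and shared directories from the earlier script hunk are available; the 3.2.2 versions are simply the ones shown in the override examples, not a recommendation:

```bash
# Skip the default hadoop coordinate entirely (flag described in PullDependencies.java above).
"$SHARED_DIR/docker/bin/run-java" -cp "$SHARED_DIR/docker/lib/*" \
  -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" \
  org.apache.druid.cli.Main tools pull-deps --no-default-hadoop

# Or supply -h explicitly; per the updated description, the default is then not downloaded either.
"$SHARED_DIR/docker/bin/run-java" -cp "$SHARED_DIR/docker/lib/*" \
  -Ddruid.extensions.hadoopDependenciesDir="$SHARED_DIR/hadoop-dependencies" \
  org.apache.druid.cli.Main tools pull-deps \
  -h org.apache.hadoop:hadoop-client-api:3.2.2 \
  -h org.apache.hadoop:hadoop-client-runtime:3.2.2
```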