gengliangwang · gengliangwang · Aug 6, 2020 · Aug 6, 2020 · Aug 6, 2020 · Aug 6, 2020
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
@@ -9,265 +9,16 @@ on:
     - master
 
 jobs:
-  # TODO(SPARK-32248): Recover JDK 11 builds
-  # Build: build Spark and run the tests for specified modules.
-  build:
-    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        java:
-          - 1.8
-        hadoop:
-          - hadoop3.2
-        hive:
-          - hive2.3
-        # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
-        # Kinesis tests depends on external Amazon kinesis service.
-        # Note that the modules below are from sparktestsupport/modules.py.
-        modules:
-          - |-
-            core, unsafe, kvstore, avro,
-            network-common, network-shuffle, repl, launcher,
-            examples, sketch, graphx
-          - |-
-            catalyst, hive-thriftserver
-          - |-
-            streaming, sql-kafka-0-10, streaming-kafka-0-10,
-            mllib-local, mllib,
-            yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
-          - |-
-            pyspark-sql, pyspark-mllib, pyspark-resource
-          - |-
-            pyspark-core, pyspark-streaming, pyspark-ml
-          - |-
-            sparkr
-        # Here, we split Hive and SQL tests into some of slow ones and the rest of them.
-        included-tags: [""]
-        excluded-tags: [""]
-        comment: [""]
-        include:
-          # Hive tests
-          - modules: hive
-            java: 1.8
-            hadoop: hadoop3.2
-            hive: hive2.3
-            included-tags: org.apache.spark.tags.SlowHiveTest
-            comment: "- slow tests"
-          - modules: hive
-            java: 1.8
-            hadoop: hadoop3.2
-            hive: hive2.3
-            excluded-tags: org.apache.spark.tags.SlowHiveTest
-            comment: "- other tests"
-          # SQL tests
-          - modules: sql
-            java: 1.8
-            hadoop: hadoop3.2
-            hive: hive2.3
-            included-tags: org.apache.spark.tags.ExtendedSQLTest
-            comment: "- slow tests"
-          - modules: sql
-            java: 1.8
-            hadoop: hadoop3.2
-            hive: hive2.3
-            excluded-tags: org.apache.spark.tags.ExtendedSQLTest
-            comment: "- other tests"
-    env:
-      MODULES_TO_TEST: ${{ matrix.modules }}
-      EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
-      INCLUDED_TAGS: ${{ matrix.included-tags }}
-      HADOOP_PROFILE: ${{ matrix.hadoop }}
-      HIVE_PROFILE: ${{ matrix.hive }}
-      # GitHub Actions' default miniconda to use in pip packaging test.
-      CONDA_PREFIX: /usr/share/miniconda
-      GITHUB_PREV_SHA: ${{ github.event.before }}
-    steps:
-    - name: Checkout Spark repository
-      uses: actions/checkout@v2
-      # In order to fetch changed files
-      with:
-        fetch-depth: 0
-    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
-    - name: Cache Scala, SBT, Maven and Zinc
-      uses: actions/cache@v1
-      with:
-        path: build
-        key: build-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          build-
-    - name: Cache Maven local repository
-      uses: actions/cache@v2
-      with:
-        path: ~/.m2/repository
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-
-    - name: Cache Ivy local repository
-      uses: actions/cache@v2
-      with:
-        path: ~/.ivy2/cache
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
-    - name: Install JDK ${{ matrix.java }}
-      uses: actions/setup-java@v1
-      with:
-        java-version: ${{ matrix.java }}
-    # PySpark
-    - name: Install PyPy3
-      # Note that order of Python installations here matters because default python3 is
-      # overridden by pypy3.
-      uses: actions/setup-python@v2
-      if: contains(matrix.modules, 'pyspark')
-      with:
-        python-version: pypy3
-        architecture: x64
-    - name: Install Python 3.6
-      uses: actions/setup-python@v2
-      if: contains(matrix.modules, 'pyspark')
-      with:
-        python-version: 3.6
-        architecture: x64
-    - name: Install Python 3.8
-      uses: actions/setup-python@v2
-      # We should install one Python that is higher then 3+ for SQL and Yarn because:
-      # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
-      # - Yarn has a Python specific test too, for example, YarnClusterSuite.
-      if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
-      with:
-        python-version: 3.8
-        architecture: x64
-    - name: Install Python packages (Python 3.6 and PyPy3)
-      if: contains(matrix.modules, 'pyspark')
-      # PyArrow is not supported in PyPy yet, see ARROW-2651.
-      # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
-      run: |
-        python3.6 -m pip install numpy pyarrow pandas scipy
-        python3.6 -m pip list
-        pypy3 -m pip install numpy pandas
-        pypy3 -m pip list
-    - name: Install Python packages (Python 3.8)
-      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
-      run: |
-        python3.8 -m pip install numpy pyarrow pandas scipy
-        python3.8 -m pip list
-    # SparkR
-    - name: Install R 4.0
-      if: contains(matrix.modules, 'sparkr')
-      run: |
-        sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list"
-        curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add
-        sudo apt-get update
-        sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
-    - name: Install R packages
-      if: contains(matrix.modules, 'sparkr')
-      run: |
-        # qpdf is required to reduce the size of PDFs to make CRAN check pass. See SPARK-32497.
-        sudo apt-get install -y libcurl4-openssl-dev qpdf
-        sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')"
-        # Show installed packages in R.
-        sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
-    # Run the tests.
-    - name: "Run tests: ${{ matrix.modules }}"
-      run: |
-        # Hive tests become flaky when running in parallel as it's too intensive.
-        if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
-        mkdir -p ~/.m2
-        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
-        rm -rf ~/.m2/repository/org/apache/spark
-
   # Static analysis, and documentation build
   lint:
     name: Linters, licenses, dependencies and documentation generation
     runs-on: ubuntu-latest
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
-    - name: Cache Maven local repository
-      uses: actions/cache@v2
-      with:
-        path: ~/.m2/repository
-        key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          docs-maven-
-    - name: Install JDK 1.8
-      uses: actions/setup-java@v1
-      with:
-        java-version: 1.8
-    - name: Install Python 3.6
-      uses: actions/setup-python@v2
-      with:
-        python-version: 3.6
-        architecture: x64
-    - name: Install Python linter dependencies
-      run: |
-        # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
-        #   See also https://github.com/sphinx-doc/sphinx/issues/7551.
-        pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme
-    - name: Install R 4.0
-      run: |
-        sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list"
-        curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add
-        sudo apt-get update
-        sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
-    - name: Install R linter dependencies and SparkR
-      run: |
-        sudo apt-get install -y libcurl4-openssl-dev
-        sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
-        sudo Rscript -e "devtools::install_github('jimhester/[email protected]')"
-        ./R/install-dev.sh
-    - name: Install Ruby 2.7 for documentation generation
-      uses: actions/setup-ruby@v1
-      with:
-        ruby-version: 2.7
-    - name: Install dependencies for documentation generation
-      run: |
-        sudo apt-get install -y libcurl4-openssl-dev pandoc
-        # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
-        #   See also https://github.com/sphinx-doc/sphinx/issues/7551.
-        pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme
-        gem install jekyll jekyll-redirect-from rouge
-        sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
-    - name: Scala linter
-      run: ./dev/lint-scala
-    - name: Java linter
-      run: ./dev/lint-java
-    - name: Python linter
-      run: ./dev/lint-python
-    - name: R linter
-      run: ./dev/lint-r
-    - name: License test
-      run: ./dev/check-license
-    - name: Dependencies test
-      run: ./dev/test-dependencies.sh
-    - name: Run documentation build
-      run: |
-        cd docs
-        jekyll build
-
-  java11:
-    name: Java 11 build
-    runs-on: ubuntu-latest
-    steps:
-    - name: Checkout Spark repository
-      uses: actions/checkout@v2
-    - name: Cache Maven local repository
-      uses: actions/cache@v2
-      with:
-        path: ~/.m2/repository
-        key: java11-maven-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          java11-maven-
-    - name: Install Java 11
-      uses: actions/setup-java@v1
-      with:
-        java-version: 11
-    - name: Build with Maven
-      run: |
-        export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
-        export MAVEN_CLI_OPTS="--no-transfer-progress"
-        mkdir -p ~/.m2
-        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
-        rm -rf ~/.m2/repository/org/apache/spark
+    - name: Lint Code Base  # runs after the checkout step of the `lint` job
+      uses: docker://github/super-linter:v3  # step runs a Docker image rather than a JS action
+      env:
+        DEFAULT_BRANCH: master  # same branch name this workflow lists under `on:`
+        VALIDATE_MD: true  # NOTE(review): presumably limits linting to Markdown; confirm name against the v3 README
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}  # passes the workflow token to the container; presumably for status reporting
diff --git a/docs/monitoring.md b/docs/monitoring.md
@@ -573,7 +573,6 @@ can be identified by their `[attempt-id]`. In the API listed below, when running
     <code>?planDescription=[true (default) | false]</code> enables/disables Physical <code>planDescription</code> on demand when Physical Plan size is high.
     <br>
     <code>?offset=[offset]&length=[len]</code> lists queries in the given range.
-    </td>
   </tr>
   <tr>
     <td><code>/applications/[app-id]/sql/[execution-id]</code></td>
@@ -582,7 +581,6 @@ can be identified by their `[attempt-id]`. In the API listed below, when running
     <code>?details=[true (default) | false]</code> lists/hides metric details in addition to given query details.
     <br>
     <code>?planDescription=[true (default) | false]</code> enables/disables Physical <code>planDescription</code> on demand for the given query when Physical Plan size is high.
-    </td>
   </tr>
   <tr>
     <td><code>/applications/[app-id]/environment</code></td>