From c0034585252cbdeb09b61f326091e0d0ac6479ec Mon Sep 17 00:00:00 2001
From: Gengliang Wang
Date: Thu, 6 Aug 2020 10:22:06 +0800
Subject: [PATCH 1/4] check markdown

---
 .github/workflows/master.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 009ebe90ddf51..eaf101c418c7a 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -242,6 +242,12 @@ jobs:
       run: ./dev/check-license
     - name: Dependencies test
       run: ./dev/test-dependencies.sh
+    - name: Lint Code Base
+      uses: docker://github/super-linter:v3
+      env:
+        DEFAULT_BRANCH: master
+        VALIDATE_MD: true
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
     - name: Run documentation build
       run: |
         cd docs

From 48ae5374a78e2d6d17f0a1970e68068059bba31e Mon Sep 17 00:00:00 2001
From: Gengliang Wang
Date: Thu, 6 Aug 2020 10:22:59 +0800
Subject: [PATCH 2/4] Revert "[SPARK-32525][DOCS] The layout of monitoring.html is broken"

This reverts commit 0660a0501d28c9a24cb537ebaee2d8f0a78fea17.
---
 docs/monitoring.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/docs/monitoring.md b/docs/monitoring.md
index 247957d087fa3..2ab7b30a1dca9 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -573,7 +573,6 @@ can be identified by their `[attempt-id]`. In the API listed below, when running
     ?planDescription=[true (default) | false] enables/disables Physical planDescription on demand when Physical Plan size is high.
    <br>?offset=[offset]&length=[len] lists queries in the given range.
-
     /applications/[app-id]/sql/[execution-id]
@@ -582,7 +581,6 @@ can be identified by their `[attempt-id]`. In the API listed below, when running
     ?details=[true (default) | false] lists/hides metric details in addition to given query details.
    <br>?planDescription=[true (default) | false] enables/disables Physical planDescription on demand for the given query when Physical Plan size is high.
-
     /applications/[app-id]/environment

From 96108e1a67e2418fef0225caa41190a482230640 Mon Sep 17 00:00:00 2001
From: Gengliang Wang
Date: Thu, 6 Aug 2020 10:26:22 +0800
Subject: [PATCH 3/4] remove other steps for now

---
 .github/workflows/master.yml | 61 ------------------------------------
 1 file changed, 61 deletions(-)

diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index eaf101c418c7a..4f1331a608b3f 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -185,73 +185,12 @@ jobs:
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
-    - name: Cache Maven local repository
-      uses: actions/cache@v2
-      with:
-        path: ~/.m2/repository
-        key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          docs-maven-
-    - name: Install JDK 1.8
-      uses: actions/setup-java@v1
-      with:
-        java-version: 1.8
-    - name: Install Python 3.6
-      uses: actions/setup-python@v2
-      with:
-        python-version: 3.6
-        architecture: x64
-    - name: Install Python linter dependencies
-      run: |
-        # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
-        # See also https://github.com/sphinx-doc/sphinx/issues/7551.
-        pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme
-    - name: Install R 4.0
-      run: |
-        sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list"
-        curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add
-        sudo apt-get update
-        sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
-    - name: Install R linter dependencies and SparkR
-      run: |
-        sudo apt-get install -y libcurl4-openssl-dev
-        sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
-        sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')"
-        ./R/install-dev.sh
-    - name: Install Ruby 2.7 for documentation generation
-      uses: actions/setup-ruby@v1
-      with:
-        ruby-version: 2.7
-    - name: Install dependencies for documentation generation
-      run: |
-        sudo apt-get install -y libcurl4-openssl-dev pandoc
-        # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
-        # See also https://github.com/sphinx-doc/sphinx/issues/7551.
-        pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme
-        gem install jekyll jekyll-redirect-from rouge
-        sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
-    - name: Scala linter
-      run: ./dev/lint-scala
-    - name: Java linter
-      run: ./dev/lint-java
-    - name: Python linter
-      run: ./dev/lint-python
-    - name: R linter
-      run: ./dev/lint-r
-    - name: License test
-      run: ./dev/check-license
-    - name: Dependencies test
-      run: ./dev/test-dependencies.sh
     - name: Lint Code Base
       uses: docker://github/super-linter:v3
       env:
         DEFAULT_BRANCH: master
         VALIDATE_MD: true
         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-    - name: Run documentation build
-      run: |
-        cd docs
-        jekyll build
 
   java11:
     name: Java 11 build

From 39d21fb4a4f2d7ee7d03950a4df3a54d23b009b8 Mon Sep 17 00:00:00 2001
From: Gengliang Wang
Date: Thu, 6 Aug 2020 10:30:43 +0800
Subject: [PATCH 4/4] just lint

---
 .github/workflows/master.yml | 194 -----------------------------------
 1 file changed, 194 deletions(-)

diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 4f1331a608b3f..458272c11af6e 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -9,175 +9,6 @@ on:
     - master
 
 jobs:
-  # TODO(SPARK-32248): Recover JDK 11 builds
-  # Build: build Spark and run the tests for specified modules.
-  build:
-    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        java:
-          - 1.8
-        hadoop:
-          - hadoop3.2
-        hive:
-          - hive2.3
-        # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
-        # Kinesis tests depends on external Amazon kinesis service.
-        # Note that the modules below are from sparktestsupport/modules.py.
-        modules:
-          - |-
-            core, unsafe, kvstore, avro,
-            network-common, network-shuffle, repl, launcher,
-            examples, sketch, graphx
-          - |-
-            catalyst, hive-thriftserver
-          - |-
-            streaming, sql-kafka-0-10, streaming-kafka-0-10,
-            mllib-local, mllib,
-            yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
-          - |-
-            pyspark-sql, pyspark-mllib, pyspark-resource
-          - |-
-            pyspark-core, pyspark-streaming, pyspark-ml
-          - |-
-            sparkr
-        # Here, we split Hive and SQL tests into some of slow ones and the rest of them.
-        included-tags: [""]
-        excluded-tags: [""]
-        comment: [""]
-        include:
-          # Hive tests
-          - modules: hive
-            java: 1.8
-            hadoop: hadoop3.2
-            hive: hive2.3
-            included-tags: org.apache.spark.tags.SlowHiveTest
-            comment: "- slow tests"
-          - modules: hive
-            java: 1.8
-            hadoop: hadoop3.2
-            hive: hive2.3
-            excluded-tags: org.apache.spark.tags.SlowHiveTest
-            comment: "- other tests"
-          # SQL tests
-          - modules: sql
-            java: 1.8
-            hadoop: hadoop3.2
-            hive: hive2.3
-            included-tags: org.apache.spark.tags.ExtendedSQLTest
-            comment: "- slow tests"
-          - modules: sql
-            java: 1.8
-            hadoop: hadoop3.2
-            hive: hive2.3
-            excluded-tags: org.apache.spark.tags.ExtendedSQLTest
-            comment: "- other tests"
-    env:
-      MODULES_TO_TEST: ${{ matrix.modules }}
-      EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
-      INCLUDED_TAGS: ${{ matrix.included-tags }}
-      HADOOP_PROFILE: ${{ matrix.hadoop }}
-      HIVE_PROFILE: ${{ matrix.hive }}
-      # GitHub Actions' default miniconda to use in pip packaging test.
-      CONDA_PREFIX: /usr/share/miniconda
-      GITHUB_PREV_SHA: ${{ github.event.before }}
-    steps:
-    - name: Checkout Spark repository
-      uses: actions/checkout@v2
-      # In order to fetch changed files
-      with:
-        fetch-depth: 0
-    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
-    - name: Cache Scala, SBT, Maven and Zinc
-      uses: actions/cache@v1
-      with:
-        path: build
-        key: build-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          build-
-    - name: Cache Maven local repository
-      uses: actions/cache@v2
-      with:
-        path: ~/.m2/repository
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-
-    - name: Cache Ivy local repository
-      uses: actions/cache@v2
-      with:
-        path: ~/.ivy2/cache
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
-    - name: Install JDK ${{ matrix.java }}
-      uses: actions/setup-java@v1
-      with:
-        java-version: ${{ matrix.java }}
-    # PySpark
-    - name: Install PyPy3
-      # Note that order of Python installations here matters because default python3 is
-      # overridden by pypy3.
-      uses: actions/setup-python@v2
-      if: contains(matrix.modules, 'pyspark')
-      with:
-        python-version: pypy3
-        architecture: x64
-    - name: Install Python 3.6
-      uses: actions/setup-python@v2
-      if: contains(matrix.modules, 'pyspark')
-      with:
-        python-version: 3.6
-        architecture: x64
-    - name: Install Python 3.8
-      uses: actions/setup-python@v2
-      # We should install one Python that is higher then 3+ for SQL and Yarn because:
-      # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
-      # - Yarn has a Python specific test too, for example, YarnClusterSuite.
-      if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
-      with:
-        python-version: 3.8
-        architecture: x64
-    - name: Install Python packages (Python 3.6 and PyPy3)
-      if: contains(matrix.modules, 'pyspark')
-      # PyArrow is not supported in PyPy yet, see ARROW-2651.
-      # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
-      run: |
-        python3.6 -m pip install numpy pyarrow pandas scipy
-        python3.6 -m pip list
-        pypy3 -m pip install numpy pandas
-        pypy3 -m pip list
-    - name: Install Python packages (Python 3.8)
-      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
-      run: |
-        python3.8 -m pip install numpy pyarrow pandas scipy
-        python3.8 -m pip list
-    # SparkR
-    - name: Install R 4.0
-      if: contains(matrix.modules, 'sparkr')
-      run: |
-        sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list"
-        curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add
-        sudo apt-get update
-        sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
-    - name: Install R packages
-      if: contains(matrix.modules, 'sparkr')
-      run: |
-        # qpdf is required to reduce the size of PDFs to make CRAN check pass. See SPARK-32497.
-        sudo apt-get install -y libcurl4-openssl-dev qpdf
-        sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')"
-        # Show installed packages in R.
-        sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
-    # Run the tests.
-    - name: "Run tests: ${{ matrix.modules }}"
-      run: |
-        # Hive tests become flaky when running in parallel as it's too intensive.
-        if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
-        mkdir -p ~/.m2
-        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
-        rm -rf ~/.m2/repository/org/apache/spark
-
   # Static analysis, and documentation build
   lint:
     name: Linters, licenses, dependencies and documentation generation
@@ -191,28 +22,3 @@ jobs:
         DEFAULT_BRANCH: master
         VALIDATE_MD: true
         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
-  java11:
-    name: Java 11 build
-    runs-on: ubuntu-latest
-    steps:
-    - name: Checkout Spark repository
-      uses: actions/checkout@v2
-    - name: Cache Maven local repository
-      uses: actions/cache@v2
-      with:
-        path: ~/.m2/repository
-        key: java11-maven-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          java11-maven-
-    - name: Install Java 11
-      uses: actions/setup-java@v1
-      with:
-        java-version: 11
-    - name: Build with Maven
-      run: |
-        export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
-        export MAVEN_CLI_OPTS="--no-transfer-progress"
-        mkdir -p ~/.m2
-        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
-        rm -rf ~/.m2/repository/org/apache/spark
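
Note: after this series, the lint job runs only the github/super-linter:v3 step
with markdown validation enabled (VALIDATE_MD). To try the same check locally
before pushing, super-linter's README of that era documents a RUN_LOCAL mode;
a minimal sketch, assuming Docker is available and the command is run from the
Spark repository root (the /tmp/lint mount path is the workspace location the
image expects, not something defined in this series):

  # Hypothetical local run of the workflow's linter image; RUN_LOCAL=true
  # tells super-linter to lint the mounted checkout instead of a GitHub
  # Actions workspace, and VALIDATE_MD mirrors the env var set above.
  docker run --rm \
    -e RUN_LOCAL=true \
    -e VALIDATE_MD=true \
    -v "$PWD":/tmp/lint \
    github/super-linter:v3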