91 changes: 24 additions & 67 deletions .github/workflows/build_and_test.yml
@@ -29,10 +29,10 @@ on:
       - 'main'
 
 jobs:
-  spark23:
+  spark32:
     if: "! contains(toJSON(github.event.commits.*.message), '[skip test]')"
     runs-on: macos-latest
-    name: Build and Test on Apache Spark 2.3.x
+    name: Build and Test on Apache Spark 3.2.x
 
     steps:
       - uses: actions/checkout@v2
@@ -48,25 +48,28 @@ jobs:
       - name: Install Python packages (Python 3.7)
         run: |
           python -m pip install --upgrade pip
-          pip install pyspark==2.3.4 numpy
-      - name: Build Spark NLP on Apache Spark 2.3.x
+          pip install pyspark==3.2.1 numpy
+      - name: Build Spark NLP on Apache Spark 3.2.1
         run: |
           brew install sbt
-          sbt -Dis_spark23=true clean
-          sbt -Dis_spark23=true compile
-          sbt -mem 4096 -Dis_spark23=true assemblyAndCopy
-      - name: Test Spark NLP in Scala - Apache Spark 2.3.x
+          sbt -mem 4096 clean assemblyAndCopy
+      - name: Test Spark NLP in Scala - Apache Spark 3.2.x
         run: |
-          sbt -mem 4096 -Dis_spark23=true test
-      - name: Test Spark NLP in Python - Apache Spark 2.3.x
+          sbt -mem 4096 coverage test
+      - name: Upload coverage data to Coveralls
+        run: sbt coverageReport coveralls
+        env:
+          COVERALLS_REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          COVERALLS_FLAG_NAME: Apache Spark 3.2.x - Scala 2.12
+      - name: Test Spark NLP in Python - Apache Spark 3.2.x
         run: |
           cd python
           python3.7 -m run-tests
 
-  spark24:
+  spark31:
     if: "! contains(toJSON(github.event.commits.*.message), '[skip test]')"
     runs-on: macos-latest
-    name: Build and Test on Apache Spark 2.4.x
+    name: Build and Test on Apache Spark 3.1.x
 
     steps:
       - uses: actions/checkout@v2
@@ -82,17 +85,15 @@ jobs:
       - name: Install Python packages (Python 3.7)
         run: |
           python -m pip install --upgrade pip
-          pip install pyspark==2.4.8 numpy
-      - name: Build Spark NLP on Apache Spark 2.4.x
+          pip install pyspark==3.1.3 numpy
+      - name: Build Spark NLP on Apache Spark 3.1.x
         run: |
           brew install sbt
-          sbt clean
-          sbt compile
-          sbt -mem 4096 -Dis_spark24=true assemblyAndCopy
-      - name: Test Spark NLP in Scala - Apache Spark 2.4.x
+          sbt -mem 4096 -Dis_spark30=true clean assemblyAndCopy
+      - name: Test Spark NLP in Scala - Apache Spark 3.1.x
         run: |
-          sbt -mem 4096 -Dis_spark24=true test
-      - name: Test Spark NLP in Python - Apache Spark 2.4.x
+          sbt -mem 4096 -Dis_spark30=true test
+      - name: Test Spark NLP in Python - Apache Spark 3.1.x
         run: |
           cd python
           python3.7 -m run-tests
@@ -117,55 +117,11 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install pyspark==3.0.3 numpy
-      - name: Build Spark NLP on Apache Spark 3.0.3
+      - name: Build Spark NLP on Apache Spark 3.0.x
         run: |
           brew install sbt
-          sbt clean
-          sbt compile
-          sbt -mem 4096 assemblyAndCopy
-      - name: Test Spark NLP in Scala - Apache Spark 3.0.x
-        run: |
-          sbt -mem 4096 test
+          sbt -mem 4096 -Dis_spark30=true clean assemblyAndCopy
       - name: Test Spark NLP in Python - Apache Spark 3.0.x
         run: |
           cd python
-          python3.7 -m run-tests
-
-  spark32:
-    if: "! contains(toJSON(github.event.commits.*.message), '[skip test]')"
-    runs-on: macos-latest
-    name: Build and Test on Apache Spark 3.2.x
-
-    steps:
-      - uses: actions/checkout@v2
-      - name: Set up JDK 8
-        uses: actions/setup-java@v1
-        with:
-          java-version: 1.8
-      - name: Install Python 3.7
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.7.7
-          architecture: x64
-      - name: Install Python packages (Python 3.7)
-        run: |
-          python -m pip install --upgrade pip
-          pip install pyspark==3.2.1 numpy
-      - name: Build Spark NLP on Apache Spark 3.2.1
-        run: |
-          brew install sbt
-          sbt clean
-          sbt compile
-          sbt -mem 4096 -Dis_spark32=true assemblyAndCopy
-      - name: Test Spark NLP in Scala - Apache Spark 3.2.x
-        run: |
-          sbt -mem 4096 -Dis_spark32=true coverage test
-      - name: Upload coverage data to Coveralls
-        run: sbt coverageReport coveralls
-        env:
-          COVERALLS_REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          COVERALLS_FLAG_NAME: Apache Spark 3.2.x - Scala 2.12
-      - name: Test Spark NLP in Python - Apache Spark 3.2.x
-        run: |
-          cd python
-          python3.7 -m run-tests
+          python3.7 -m run-tests
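
Each job above selects a Spark line by pinning a pyspark version and, for the 3.0.x/3.1.x jobs, passing -Dis_spark30=true to sbt. As a minimal sketch of how such a flag reaches the build, mirroring the System.getProperty pattern visible in project/Dependencies.scala further down (the object name here is hypothetical):

// Sketch: a command-line flag such as `sbt -Dis_spark30=true ...` sets a
// JVM system property; the build definition reads it and falls back to
// "false" when the flag is absent.
object SparkFlags {
  val isSpark30: String = System.getProperty("is_spark30", "false")
  val isGpu: String = System.getProperty("is_gpu", "false")

  def main(args: Array[String]): Unit =
    println(s"is_spark30=$isSpark30, is_gpu=$isGpu")
}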
2 changes: 1 addition & 1 deletion .github/workflows/stale.yaml
@@ -12,7 +12,7 @@ jobs:
         id: stale
         with:
           stale-issue-message: 'This issue is stale because it has been open 120 days with no activity. Remove stale label or comment or this will be closed in 5 days'
-          days-before-stale: 365
+          days-before-stale: 120
           days-before-close: 14
           exempt-issue-labels: 'backlog,bug,ob-hold,keep'
       - name: Print outputs
6 changes: 3 additions & 3 deletions build.sbt
@@ -2,7 +2,7 @@ import Dependencies._
 import Resolvers.m2Resolvers
 import sbtassembly.MergeStrategy
 
-name := getPackageName(is_spark23, is_spark24, is_spark32, is_gpu)
+name := getPackageName(is_spark30, is_gpu)
 
 organization := "com.johnsnowlabs.nlp"
 
@@ -217,8 +217,8 @@ inConfig(SlowTest)(Defaults.testTasks)
 (Test / publishArtifact) := true
 
 /** Copies the assembled jar to the pyspark/lib dir * */
-lazy val copyAssembledJar = taskKey[Unit]("Copy assembled jar to pyspark/lib")
-lazy val copyAssembledJarForPyPi = taskKey[Unit]("Copy assembled jar to pyspark/sparknlp/lib")
+lazy val copyAssembledJar = taskKey[Unit]("Copy assembled jar to python/lib")
+lazy val copyAssembledJarForPyPi = taskKey[Unit]("Copy assembled jar to python/sparknlp/lib")
 
 copyAssembledJar := {
   val jarFilePath = (assembly / assemblyOutputPath).value
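
The renamed task descriptions now match the actual python/lib destination. Since the body of copyAssembledJar is truncated in this diff, here is a hedged sketch of what such a copy task can look like in an sbt build that uses sbt-assembly; the task name and destination path below are assumptions for illustration:

import java.nio.file.{Files, Paths, StandardCopyOption}

// Hypothetical sketch of a task that copies the assembled jar to python/lib;
// only its first line (reading assemblyOutputPath) is visible in the diff above.
lazy val copyAssembledJarSketch = taskKey[Unit]("Copy assembled jar to python/lib")

copyAssembledJarSketch := {
  val jarFilePath = (assembly / assemblyOutputPath).value // as in the diff
  val destination = Paths.get("python", "lib", jarFilePath.getName)
  Files.createDirectories(destination.getParent)          // ensure python/lib exists
  Files.copy(jarFilePath.toPath, destination, StandardCopyOption.REPLACE_EXISTING)
  println(s"Copied ${jarFilePath.getName} to $destination")
}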
54 changes: 17 additions & 37 deletions project/Dependencies.scala
@@ -3,69 +3,50 @@ import sbt._
 object Dependencies {
 
   /** ------- Spark version start ------- */
-  val spark23Ver = "2.3.4"
-  val spark24Ver = "2.4.8"
-  val spark30Ver = "3.0.3"
+  // Spark 3.0.x and 3.1.x are similar
+  val spark30Ver = "3.1.3"
   val spark32Ver = "3.2.1"
 
   val is_gpu: String = System.getProperty("is_gpu", "false")
   val is_opt: String = System.getProperty("is_opt", "false")
-  val is_spark23: String = System.getProperty("is_spark23", "false")
-  val is_spark24: String = System.getProperty("is_spark24", "false")
   val is_spark30: String = System.getProperty("is_spark30", "false")
-  val is_spark32: String = System.getProperty("is_spark32", "false")
 
-  val sparkVer: String = getSparkVersion(is_spark23, is_spark24, is_spark32)
+  val sparkVer: String = getSparkVersion(is_spark30)
 
   /** ------- Spark version end ------- */
 
   /** Package attributes */
-  def getPackageName(
-      is_spark23: String,
-      is_spark24: String,
-      is_spark32: String,
-      is_gpu: String): String = {
-    if (is_gpu.equals("true") && is_spark23.equals("true")) {
-      "spark-nlp-gpu-spark23"
-    } else if (is_gpu.equals("true") && is_spark24.equals("true")) {
-      "spark-nlp-gpu-spark24"
-    } else if (is_gpu.equals("true") && is_spark32.equals("true")) {
-      "spark-nlp-gpu-spark32"
-    } else if (is_gpu.equals("true") && is_spark32.equals("false")) {
+  def getPackageName(is_spark30: String, is_gpu: String): String = {
+    if (is_gpu.equals("true") && is_spark30.equals("true")) {
+      "spark-nlp-gpu-spark30"
+    } else if (is_gpu.equals("true") && is_spark30.equals("false")) {
       "spark-nlp-gpu"
-    } else if (is_gpu.equals("false") && is_spark23.equals("true")) {
-      "spark-nlp-spark23"
-    } else if (is_gpu.equals("false") && is_spark24.equals("true")) {
-      "spark-nlp-spark24"
-    } else if (is_gpu.equals("false") && is_spark32.equals("true")) {
-      "spark-nlp-spark32"
+    } else if (is_gpu.equals("false") && is_spark30.equals("true")) {
+      "spark-nlp-spark30"
     } else {
       "spark-nlp"
     }
   }
 
-  def getSparkVersion(is_spark23: String, is_spark24: String, is_spark32: String): String = {
-    if (is_spark24 == "true") spark24Ver
-    else if (is_spark23 == "true") spark23Ver
-    else if (is_spark32 == "true") spark32Ver
-    else spark30Ver
+  def getSparkVersion(is_spark30: String): String = {
+    if (is_spark30 == "true") spark30Ver
+    else spark32Ver
   }
 
-  def getJavaTarget(is_spark23: String, is_spark24: String): String = {
-    if (is_spark24.equals("true") || is_spark23.equals("true")) {
+  def getJavaTarget(is_spark30: String, is_spark32: String): String = {
+    if (is_spark30.equals("true") || is_spark32.equals("true")) {
       "-target:jvm-1.8"
     } else {
       ""
     }
   }
 
   /** ------- Scala version start ------- */
-  lazy val scala211 = "2.11.12"
   lazy val scala212 = "2.12.10"
-  lazy val scalaVer: String =
-    if (is_spark23 == "true" | is_spark24 == "true") scala211 else scala212
+  lazy val scalaVer: String = scala212
 
-  lazy val supportedScalaVersions: Seq[String] = List(scala212, scala211)
+  lazy val supportedScalaVersions: Seq[String] = List(scala212)
 
   val scalaTestVersion = "3.2.9"
 
@@ -90,8 +71,7 @@ object Dependencies {
   val greexVersion = "1.0"
   val greex = "com.navigamez" % "greex" % greexVersion
 
-  val json4sVersion: String = if (is_spark32 == "true") "3.7.0-M11" else "3.5.3"
-
+  val json4sVersion: String = if (is_spark30 == "true") "3.7.0-M5" else "3.7.0-M11"
   val json4s = "org.json4s" %% "json4s-ext" % json4sVersion
 
   val junitVersion = "4.13.2"
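
With the 2.x branches gone, the helpers collapse to two flavors keyed on a single flag. A small sketch of how the refactored helpers resolve for each flag combination, using only the functions and values visible in this diff (the wrapper object and main method are illustrative):

// Sketch: expected resolution of the refactored helpers per flag combination.
object ResolveExample {
  import Dependencies._

  def main(args: Array[String]): Unit = {
    println(getPackageName("true", "false"))  // spark-nlp-spark30
    println(getPackageName("false", "false")) // spark-nlp (Spark 3.2 is the default)
    println(getPackageName("true", "true"))   // spark-nlp-gpu-spark30
    println(getPackageName("false", "true"))  // spark-nlp-gpu
    println(getSparkVersion("true"))          // 3.1.3: spark30Ver now covers 3.0.x/3.1.x
    println(getSparkVersion("false"))         // 3.2.1
    println(getJavaTarget("true", "false"))   // -target:jvm-1.8
  }
}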