diff --git a/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml b/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml index c35a1ad8..2990b0bd 100644 --- a/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml +++ b/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml @@ -144,13 +144,13 @@ data: stackable.tech/vendor: Stackable spec: sparkImage: - productVersion: 3.5.6 + productVersion: 4.0.1 mode: cluster mainApplicationFile: local:///stackable/spark/jobs/spark-ingest-into-lakehouse.py deps: packages: - - org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1 - - org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.6 + - org.apache.iceberg:iceberg-spark-runtime-4.0_2.13:1.10.0 + - org.apache.spark:spark-sql-kafka-0-10_2.13:4.0.1 s3connection: reference: minio sparkConf: diff --git a/demos/end-to-end-security/create-spark-report.yaml b/demos/end-to-end-security/create-spark-report.yaml index 79b32e9b..4efd848d 100644 --- a/demos/end-to-end-security/create-spark-report.yaml +++ b/demos/end-to-end-security/create-spark-report.yaml @@ -55,12 +55,12 @@ data: name: spark-report spec: sparkImage: - productVersion: 3.5.6 + productVersion: 4.0.1 mode: cluster mainApplicationFile: local:///stackable/spark/jobs/spark-report.py deps: packages: - - org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1 + - org.apache.iceberg:iceberg-spark-runtime-4.0_2.13:1.10.0 sparkConf: spark.driver.extraClassPath: /stackable/config/hdfs spark.executor.extraClassPath: /stackable/config/hdfs diff --git a/demos/spark-k8s-anomaly-detection-taxi-data/create-spark-anomaly-detection-job.yaml b/demos/spark-k8s-anomaly-detection-taxi-data/create-spark-anomaly-detection-job.yaml index 928037f2..46f1cd79 100644 --- a/demos/spark-k8s-anomaly-detection-taxi-data/create-spark-anomaly-detection-job.yaml +++ b/demos/spark-k8s-anomaly-detection-taxi-data/create-spark-anomaly-detection-job.yaml @@ -10,22 +10,22 @@ spec: - name: 
wait-for-testdata image: oci.stackable.tech/sdp/testing-tools:0.2.0-stackable0.0.0-dev command: - bash - -euo - pipefail - -c - | echo 'Waiting for job load-ny-taxi-data to finish' kubectl wait --for=condition=complete --timeout=30m job/load-ny-taxi-data containers: - name: create-spark-anomaly-detection-job image: oci.stackable.tech/sdp/testing-tools:0.2.0-stackable0.0.0-dev command: - bash - -euo - pipefail - -c - | echo 'Submitting Spark job' kubectl apply -f /tmp/manifest/spark-ad-job.yaml volumeMounts: @@ -51,12 +51,12 @@ data: name: spark-ad spec: sparkImage: - productVersion: 3.5.6 + productVersion: 4.0.1 mode: cluster mainApplicationFile: local:///spark-scripts/spark-ad.py deps: packages: - - org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.8.1 + - org.apache.iceberg:iceberg-spark-runtime-4.0_2.13:1.10.0 requirements: - scikit-learn==1.4.0 s3connection: diff --git a/stacks/airflow/airflow.yaml b/stacks/airflow/airflow.yaml index b86a6c05..01094310 100644 --- a/stacks/airflow/airflow.yaml +++ b/stacks/airflow/airflow.yaml @@ -303,7 +303,7 @@ data: spec: version: "1.0" sparkImage: - productVersion: 3.5.6 + productVersion: 4.0.1 mode: cluster mainApplicationFile: local:///stackable/spark/examples/src/main/python/pi.py job: @@ -331,6 +331,8 @@ data: memory: limit: 1024Mi replicas: 3 + + # {% endraw %} --- apiVersion: v1 diff --git a/stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml b/stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml index 1d0b637f..c6639b53 100644 --- a/stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml +++ b/stacks/jupyterhub-pyspark-hdfs/jupyterlab.yaml @@ -21,7 +21,7 @@ spec: serviceAccountName: default containers: - name: jupyterlab - image: oci.stackable.tech/stackable/spark-connect-client:3.5.6-stackable0.0.0-dev + image: oci.stackable.tech/stackable/spark-connect-client:4.0.1-stackable0.0.0-dev imagePullPolicy: IfNotPresent command: - 
bash @@ -39,8 +39,12 @@ spec: name: notebook initContainers: - name: download-notebook - image: oci.stackable.tech/stackable/spark-connect-client:3.5.6-stackable0.0.0-dev - command: ['sh', '-c', 'curl https://raw.githubusercontent.com/stackabletech/demos/main/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb -o /notebook/notebook.ipynb'] + image: oci.stackable.tech/stackable/spark-connect-client:4.0.1-stackable0.0.0-dev + command: + - bash + args: + - -c + - curl https://raw.githubusercontent.com/stackabletech/demos/main/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb -o /notebook/notebook.ipynb volumeMounts: - mountPath: /notebook name: notebook diff --git a/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb b/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb index 3e81c879..92a9eb7b 100644 --- a/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb +++ b/stacks/jupyterhub-pyspark-hdfs/notebook.ipynb @@ -53,7 +53,7 @@ "#\n", "# See: https://issues.apache.org/jira/browse/SPARK-46032\n", "#\n", - "spark.addArtifacts(\"/stackable/spark/connect/spark-connect_2.12-3.5.6.jar\")" + "spark.addArtifacts(\"/stackable/spark/connect/spark-connect-4.0.1.jar\")" ] }, { diff --git a/stacks/jupyterhub-pyspark-hdfs/spark_connect.yaml b/stacks/jupyterhub-pyspark-hdfs/spark_connect.yaml index 5fe372b2..3bdefa71 100644 --- a/stacks/jupyterhub-pyspark-hdfs/spark_connect.yaml +++ b/stacks/jupyterhub-pyspark-hdfs/spark_connect.yaml @@ -30,8 +30,8 @@ spec: image: # Using an image that includes scikit-learn (among other things) # because this package needs to be available on the executors. - custom: oci.stackable.tech/stackable/spark-connect-client:3.5.6-stackable0.0.0-dev - productVersion: 3.5.6 + custom: oci.stackable.tech/stackable/spark-connect-client:4.0.1-stackable0.0.0-dev + productVersion: 4.0.1 pullPolicy: IfNotPresent args: server: