pycoder49
diff --git a/‎.config/nextest.toml‎
Lines changed: 7 additions & 0 deletions b/‎.config/nextest.toml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎.github/workflows/batch-test.yml‎
Lines changed: 1 addition & 13 deletions b/‎.github/workflows/batch-test.yml‎
Lines changed: 1 addition & 13 deletions
diff --git a/‎.github/workflows/general.yml‎
Lines changed: 22 additions & 21 deletions b/‎.github/workflows/general.yml‎
Lines changed: 22 additions & 21 deletions
diff --git a/‎.github/workflows/helm-publish.yml‎
Lines changed: 171 additions & 0 deletions b/‎.github/workflows/helm-publish.yml‎
Lines changed: 171 additions & 0 deletions
@@ -57,6 +57,13 @@ e2e_aws_sagemaker_tgi = { max-threads = 1 }
 e2e_aws_sagemaker_openai = { max-threads = 2 }
 e2e_groq = { max-threads = 1 }
 
+# Jaeger tests must run alone: requiring every nextest test thread
+# (`threads-required = 'num-test-threads'`) keeps anything else from running
+# at the same time. Otherwise, other tests could flush our target spans out of
+# the Jaeger buffer before we get a chance to find our traces.
+[[profile.default.overrides]]
+filter = 'test(test_jaeger)'
+threads-required = 'num-test-threads'
+
 [[profile.default.overrides]]
 filter = 'binary(e2e) and test(providers::aws_bedrock::)'
 test-group = 'e2e_aws_bedrock'
 
@@ -34,25 +34,13 @@ on:
 
 jobs:
   batch-tests:
-    runs-on: namespace-profile-tensorzero-8x16
+    runs-on: ubuntu-latest
 
     timeout-minutes: 15
 
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
 
-      - name: Install Namespace CLI
-        uses: namespacelabs/nscloud-setup@d1c625762f7c926a54bd39252efff0705fd11c64
-
-      - name: Configure Namespace-powered Buildx
-        uses: namespacelabs/nscloud-setup-buildx-action@84ca8c58fdf372d6a4750476cd09b7b96ee778ca
-
-      - name: Configure Namespace cache for Rust
-        uses: namespacelabs/nscloud-cache-action@2f50e7d0f70475e6f59a55ba0f05eec9108e77cc
-        with:
-          cache: |
-            rust
-
       - uses: dtolnay/rust-toolchain@stable
 
       - name: Login to DockerHub
 
@@ -6,6 +6,12 @@ on:
   pull_request:
     branches: ["main"]
 
+# Concurrency policy:
+# - Merge queue (`merge_group` event): runs are grouped by `<workflow>-<ref>`, so a newer run
+#   cancels any in-progress run of this workflow for the same ref.
+# - Any other event: the group is the unique `github.run_id`, so nothing ever gets cancelled.
+# The `condition && a || b` expression below is how a ternary is written in a workflow expression.
+concurrency:
+  group: ${{ github.event_name == 'merge_group' && format('{0}-{1}', github.workflow, github.ref) || github.run_id }}
+  cancel-in-progress: true
+
 env:
   FORCE_COLOR: 1
   TENSORZERO_CLICKHOUSE_URL: "http://chuser:chpassword@localhost:8123/tensorzero"
@@ -46,6 +52,11 @@ jobs:
           docker load < gateway-container.tar
           docker load < ui-container.tar
 
+      - name: Create the `object_storage` directory for the multimodal-vision-finetuning example
+        run: |
+          # Create the directory up front and make it world-writable.
+          # NOTE(review): presumably so the example's containers can write to it - confirm.
+          storage_dir=examples/multimodal-vision-finetuning/object_storage
+          mkdir -p "${storage_dir}"
+          chmod 777 "${storage_dir}"
+
       - name: Check latest docker-compose
         run: ./ci/check-all-docker-compose.sh
 
@@ -76,6 +87,11 @@ jobs:
         uses: namespacelabs/nscloud-setup-buildx-action@84ca8c58fdf372d6a4750476cd09b7b96ee778ca
         continue-on-error: ${{ github.event.pull_request.head.repo.full_name != github.repository || github.actor == 'dependabot[bot]' }}
 
+      - name: Create the `object_storage` directory for the multimodal-vision-finetuning example
+        run: |
+          # Create the directory up front and make it world-writable.
+          # NOTE(review): presumably so the example's containers can write to it - confirm.
+          storage_dir=examples/multimodal-vision-finetuning/object_storage
+          mkdir -p "${storage_dir}"
+          chmod 777 "${storage_dir}"
+
       - name: Check all docker-compose.yml files
         run: ./ci/check-all-docker-compose.sh
 
@@ -133,7 +149,7 @@ jobs:
   validate:
     runs-on: namespace-profile-tensorzero-8x16
 
-    timeout-minutes: 20
+    timeout-minutes: 30
 
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
@@ -289,18 +305,6 @@ jobs:
         run: |
           uv run stubtest tensorzero.tensorzero
 
-      - name: "Python: OpenAI Client: Install dependencies"
-        working-directory: clients/openai-python
-        run: |
-          uv venv
-          uv pip sync requirements.txt
-
-      - name: "Python: OpenAI Client: pyright"
-        working-directory: clients/openai-python
-        run: |
-          uv pip install pyright==1.1.394
-          uv run pyright
-
       - name: "Node.js: Run prettier"
         run: pnpm --filter=openai-node run format
 
@@ -355,7 +359,7 @@ jobs:
     # We don't run many tests here, so use a normal runner with Github Actions caching
     # to avoid unnecessarily using Namespace credits (it should still always finish before
     # the main 'validate' job)
-    runs-on: ${{ matrix.replicated && 'namespace-profile-tensorzero-large-cache-volume' || 'ubuntu-latest' }}
+    runs-on: ${{ matrix.replicated && 'namespace-profile-tensorzero-16x32' || 'ubuntu-latest' }}
     continue-on-error: ${{ matrix.clickhouse_version.allow_failure }}
     strategy:
       matrix:
@@ -365,15 +369,9 @@ jobs:
           - tag: "24.12-alpine"
             prefix: "24.12"
             allow_failure: false
-          - tag: "25.2-alpine"
-            prefix: "25.2"
-            allow_failure: false
           - tag: "latest-alpine"
             prefix: ""
-            # ClickHouse can make new releases at any time, which might break our tests.
-            # We allow this job to fail to avoid blocking CI whenever this happens.
-            # However, we'll still want to fix the failing tests soon after we notice the failure
-            allow_failure: true
+            allow_failure: false
 
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
@@ -517,6 +515,7 @@ jobs:
       OPENAI_API_KEY: not_used
       FIREWORKS_API_KEY: not_used
       FIREWORKS_ACCOUNT_ID: not_used
+      TOGETHER_API_KEY: not_used
       TENSORZERO_USE_MOCK_INFERENCE_PROVIDER: 1
       TENSORZERO_SKIP_LARGE_FIXTURES: 1
       R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
@@ -581,6 +580,8 @@ jobs:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       FIREWORKS_ACCOUNT_ID: ${{ secrets.FIREWORKS_ACCOUNT_ID }}
       FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
+      S3_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      S3_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
 
   # See 'ci/README.md' at the repository root for more details.
   check-all-general-jobs-passed:
 
@@ -0,0 +1,171 @@
+# This workflow packages the Helm chart in `examples/production-deployment-k8s-helm` and
+# publishes it, together with the repository `index.yaml`, to the `tensorzero-helm-charts`
+# R2 bucket. It runs when a release is released, and can also be started manually.
+name: Publish Helm chart
+
+on:
+  # Manual trigger
+  workflow_dispatch:
+  # Automatic trigger on `released` release events
+  release:
+    types: [released]
+
+jobs:
+  publish-helm-chart:
+    name: Publish Helm chart to R2
+    runs-on: ubuntu-latest
+    # The job only reads the repository; the upload is authenticated with the R2 keys below.
+    permissions:
+      contents: read
+    # Required secrets:
+    # - HELM_PUBLISH_ACCESS_KEY_ID: Cloudflare R2 access key ID
+    # - HELM_PUBLISH_SECRET_ACCESS_KEY: Cloudflare R2 secret access key
+    # Required variables:
+    # - R2_ENDPOINT_URL: Cloudflare R2 endpoint URL (e.g., https://ACCOUNT_ID.r2.cloudflarestorage.com)
+    steps:
+      - name: Check out the repo
+        # Pinned to a full commit SHA (the same one the other workflows use) instead of a
+        # mutable tag, so this credential-bearing job always runs the exact same action code.
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+
+      - name: Set up Helm
+        # NOTE(review): this action is referenced by a mutable tag, and `version: "latest"`
+        # does not pin a Helm release, so the toolchain used for publishing may differ
+        # between runs - consider pinning both.
+        uses: azure/setup-helm@v4
+        with:
+          version: "latest"
+
+      - name: Extract version from Cargo.toml
+        id: version
+        run: |
+          echo "=== Extracting version from Cargo.toml ==="
+          echo "Current directory: $(pwd)"
+          echo "Cargo.toml contents (first 30 lines):"
+          head -30 Cargo.toml
+          echo ""
+          echo "Looking for version line..."
+          # Take the first line of the form `version = "..."` (expected to be the one in the
+          # workspace.package section), keep only the quoted value, and stop reading.
+          # Anything after the closing quote (e.g. a trailing comment) is discarded.
+          VERSION=$(sed -nE '/^version = "[^"]+"/{s/^version = "([^"]+)".*/\1/p;q;}' Cargo.toml)
+          # Fail fast rather than packaging and publishing a chart with an empty version.
+          if [ -z "${VERSION}" ]; then
+            echo "::error::Could not extract a version from Cargo.toml"
+            exit 1
+          fi
+          # Exposed to later steps as `steps.version.outputs.VERSION`.
+          echo "VERSION=${VERSION}" >> "${GITHUB_OUTPUT}"
+          echo "Extracted version: ${VERSION}"
+          echo "========================="
+
+      - name: Update Chart.yaml version
+        env:
+          # Handed to the script through the environment instead of being spliced into the
+          # script text, so the value can never be interpreted as shell code.
+          CHART_VERSION: ${{ steps.version.outputs.VERSION }}
+        run: |
+          cd examples/production-deployment-k8s-helm
+          echo "=== Updating Chart.yaml ==="
+          echo "Current directory: $(pwd)"
+          echo ""
+          echo "Chart.yaml BEFORE updates:"
+          echo "----------------------------"
+          cat Chart.yaml
+          echo "----------------------------"
+          echo ""
+          # Update the version field in Chart.yaml
+          sed -i "s/^version:.*/version: ${CHART_VERSION}/" Chart.yaml
+          # Update the appVersion field in Chart.yaml to match (kept quoted so it stays a string)
+          sed -i "s/^appVersion:.*/appVersion: \"${CHART_VERSION}\"/" Chart.yaml
+          echo "Chart.yaml AFTER updates:"
+          echo "----------------------------"
+          cat Chart.yaml
+          echo "----------------------------"
+          echo "========================="
+
+      - name: Package Helm chart
+        run: |
+          cd examples/production-deployment-k8s-helm
+          echo "=== Packaging Helm chart ==="
+          echo "Current directory: $(pwd)"
+          echo ""
+          echo "Files in directory before packaging:"
+          ls -la
+          echo ""
+          # Writes the chart archive (a .tgz file) into the current directory.
+          helm package .
+          echo ""
+          echo "Files in directory after packaging:"
+          ls -la
+          echo ""
+          echo "Package details:"
+          # Log size, checksums and a listing of each archive for later troubleshooting.
+          for pkg in tensorzero-*.tgz; do
+            # If the glob matched nothing, `pkg` is the literal pattern: skip it.
+            [ -f "$pkg" ] || continue
+            echo "  Package: $pkg"
+            # Every expansion is quoted so the file name is never word-split or re-globbed.
+            echo "  Size: $(ls -lh "$pkg" | awk '{print $5}')"
+            echo "  MD5: $(md5sum "$pkg" | awk '{print $1}')"
+            echo "  SHA256: $(sha256sum "$pkg" | awk '{print $1}')"
+            echo ""
+            echo "  Package contents:"
+            tar -tzf "$pkg" | head -20
+            echo "  ..."
+          done
+          echo "========================="
+
+      - name: Configure AWS CLI for R2
+        env:
+          # Credentials reach the script only through the environment; they are never
+          # interpolated into the script text itself.
+          R2_ACCESS_KEY_ID: ${{ secrets.HELM_PUBLISH_ACCESS_KEY_ID }}
+          R2_SECRET_ACCESS_KEY: ${{ secrets.HELM_PUBLISH_SECRET_ACCESS_KEY }}
+          R2_ENDPOINT_URL: ${{ vars.R2_ENDPOINT_URL }}
+        run: |
+          echo "=== Configuring AWS CLI for R2 ==="
+          echo "R2 endpoint: ${R2_ENDPOINT_URL}"
+          # Stop here with a clear message if a secret is missing, instead of failing
+          # later with an opaque authentication error.
+          : "${R2_ACCESS_KEY_ID:?HELM_PUBLISH_ACCESS_KEY_ID secret is not set}"
+          : "${R2_SECRET_ACCESS_KEY:?HELM_PUBLISH_SECRET_ACCESS_KEY secret is not set}"
+          aws configure set aws_access_key_id "${R2_ACCESS_KEY_ID}"
+          aws configure set aws_secret_access_key "${R2_SECRET_ACCESS_KEY}"
+          aws configure set default.region auto
+          aws configure set default.s3.signature_version s3v4
+          echo "AWS CLI configured"
+          echo "========================="
+
+      - name: Download existing index.yaml
+        env:
+          R2_ENDPOINT_URL: ${{ vars.R2_ENDPOINT_URL }}
+        run: |
+          cd examples/production-deployment-k8s-helm
+          echo "=== Downloading existing index.yaml from R2 ==="
+          echo "Current directory: $(pwd)"
+          echo "R2 endpoint: ${R2_ENDPOINT_URL}"
+          echo ""
+          # Probe for the object first, so that "index.yaml does not exist yet" (HTTP 404)
+          # can be told apart from real failures (bad credentials, network errors, ...).
+          # Treating every failure as "no index" would let the later steps publish a fresh
+          # index.yaml that drops every previously published chart version.
+          # `2>&1 >/dev/null` captures only the error text of the probe.
+          if probe_error=$(aws s3api head-object \
+            --bucket tensorzero-helm-charts --key index.yaml \
+            --endpoint-url "${R2_ENDPOINT_URL}" 2>&1 >/dev/null); then
+            # The object exists: a failed download now fails the step.
+            aws s3 cp s3://tensorzero-helm-charts/index.yaml ./existing-index.yaml \
+              --endpoint-url "${R2_ENDPOINT_URL}"
+            echo "Successfully downloaded existing index.yaml"
+            echo ""
+            echo "Existing index.yaml contents:"
+            echo "----------------------------"
+            cat existing-index.yaml
+            echo "----------------------------"
+          elif grep -qF '(404)' <<< "${probe_error}"; then
+            echo "No existing index.yaml found (this is normal for first run)"
+          else
+            echo "::error::Could not check for an existing index.yaml in R2"
+            echo "${probe_error}" >&2
+            exit 1
+          fi
+          echo "========================="
+
+      - name: Generate index.yaml
+        working-directory: examples/production-deployment-k8s-helm
+        run: |
+          echo "=== Generating index.yaml ==="
+          echo "Current directory: $(pwd)"
+          echo ""
+          echo "Package files present:"
+          ls -la tensorzero-*.tgz 2>/dev/null || echo "No package files found"
+          echo ""
+          # Build the `helm repo index` arguments once; `--merge` is added only when an
+          # earlier index.yaml is present as existing-index.yaml.
+          index_args=(. --url https://helm.tensorzero.com)
+          if [ -f existing-index.yaml ]; then
+            echo "Merging with existing index.yaml"
+            index_args+=(--merge existing-index.yaml)
+          else
+            echo "Creating new index.yaml (no existing index to merge)"
+          fi
+          helm repo index "${index_args[@]}"
+          echo ""
+          echo "Generated index.yaml contents:"
+          echo "----------------------------"
+          cat index.yaml
+          echo "----------------------------"
+          echo ""
+          # Summary of the generated file, for the job log.
+          echo "index.yaml file details:"
+          echo "  Size: $(ls -lh index.yaml | awk '{print $5}')"
+          echo "  Lines: $(wc -l < index.yaml)"
+          echo "  MD5: $(md5sum index.yaml | awk '{print $1}')"
+          echo "========================="
+
+      - name: Upload chart package to R2
+        env:
+          R2_ENDPOINT_URL: ${{ vars.R2_ENDPOINT_URL }}
+        run: |
+          cd examples/production-deployment-k8s-helm
+          # Expand the glob into an array instead of parsing `ls` output, and insist on
+          # exactly one archive: none means packaging did not produce a file, and several
+          # would make it ambiguous which one to publish.
+          shopt -s nullglob
+          chart_files=(tensorzero-*.tgz)
+          if [ "${#chart_files[@]}" -ne 1 ]; then
+            echo "::error::Expected exactly one chart package, found ${#chart_files[@]}: ${chart_files[*]}"
+            exit 1
+          fi
+          CHART_FILE="${chart_files[0]}"
+          # The object key is the archive's file name.
+          aws s3 cp "${CHART_FILE}" "s3://tensorzero-helm-charts/${CHART_FILE}" \
+            --endpoint-url "${R2_ENDPOINT_URL}"
+          echo "Uploaded ${CHART_FILE} to R2"
+
+      - name: Upload index.yaml to R2
+        working-directory: examples/production-deployment-k8s-helm
+        run: |
+          # Copy the index.yaml from the chart directory to the root of the bucket.
+          aws s3 cp index.yaml s3://tensorzero-helm-charts/index.yaml --endpoint-url ${{ vars.R2_ENDPOINT_URL }}
+          echo "Uploaded index.yaml to R2"