Commit cd3291c

Merge pull request #714 from are-ces/rhelai-support

LCORE-333: Lightspeed core needs to fully support RHEL AI LLM provider

2 parents 5d280c9 + 37142bd, commit cd3291c
File tree: 6 files changed (+327, −1 lines)

.github/workflows/e2e_tests_rhaiis.yaml

Lines changed: 1 addition & 1 deletion

@@ -120,7 +120,7 @@ jobs:
       - name: Test RHAIIS connectivity
         run: |
-          curl ${RHAIIS_URL}:8000/v1/models -H "Authorization: Bearer ${RHAIIS_API_KEY}"
+          curl -f ${RHAIIS_URL}:8000/v1/models -H "Authorization: Bearer ${RHAIIS_API_KEY}"

       - name: Run service manually
         run: |
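The only functional change here is the `--fail` flag: without `-f`, curl exits 0 even when the server answers with an HTTP error, so a broken endpoint would let the connectivity step pass silently. A minimal sketch of the difference (example.com stands in for the real endpoint):

    # Without -f: an HTTP 4xx/5xx response still exits 0, masking the failure
    curl https://example.com/v1/models; echo "exit=$?"

    # With -f: an HTTP 4xx/5xx response exits 22, failing the CI step
    curl -f https://example.com/v1/models; echo "exit=$?"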
.github/workflows/e2e_tests_rhelai.yaml (new file)

Lines changed: 172 additions & 0 deletions

# .github/workflows/e2e_tests_rhelai.yaml
name: RHEL AI E2E Tests

on:
  schedule:
    - cron: "0 0 * * *" # Runs once a day at midnight UTC
  workflow_dispatch:

jobs:
  e2e_tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        environment: [ "rhelai" ]
    env:
      RHEL_AI_URL: ${{ secrets.RHEL_AI_URL }}
      RHEL_AI_PORT: ${{ secrets.RHEL_AI_PORT }}
      RHEL_AI_API_KEY: ${{ secrets.RHEL_AI_API_KEY }}
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      RHEL_AI_MODEL: ${{ vars.RHEL_AI_MODEL }}

    steps:
      - uses: actions/checkout@v4
        with:
          # On PR_TARGET → the fork (or same repo) that opened the PR.
          # On push → falls back to the current repository.
          repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }}

          # On PR_TARGET → the PR head *commit* (reproducible).
          # On push → the pushed commit that triggered the workflow.
          ref: ${{ github.event.pull_request.head.ref || github.sha }}

          # Don’t keep credentials when running untrusted PR code under PR_TARGET.
          persist-credentials: ${{ github.event_name != 'pull_request_target' }}

      - name: Verify actual git checkout result
        run: |
          echo "=== Git Status After Checkout ==="
          echo "Remote URLs:"
          git remote -v
          echo ""
          echo "Current branch: $(git branch --show-current 2>/dev/null || echo 'detached HEAD')"
          echo "Current commit: $(git rev-parse HEAD)"
          echo "Current commit message: $(git log -1 --oneline)"
          echo ""
          echo "=== Recent commits (should show setup-metrics commits) ==="
          git log --oneline -5

      - uses: 1arp/[email protected]
        with:
          path: '.'
          isAbsolutePath: false
          file: 'lightspeed-stack.yaml'
          content: |
            name: Lightspeed Core Service (LCS)
            service:
              host: 0.0.0.0
              port: 8080
              auth_enabled: false
              workers: 1
              color_log: true
              access_log: true
            llama_stack:
              # Uses a remote llama-stack service
              # The instance would have already been started with a llama-stack-run.yaml file
              use_as_library_client: false
              # Alternative for "as library use"
              # use_as_library_client: true
              # library_client_config_path: <path-to-llama-stack-run.yaml-file>
              url: http://llama-stack:8321
              api_key: xyzzy
            user_data_collection:
              feedback_enabled: true
              feedback_storage: "/tmp/data/feedback"
              transcripts_enabled: true
              transcripts_storage: "/tmp/data/transcripts"

            authentication:
              module: "noop"

      - name: Select and configure run.yaml
        env:
          CONFIG_ENVIRONMENT: ${{ matrix.environment || 'rhelai' }}
        run: |
          CONFIGS_DIR="tests/e2e/configs"
          ENVIRONMENT="$CONFIG_ENVIRONMENT"

          echo "Looking for configurations in $CONFIGS_DIR/"

          # List available configurations
          if [ -d "$CONFIGS_DIR" ]; then
            echo "Available configurations:"
            ls -la "$CONFIGS_DIR"/*.yaml 2>/dev/null || echo "No YAML files found in $CONFIGS_DIR/"
          else
            echo "Configs directory '$CONFIGS_DIR' not found!"
            exit 1
          fi

          # Determine which config file to use
          CONFIG_FILE="$CONFIGS_DIR/run-$ENVIRONMENT.yaml"

          echo "Looking for: $CONFIG_FILE"

          if [ -f "$CONFIG_FILE" ]; then
            echo "Found config for environment: $ENVIRONMENT"
            cp "$CONFIG_FILE" run.yaml
          else
            echo "Configuration file not found: $CONFIG_FILE"
            echo "Available files in $CONFIGS_DIR:"
            ls -la "$CONFIGS_DIR/"
            exit 1
          fi

          # Update paths for container environment (relative -> absolute)
          sed -i 's|db_path: \.llama/distributions|db_path: /app-root/.llama/distributions|g' run.yaml
          sed -i 's|db_path: tmp/|db_path: /app-root/.llama/distributions/|g' run.yaml
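          # Added note: the rewrite turns e.g. "db_path: .llama/distributions/ollama/registry.db"
          # into "db_path: /app-root/.llama/distributions/ollama/registry.db", so the
          # SQLite stores land on the absolute path available inside the container.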

          echo "Successfully configured for environment: $ENVIRONMENT"
          echo "Using configuration: $(basename "$CONFIG_FILE")"

      - name: Test RHEL_AI connectivity
        run: |
          echo $RHEL_AI_MODEL
          curl -f ${RHEL_AI_URL}:${RHEL_AI_PORT}/v1/models -H "Authorization: Bearer ${RHEL_AI_API_KEY}"

      - name: Run service manually
        run: |
          docker compose version
          docker compose up -d

          # Check for errors and show logs if any services failed
          if docker compose ps | grep -E 'Exit|exited|stopped'; then
            echo "Some services failed to start - showing logs:"
            docker compose logs
            exit 1
          else
            echo "All services started successfully"
          fi

      - name: Wait for services
        run: |
          echo "Waiting for services to be healthy..."
          sleep 20 # adjust depending on boot time

      - name: Quick connectivity test
        run: |
          echo "Testing basic connectivity before full test suite..."
          curl -f http://localhost:8080/v1/models || {
            echo "❌ Basic connectivity failed - showing logs before running full tests"
            docker compose logs --tail=30
            exit 1
          }

      - name: Run e2e tests
        run: |
          echo "Installing test dependencies..."
          pip install uv
          uv sync

          echo "Running comprehensive e2e test suite..."
          make test-e2e

      - name: Show logs on failure
        if: failure()
        run: |
          echo "=== Test failure logs ==="
          echo "=== llama-stack logs ==="
          docker compose logs llama-stack

          echo ""
          echo "=== lightspeed-stack logs ==="
          docker compose logs lightspeed-stack
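Since the workflow declares `workflow_dispatch` alongside the nightly cron, the suite can also be started on demand. A sketch using the GitHub CLI (the repository slug is a placeholder):

    # Trigger the RHEL AI e2e suite manually, then follow the run
    gh workflow run e2e_tests_rhelai.yaml --repo <owner>/<repo>
    gh run watch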

README.md

Lines changed: 1 addition & 0 deletions

@@ -125,6 +125,7 @@ Lightspeed Core Stack (LCS) supports the large language models from the provider
 | OpenAI | gpt-5, gpt-4o, gpt4-turbo, gpt-4.1, o1, o3, o4 | Yes | remote::openai | [1](examples/openai-faiss-run.yaml) [2](examples/openai-pgvector-run.yaml) |
 | OpenAI | gpt-3.5-turbo, gpt-4 | No | remote::openai | |
 | RHAIIS (vLLM) | meta-llama/Llama-3.1-8B-Instruct | Yes | remote::vllm | [1](tests/e2e/configs/run-rhaiis.yaml) |
+| RHEL AI (vLLM) | meta-llama/Llama-3.1-8B-Instruct | Yes | remote::vllm | [1](tests/e2e/configs/run-rhelai.yaml) |
 | Azure | gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3-mini, o4-mini | Yes | remote::azure | [1](examples/azure-run.yaml) |
 | Azure | o1, o1-mini | No | remote::azure | |

docker-compose.yaml

Lines changed: 4 additions & 0 deletions

@@ -18,6 +18,10 @@ services:
       - RHAIIS_URL=${RHAIIS_URL}
       - RHAIIS_API_KEY=${RHAIIS_API_KEY}
       - RHAIIS_MODEL=${RHAIIS_MODEL}
+      - RHEL_AI_URL=${RHEL_AI_URL}
+      - RHEL_AI_PORT=${RHEL_AI_PORT}
+      - RHEL_AI_API_KEY=${RHEL_AI_API_KEY}
+      - RHEL_AI_MODEL=${RHEL_AI_MODEL}
       - LLAMA_STACK_LOGGING=all=debug # enable llama-stack debug log
     networks:
       - lightspeednet
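Compose resolves these `${...}` references from the caller's environment or from a `.env` file next to `docker-compose.yaml`. A hypothetical `.env` for local runs (every value below is a placeholder, not a real endpoint, key, or required default):

    # .env (placeholder values; CI injects these from repository secrets instead)
    RHEL_AI_URL=http://rhelai.example.internal
    RHEL_AI_PORT=8000
    RHEL_AI_API_KEY=changeme
    RHEL_AI_MODEL=meta-llama/Llama-3.1-8B-Instruct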

docs/providers.md

Lines changed: 1 addition & 0 deletions

@@ -62,6 +62,7 @@ Red Hat providers:
 | Name | Version Tested | Type | Pip Dependencies | Supported in LCS |
 |---|---|---|---|:---:|
 | RHAIIS (vllm) | 3.2.3 (on RHEL 9.20250429.0.4) | remote | `openai` | ✅ |
+| RHEL AI (vllm) | 1.5.2 | remote | `openai` | ✅ |

---

tests/e2e/configs/run-rhelai.yaml (new file)

Lines changed: 148 additions & 0 deletions

version: '2'
image_name: rhelai-configuration

apis:
- agents
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
benchmarks: []
container_image: null
datasets: []
external_providers_dir: null
inference_store:
  db_path: .llama/distributions/ollama/inference_store.db
  type: sqlite
logging: null
metadata_store:
  db_path: .llama/distributions/ollama/registry.db
  namespace: null
  type: sqlite
providers:
  files:
  - config:
      storage_dir: /tmp/llama-stack-files
      metadata_store:
        type: sqlite
        db_path: .llama/distributions/ollama/files_metadata.db
    provider_id: localfs
    provider_type: inline::localfs
  agents:
  - config:
      persistence_store:
        db_path: .llama/distributions/ollama/agents_store.db
        namespace: null
        type: sqlite
      responses_store:
        db_path: .llama/distributions/ollama/responses_store.db
        type: sqlite
    provider_id: meta-reference
    provider_type: inline::meta-reference
  datasetio:
  - config:
      kvstore:
        db_path: .llama/distributions/ollama/huggingface_datasetio.db
        namespace: null
        type: sqlite
    provider_id: huggingface
    provider_type: remote::huggingface
  - config:
      kvstore:
        db_path: .llama/distributions/ollama/localfs_datasetio.db
        namespace: null
        type: sqlite
    provider_id: localfs
    provider_type: inline::localfs
  eval:
  - config:
      kvstore:
        db_path: .llama/distributions/ollama/meta_reference_eval.db
        namespace: null
        type: sqlite
    provider_id: meta-reference
    provider_type: inline::meta-reference
  inference:
  - provider_id: sentence-transformers # Can be any embedding provider
    provider_type: inline::sentence-transformers
    config: {}
  - provider_id: vllm
    provider_type: remote::vllm
    config:
      url: http://${env.RHEL_AI_URL}:${env.RHEL_AI_PORT}/v1/
      api_token: ${env.RHEL_AI_API_KEY}
      tls_verify: false
      max_tokens: 2048
  - provider_id: openai
    provider_type: remote::openai
    config:
      api_key: ${env.OPENAI_API_KEY}
  post_training:
  - config:
      checkpoint_format: huggingface
      device: cpu
      distributed_backend: null
      dpo_output_dir: "."
    provider_id: huggingface
    provider_type: inline::huggingface-gpu
  safety:
  - config:
      excluded_categories: []
    provider_id: llama-guard
    provider_type: inline::llama-guard
  scoring:
  - config: {}
    provider_id: basic
    provider_type: inline::basic
  - config: {}
    provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
  - config:
      openai_api_key: '********'
    provider_id: braintrust
    provider_type: inline::braintrust
  telemetry:
  - config:
      service_name: 'lightspeed-stack-telemetry'
      sinks: sqlite
      sqlite_db_path: .llama/distributions/ollama/trace_store.db
    provider_id: meta-reference
    provider_type: inline::meta-reference
  tool_runtime:
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
    config: {}
scoring_fns: []
server:
  auth: null
  host: null
  port: 8321
  quota: null
  tls_cafile: null
  tls_certfile: null
  tls_keyfile: null
shields:
- shield_id: llama-guard-shield
  provider_id: llama-guard
  provider_shield_id: "gpt-4-turbo"
models:
- metadata:
    embedding_dimension: 768 # Depends on chosen model
  model_id: sentence-transformers/all-mpnet-base-v2 # Example embedding model
  provider_id: sentence-transformers
  provider_model_id: sentence-transformers/all-mpnet-base-v2 # Location of embedding model
  model_type: embedding
- model_id: ${env.RHEL_AI_MODEL}
  provider_id: vllm
  model_type: llm
  provider_model_id: ${env.RHEL_AI_MODEL}
- model_id: gpt-4-turbo
  provider_id: openai
  model_type: llm
  provider_model_id: gpt-4-turbo
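The `${env.*}` placeholders are substituted by llama-stack at startup, so the same file serves CI (where the values come from repository secrets) and local runs. Once the stack is up on its configured port 8321, one way to confirm that both the vLLM model and the gpt-4-turbo fallback registered is to query the server (localhost assumed):

    # List the models the running llama-stack instance has registered
    curl -f http://localhost:8321/v1/models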
