Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions tests/e2e/features/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
4. after_scenario
"""

import subprocess
import time
from behave.model import Scenario
from behave.runner import Context

Expand All @@ -32,6 +34,52 @@ def before_scenario(context: Context, scenario: Scenario) -> None:

def after_scenario(context: Context, scenario: Scenario) -> None:
    """Run after each scenario is run.

    If a scenario stopped the llama-stack container (see the
    "llama-stack connection is disrupted" step, which sets
    ``context.llama_stack_was_running``), restart the container and poll
    its health endpoint until it responds or the retry budget is spent.
    """
    # Nothing to restore unless a step recorded that it stopped the container.
    if not getattr(context, "llama_stack_was_running", False):
        return

    try:
        # Start the llama-stack container again.
        subprocess.run(
            ["docker", "start", "llama-stack"], check=True, capture_output=True
        )
    except subprocess.CalledProcessError as e:
        print(f"Warning: Could not restore Llama Stack connection: {e}")
        return

    # Give the service a moment to boot before the first health probe.
    print("Restoring Llama Stack connection...")
    time.sleep(5)

    # Poll the health endpoint for up to ~30 seconds.  Each failed attempt —
    # whether curl exits non-zero (CalledProcessError, service not up yet) or
    # the probe times out — is retried rather than aborting the whole loop.
    # (Previously a CalledProcessError escaped the loop and aborted restoration
    # on the first unhealthy poll.)
    for attempt in range(6):
        try:
            subprocess.run(
                [
                    "docker",
                    "exec",
                    "llama-stack",
                    "curl",
                    "-f",
                    "http://localhost:8321/v1/health",
                ],
                capture_output=True,
                timeout=5,
                check=True,
            )
            # check=True means reaching this line implies a zero exit code.
            print("✓ Llama Stack connection restored successfully")
            break
        except subprocess.TimeoutExpired:
            print(f"⏱ Health check timed out on attempt {attempt + 1}/6")
        except subprocess.CalledProcessError:
            print(f"Health check failed on attempt {attempt + 1}/6")

        if attempt < 5:
            print(
                f"Waiting for Llama Stack to be healthy... (attempt {attempt + 1}/6)"
            )
            time.sleep(5)
        else:
            print(
                "Warning: Llama Stack may not be fully healthy after restoration"
            )


def before_feature(context: Context, feature: Scenario) -> None:
Expand Down
117 changes: 64 additions & 53 deletions tests/e2e/features/health.feature
Original file line number Diff line number Diff line change
@@ -1,53 +1,64 @@
# Feature: Health endpoint API tests
#TODO: fix test

# Background:
# Given The service is started locally
# And REST API service hostname is localhost
# And REST API service port is 8080
# And REST API service prefix is /v1


# Scenario: Check if service report proper readiness state
# Given The system is in default state
# When I access endpoint "readiness" using HTTP GET method
# Then The status code of the response is 200
# And The body of the response has the following schema
# """
# {
# "ready": "bool",
# "reason": "str",
# "providers": "list[str]"
# }
# """
# And The body of the response is the following
# """
# {"ready": true, "reason": "All providers are healthy", "providers": []}
# """

# Scenario: Check if service report proper readiness state when llama stack is not available
# Given The system is in default state
# And The llama-stack connection is disrupted
# When I access endpoint "readiness" using HTTP GET method
# Then The status code of the response is 503

# Scenario: Check if service report proper liveness state
# Given The system is in default state
# When I access endpoint "liveness" using HTTP GET method
# Then The status code of the response is 200
# And The body of the response has the following schema
# """
# {
# "alive": "bool"
# }
# """
# And The body of the response is the following
# """
# {"alive":true}
# """

# Scenario: Check if service report proper liveness state when llama stack is not available
# Given The system is in default state
# And The llama-stack connection is disrupted
# When I access endpoint "liveness" using HTTP GET method
# Then The status code of the response is 503
# End-to-end tests for the service's health endpoints (readiness and
# liveness), including behaviour while the backing llama-stack container
# is stopped via the "llama-stack connection is disrupted" step.
Feature: REST API tests


Background:
Given The service is started locally
And REST API service hostname is localhost
And REST API service port is 8080
And REST API service prefix is /v1


# Happy path: all providers healthy -> 200 with an exact readiness body.
Scenario: Check if service report proper readiness state
Given The system is in default state
When I access endpoint "readiness" using HTTP GET method
Then The status code of the response is 200
And The body of the response has the following schema
"""
{
"ready": "bool",
"reason": "str",
"providers": "list[str]"
}
"""
And The body of the response is the following
"""
{"ready": true, "reason": "All providers are healthy", "providers": []}
"""


# Happy path: liveness reports alive with an exact body.
Scenario: Check if service report proper liveness state
Given The system is in default state
When I access endpoint "liveness" using HTTP GET method
Then The status code of the response is 200
And The body of the response has the following schema
"""
{
"alive": "bool"
}
"""
And The body of the response is the following
"""
{"alive": true}
"""


# Readiness must fail (503) while llama-stack is down; the "providers"
# field is ignored because its contents depend on the runtime environment.
Scenario: Check if service report proper readiness state when llama stack is not available
Given The system is in default state
And The llama-stack connection is disrupted
When I access endpoint "readiness" using HTTP GET method
Then The status code of the response is 503
And The body of the response, ignoring the "providers" field, is the following
"""
{"ready": false, "reason": "Providers not healthy: unknown"}
"""


# Liveness is independent of llama-stack: it must stay 200 even when the
# provider connection is disrupted.
Scenario: Check if service report proper liveness state even when llama stack is not available
Given The system is in default state
And The llama-stack connection is disrupted
When I access endpoint "liveness" using HTTP GET method
Then The status code of the response is 200
And The body of the response is the following
"""
{"alive": true}
"""
32 changes: 29 additions & 3 deletions tests/e2e/features/steps/health.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,40 @@
"""Implementation of common test steps."""

import subprocess
import time
from behave import given # pyright: ignore[reportAttributeAccessIssue]
from behave.runner import Context


@given("The llama-stack connection is disrupted")
def llama_stack_connection_broken(context: Context) -> None:
    """Break the llama-stack connection by stopping its docker container.

    Records whether the container was actually running in
    ``context.llama_stack_was_running`` so ``after_scenario`` can restore it.
    Failures are reported as warnings (best-effort), not raised.
    """
    # Default: nothing to restore unless we actually stop the container.
    context.llama_stack_was_running = False

    try:
        result = subprocess.run(
            ["docker", "inspect", "-f", "{{.State.Running}}", "llama-stack"],
            capture_output=True,
            text=True,
            check=True,
        )

        # `docker inspect` prints the literal string "true" or "false".
        # Both are non-empty (truthy), so an explicit comparison is required
        # — a bare truthiness test would also "stop" an already-stopped
        # container and wrongly mark it for restoration.
        if result.stdout.strip() == "true":
            context.llama_stack_was_running = True
            subprocess.run(
                ["docker", "stop", "llama-stack"], check=True, capture_output=True
            )

            # Wait a moment for the connection to be fully disrupted
            time.sleep(2)

            print("Llama Stack connection disrupted successfully")
        else:
            print("Llama Stack container was not running")

    except subprocess.CalledProcessError as e:
        print(f"Warning: Could not disrupt Llama Stack connection: {e}")


@given("the service is stopped")
Expand Down