# ai-agent/tests/performance/benchmarks.py
import logging
import os
import statistics
import threading
import time
from unittest.mock import MagicMock

import pytest

from orchestrator.core.dispatcher import Task, TaskDispatcher, TaskQueue
from security.rbac_engine import RBACEngine, Role
from storage.adapters.sqlite_adapter import SQLiteAdapter
class TestPerformanceBenchmarks:
    """Performance benchmark suite for system components.

    Latency budgets come from the architectural guardian: no single
    storage/RBAC operation may exceed 800 ms, and bulk dispatch of 1000
    tasks must complete within 800 ms total.
    """

    @pytest.fixture
    def sample_task(self):
        """A minimal task carrying the metadata that RBAC checks inspect."""
        return Task(
            id="test-1",
            payload={},
            requester="system",
            priority=2,
            metadata={"resource": "tasks", "action": "execute"},
        )

    @pytest.fixture
    def sqlite_adapter(self):
        """An in-memory SQLite adapter with RBAC stubbed to always allow."""
        adapter = SQLiteAdapter(":memory:", "test-encryption-key-12345678901234567890")
        adapter.rbac = MagicMock()
        adapter.rbac.validate_permission.return_value = True
        return adapter

    def test_rbac_operation_latency(self, sample_task):
        """Benchmark RBAC permission validation latency."""
        queue = TaskQueue()
        queue.rbac.assign_role("test-user", Role.ADMIN)

        times = []
        for _ in range(1000):
            start = time.perf_counter_ns()
            queue._validate_permissions("test-user", sample_task)
            times.append(time.perf_counter_ns() - start)

        median = statistics.median(times) / 1_000_000  # ns -> ms
        # NOTE(review): the original comment claimed "<=800ms" while the assert
        # enforced 0.8 ms; the stricter 0.8 ms per-check budget is kept as the
        # intended bound (800 ms for one in-memory permission check would be
        # meaninglessly loose).
        assert median < 0.8

    def test_sqlite_crud_operations(self, sqlite_adapter):
        """Benchmark SQLite CRUD operations under different load conditions."""
        test_user = "benchmark-user"
        # basicConfig opens the log file immediately; make sure the target
        # directory exists so the test does not crash on a fresh checkout.
        os.makedirs("metrics", exist_ok=True)
        logging.basicConfig(filename='metrics/api_performance.log', level=logging.INFO)

        # Aggregated timings across all workers.  Bug fix: the original
        # asserted on lists local to run_operations, which raised NameError.
        # The lock guards concurrent extends in the medium/peak phases.
        create_times = []
        read_times = []
        delete_times = []
        results_lock = threading.Lock()

        def run_operations(iterations, load_type):
            """Run CRUD cycles, log averages, merge timings into shared lists."""
            local_create, local_read, local_delete = [], [], []
            # Include the thread id in record keys so concurrent workers do
            # not collide on the same ids (the original reused task-{i}-{type}
            # in every thread).
            tid = threading.get_ident()
            for i in range(iterations):
                key = f"task-{tid}-{i}-{load_type}"

                start = time.perf_counter_ns()
                sqlite_adapter.create(key, {"data": "test"}, test_user)
                create_time = time.perf_counter_ns() - start
                local_create.append(create_time)

                start = time.perf_counter_ns()
                sqlite_adapter.read(key, test_user)
                read_time = time.perf_counter_ns() - start
                local_read.append(read_time)

                start = time.perf_counter_ns()
                sqlite_adapter.delete(key, test_user)
                delete_time = time.perf_counter_ns() - start
                local_delete.append(delete_time)

                # Architectural guardian: flag any single op over 800 ms.
                if max(create_time, read_time, delete_time) > 800_000_000:
                    logging.warning(f"Operation exceeded 800ms threshold in {load_type} load")

            logging.info(f"{load_type.upper()} LOAD RESULTS:")
            logging.info(f"Create avg: {sum(local_create)/len(local_create)/1_000_000:.2f}ms")
            logging.info(f"Read avg: {sum(local_read)/len(local_read)/1_000_000:.2f}ms")
            logging.info(f"Delete avg: {sum(local_delete)/len(local_delete)/1_000_000:.2f}ms")

            with results_lock:
                create_times.extend(local_create)
                read_times.extend(local_read)
                delete_times.extend(local_delete)

        # Idle load (single thread)
        run_operations(100, "idle")

        # Medium load (10 threads)
        threads = [
            threading.Thread(target=run_operations, args=(100, "medium"))
            for _ in range(10)
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        # Peak load (50 threads)
        threads = [
            threading.Thread(target=run_operations, args=(100, "peak"))
            for _ in range(50)
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        # Verify all operations meet performance targets (median <= 0.8 ms).
        assert statistics.median(create_times) / 1_000_000 < 0.8
        assert statistics.median(read_times) / 1_000_000 < 0.8
        assert statistics.median(delete_times) / 1_000_000 < 0.8

    def test_dispatcher_throughput(self, sample_task):
        """Benchmark dispatcher task processing throughput."""
        dispatcher = TaskDispatcher()
        dispatcher._process_task = MagicMock(return_value=True)

        n_tasks = 1000
        for i in range(n_tasks):
            task = Task(
                id=f"task-{i}",
                payload={},
                requester="system",
                priority=1,
                metadata={"resource": "tasks", "action": "execute"},
            )
            dispatcher.queue.add_task(task)

        start = time.perf_counter_ns()
        dispatcher.dispatch()
        duration = (time.perf_counter_ns() - start) / 1_000_000_000  # ns -> s

        throughput = n_tasks / duration
        assert throughput > 100  # Target: 100 tasks/second

    def test_load_conditions(self, sample_task):
        """Test performance under different load conditions."""
        dispatcher = TaskDispatcher()
        dispatcher._process_task = MagicMock(return_value=True)

        def simulate_load(iterations):
            """Queue `iterations` tasks, dispatch them, return elapsed ms."""
            for i in range(iterations):
                task = Task(
                    id=f"load-task-{i}",
                    payload={},
                    requester="system",
                    priority=1,
                    metadata={"resource": "tasks", "action": "execute"},
                )
                dispatcher.queue.add_task(task)
            start = time.perf_counter_ns()
            dispatcher.dispatch()
            return (time.perf_counter_ns() - start) / 1_000_000  # ns -> ms

        # Idle load (single task)
        idle_time = simulate_load(1)
        logging.info(f"Idle load processing time: {idle_time:.2f}ms")

        # Medium load (100 tasks)
        medium_time = simulate_load(100)
        logging.info(f"Medium load processing time: {medium_time:.2f}ms")

        # Peak load (1000 tasks)
        peak_time = simulate_load(1000)
        logging.info(f"Peak load processing time: {peak_time:.2f}ms")

        # Architectural guardian: 1000 tasks must dispatch within 800 ms.
        assert peak_time < 800