# ai-agent/tests/performance/benchmarks.py
import logging
import os
import statistics
import threading
import time
from unittest.mock import MagicMock

import pytest

from orchestrator.core.dispatcher import Task, TaskDispatcher, TaskQueue
from security.rbac_engine import RBACEngine, Role
from storage.adapters.sqlite_adapter import SQLiteAdapter
class TestPerformanceBenchmarks:
    """Performance benchmark suite for system components.

    Latency budgets come from the architectural guardian: no single
    storage/RBAC operation may exceed 800 ms, and bulk dispatch of 1000
    tasks must complete within 800 ms total.
    """

    @pytest.fixture
    def sample_task(self):
        """A minimal task carrying the metadata that RBAC checks inspect."""
        return Task(
            id="test-1",
            payload={},
            requester="system",
            priority=2,
            metadata={"resource": "tasks", "action": "execute"},
        )

    @pytest.fixture
    def sqlite_adapter(self):
        """An in-memory SQLite adapter with RBAC stubbed to always allow."""
        adapter = SQLiteAdapter(":memory:", "test-encryption-key-12345678901234567890")
        adapter.rbac = MagicMock()
        adapter.rbac.validate_permission.return_value = True
        return adapter

    def test_rbac_operation_latency(self, sample_task):
        """Benchmark RBAC permission validation latency."""
        queue = TaskQueue()
        queue.rbac.assign_role("test-user", Role.ADMIN)

        times = []
        for _ in range(1000):
            start = time.perf_counter_ns()
            queue._validate_permissions("test-user", sample_task)
            times.append(time.perf_counter_ns() - start)

        median = statistics.median(times) / 1_000_000  # ns -> ms
        # NOTE(review): the original comment claimed "<=800ms" while the assert
        # enforced 0.8 ms; the stricter 0.8 ms per-check budget is kept as the
        # intended bound (800 ms for one in-memory permission check would be
        # meaninglessly loose).
        assert median < 0.8

    def test_sqlite_crud_operations(self, sqlite_adapter):
        """Benchmark SQLite CRUD operations under different load conditions."""
        test_user = "benchmark-user"
        # basicConfig opens the log file immediately; make sure the target
        # directory exists so the test does not crash on a fresh checkout.
        os.makedirs("metrics", exist_ok=True)
        logging.basicConfig(filename='metrics/api_performance.log', level=logging.INFO)

        # Aggregated timings across all workers.  Bug fix: the original
        # asserted on lists local to run_operations, which raised NameError.
        # The lock guards concurrent extends in the medium/peak phases.
        create_times = []
        read_times = []
        delete_times = []
        results_lock = threading.Lock()

        def run_operations(iterations, load_type):
            """Run CRUD cycles, log averages, merge timings into shared lists."""
            local_create, local_read, local_delete = [], [], []
            # Include the thread id in record keys so concurrent workers do
            # not collide on the same ids (the original reused task-{i}-{type}
            # in every thread).
            tid = threading.get_ident()
            for i in range(iterations):
                key = f"task-{tid}-{i}-{load_type}"

                start = time.perf_counter_ns()
                sqlite_adapter.create(key, {"data": "test"}, test_user)
                create_time = time.perf_counter_ns() - start
                local_create.append(create_time)

                start = time.perf_counter_ns()
                sqlite_adapter.read(key, test_user)
                read_time = time.perf_counter_ns() - start
                local_read.append(read_time)

                start = time.perf_counter_ns()
                sqlite_adapter.delete(key, test_user)
                delete_time = time.perf_counter_ns() - start
                local_delete.append(delete_time)

                # Architectural guardian: flag any single op over 800 ms.
                if max(create_time, read_time, delete_time) > 800_000_000:
                    logging.warning(f"Operation exceeded 800ms threshold in {load_type} load")

            logging.info(f"{load_type.upper()} LOAD RESULTS:")
            logging.info(f"Create avg: {sum(local_create)/len(local_create)/1_000_000:.2f}ms")
            logging.info(f"Read avg: {sum(local_read)/len(local_read)/1_000_000:.2f}ms")
            logging.info(f"Delete avg: {sum(local_delete)/len(local_delete)/1_000_000:.2f}ms")

            with results_lock:
                create_times.extend(local_create)
                read_times.extend(local_read)
                delete_times.extend(local_delete)

        # Idle load (single thread)
        run_operations(100, "idle")

        # Medium load (10 threads)
        threads = [
            threading.Thread(target=run_operations, args=(100, "medium"))
            for _ in range(10)
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        # Peak load (50 threads)
        threads = [
            threading.Thread(target=run_operations, args=(100, "peak"))
            for _ in range(50)
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        # Verify all operations meet performance targets (median <= 0.8 ms).
        assert statistics.median(create_times) / 1_000_000 < 0.8
        assert statistics.median(read_times) / 1_000_000 < 0.8
        assert statistics.median(delete_times) / 1_000_000 < 0.8

    def test_dispatcher_throughput(self, sample_task):
        """Benchmark dispatcher task processing throughput."""
        dispatcher = TaskDispatcher()
        dispatcher._process_task = MagicMock(return_value=True)

        n_tasks = 1000
        for i in range(n_tasks):
            task = Task(
                id=f"task-{i}",
                payload={},
                requester="system",
                priority=1,
                metadata={"resource": "tasks", "action": "execute"},
            )
            dispatcher.queue.add_task(task)

        start = time.perf_counter_ns()
        dispatcher.dispatch()
        duration = (time.perf_counter_ns() - start) / 1_000_000_000  # ns -> s

        throughput = n_tasks / duration
        assert throughput > 100  # Target: 100 tasks/second

    def test_load_conditions(self, sample_task):
        """Test performance under different load conditions."""
        dispatcher = TaskDispatcher()
        dispatcher._process_task = MagicMock(return_value=True)

        def simulate_load(iterations):
            """Queue `iterations` tasks, dispatch them, return elapsed ms."""
            for i in range(iterations):
                task = Task(
                    id=f"load-task-{i}",
                    payload={},
                    requester="system",
                    priority=1,
                    metadata={"resource": "tasks", "action": "execute"},
                )
                dispatcher.queue.add_task(task)
            start = time.perf_counter_ns()
            dispatcher.dispatch()
            return (time.perf_counter_ns() - start) / 1_000_000  # ns -> ms

        # Idle load (single task)
        idle_time = simulate_load(1)
        logging.info(f"Idle load processing time: {idle_time:.2f}ms")

        # Medium load (100 tasks)
        medium_time = simulate_load(100)
        logging.info(f"Medium load processing time: {medium_time:.2f}ms")

        # Peak load (1000 tasks)
        peak_time = simulate_load(1000)
        logging.info(f"Peak load processing time: {peak_time:.2f}ms")

        # Architectural guardian: 1000 tasks must dispatch within 800 ms.
        assert peak_time < 800