v2/benchmark/plans/testing-strategy.md
```
           ▲
          / \
         /   \
        / E2E \
       /       \
      /─────────\
     /Integration\
    /             \
   /───────────────\
  /      Unit       \
 /                   \
/─────────────────────\
```
## Unit Tests (`tests/unit/`)

Test individual components in isolation.
```python
# tests/unit/core/test_benchmark_engine.py
from swarm_benchmark.core import BenchmarkEngine, EngineStatus, Task  # import path assumed

def test_benchmark_engine_initialization():
    engine = BenchmarkEngine()
    assert engine.status == EngineStatus.READY
    assert engine.task_queue.is_empty()

def test_benchmark_engine_task_submission():
    engine = BenchmarkEngine()
    task = Task(id="1", objective="test", strategy="auto")
    engine.submit_task(task)
    assert len(engine.task_queue) == 1
```
```python
# tests/unit/strategies/test_auto_strategy.py
import pytest

from swarm_benchmark.core import ResultStatus, Task  # import paths assumed
from swarm_benchmark.strategies import AutoStrategy
from tests.fixtures.tasks import create_test_task

def test_auto_strategy_selection():
    strategy = AutoStrategy()
    task = Task(objective="analyze data", parameters={})
    selected = strategy.select_strategy(task)
    assert selected == "analysis"

@pytest.mark.asyncio  # execute() is a coroutine, so the test must be async
async def test_auto_strategy_execution():
    strategy = AutoStrategy()
    task = create_test_task()
    result = await strategy.execute(task)
    assert result.status == ResultStatus.SUCCESS
```
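The async tests rely on the `pytest-asyncio` plugin (assumed to be pinned in `requirements-dev.txt`). Either mark each async test with `@pytest.mark.asyncio` as above, or enable auto mode once and drop the markers:

```ini
# pytest.ini — assumes pytest-asyncio is installed
[pytest]
asyncio_mode = auto
```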
```python
# tests/unit/modes/test_centralized_mode.py
from swarm_benchmark.modes import CentralizedMode  # import path assumed
from tests.fixtures.agents import create_test_agents  # see Fixtures section
from tests.fixtures.tasks import create_test_tasks

def test_centralized_mode_agent_assignment():
    mode = CentralizedMode()
    agents = create_test_agents(5)
    tasks = create_test_tasks(10)
    assignments = mode.assign_tasks(agents, tasks)
    assert len(assignments) == 10
```
## Integration Tests (`tests/integration/`)

Test component interactions and data flow.
```python
# tests/integration/test_strategy_mode_integration.py
import pytest

from swarm_benchmark.core import ResultStatus  # import paths assumed
from swarm_benchmark.modes import DistributedMode
from swarm_benchmark.strategies import ResearchStrategy
from tests.fixtures.tasks import create_research_task

@pytest.mark.asyncio
async def test_research_strategy_with_distributed_mode():
    strategy = ResearchStrategy()
    mode = DistributedMode()
    task = create_research_task()
    result = await mode.execute_with_strategy(strategy, task)
    assert result.status == ResultStatus.SUCCESS
    assert result.coordination_metrics["overhead"] < 0.1
```
```python
# tests/integration/test_output_integration.py
def test_json_sqlite_consistency():
    results = create_test_results()
    json_writer = JSONWriter()
    sqlite_manager = SQLiteManager()

    json_data = json_writer.export(results)
    sqlite_manager.store(results)
    sqlite_data = sqlite_manager.query_all()

    # Both backends should hold equivalent records (helper sketched below)
    assert normalize_data(json_data) == normalize_data(sqlite_data)
```
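`normalize_data` is a test helper rather than part of the system under test. A minimal sketch, assuming records are dictionaries keyed by `task_id` and that the ignored field names below are backend-specific:

```python
# tests/utils/normalize.py — hypothetical comparison helper
from typing import Any, Dict, List

def normalize_data(records: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Drop backend-specific fields and fix ordering so JSON and SQLite
    exports can be compared with plain equality."""
    ignored = {"created_at", "row_id"}  # assumed backend-specific fields
    cleaned = [{k: v for k, v in r.items() if k not in ignored} for r in records]
    return sorted(cleaned, key=lambda r: r.get("task_id", ""))
```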
## Performance Tests (`tests/performance/`)

Validate system performance under various conditions.
```python
# tests/performance/test_load_handling.py
import time

import pytest

from swarm_benchmark.core import BenchmarkEngine, ResultStatus  # import paths assumed
from tests.fixtures.tasks import create_test_tasks

@pytest.mark.asyncio
async def test_high_task_volume():
    engine = BenchmarkEngine()
    tasks = create_test_tasks(1000)
    start_time = time.time()
    results = await engine.execute_batch(tasks)
    execution_time = time.time() - start_time
    assert len(results) == 1000
    assert execution_time < 60  # should complete within 1 minute
    assert all(r.status == ResultStatus.SUCCESS for r in results)
```
```python
# tests/performance/test_resource_limits.py
from tests.utils.monitoring import resource_monitor  # helper sketched below

def test_memory_usage_under_load():
    with resource_monitor() as monitor:
        engine = BenchmarkEngine()
        tasks = create_memory_intensive_tasks(100)
        engine.execute_batch(tasks)
    assert monitor.peak_memory < 1024 * 1024 * 1024  # < 1 GB
```
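`resource_monitor` is assumed to be a small in-house test utility, not an existing library. One way to sketch it with `psutil` (itself an assumed dev dependency):

```python
# tests/utils/monitoring.py — hypothetical resource monitor sketch
import threading
import time
from contextlib import contextmanager

import psutil

class _Monitor:
    def __init__(self):
        self.peak_memory = 0
        self._stop = threading.Event()

    def _sample(self):
        proc = psutil.Process()  # current process
        while not self._stop.is_set():
            self.peak_memory = max(self.peak_memory, proc.memory_info().rss)
            time.sleep(0.05)

@contextmanager
def resource_monitor():
    """Sample this process's RSS in a background thread and record the peak."""
    monitor = _Monitor()
    thread = threading.Thread(target=monitor._sample, daemon=True)
    thread.start()
    try:
        yield monitor
    finally:
        monitor._stop.set()
        thread.join()
```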
## End-to-End Tests (`tests/e2e/`)

Test complete user workflows through the CLI.
```python
# tests/e2e/test_cli_workflows.py
import os
import subprocess

def test_complete_benchmark_workflow():
    # Execute via CLI
    result = subprocess.run([
        "python", "-m", "swarm_benchmark",
        "run", "test-benchmark",
        "--strategy", "research",
        "--mode", "distributed",
        "--output", "json,sqlite",
    ], capture_output=True, text=True)

    assert result.returncode == 0
    assert "Benchmark completed successfully" in result.stdout

    # Verify outputs exist
    assert os.path.exists("reports/test-benchmark.json")
    assert os.path.exists("reports/test-benchmark.db")
```
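Because the CLI writes real files under `reports/`, an autouse fixture keeps repeated e2e runs independent. A sketch (the artifact paths mirror the test above):

```python
# tests/e2e/conftest.py — hypothetical cleanup fixture
import os

import pytest

ARTIFACTS = ["reports/test-benchmark.json", "reports/test-benchmark.db"]

@pytest.fixture(autouse=True)
def clean_reports():
    """Delete benchmark artifacts before and after every e2e test."""
    for path in ARTIFACTS:
        if os.path.exists(path):
            os.remove(path)
    yield
    for path in ARTIFACTS:
        if os.path.exists(path):
            os.remove(path)
```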
## Fixtures (`tests/fixtures/`)

Reusable test data and objects.
```python
# tests/fixtures/tasks.py
import uuid
from typing import List

from swarm_benchmark.core import Task  # import path assumed

def create_test_task(**kwargs) -> Task:
    defaults = {
        "id": str(uuid.uuid4()),
        "objective": "test objective",
        "strategy": "auto",
        "mode": "centralized",
        "timeout": 60,
        "max_retries": 3,
    }
    defaults.update(kwargs)
    return Task(**defaults)

def create_test_tasks(count: int) -> List[Task]:
    return [create_test_task(id=str(i)) for i in range(count)]
```
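The unit tests also call `create_test_agents`, which this plan does not define. A companion sketch in the same style (the `Agent` constructor fields are assumptions):

```python
# tests/fixtures/agents.py — hypothetical companion to tasks.py
from typing import List

from swarm_benchmark.core import Agent  # import path assumed

def create_test_agents(count: int) -> List[Agent]:
    # Field names are guesses; align them with the real Agent model.
    return [Agent(id=f"agent-{i}", capabilities=["general"]) for i in range(count)]
```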
## Mocks (`tests/mocks/`)

Mock external dependencies for isolated testing.
```python
# tests/mocks/claude_flow_client.py
class MockClaudeFlowClient:
    """Records calls instead of invoking the real claude-flow backend."""

    def __init__(self):
        self.calls = []

    async def execute_swarm(self, objective: str, **kwargs):
        self.calls.append(("execute_swarm", objective, kwargs))
        return MockResult(success=True, output="mock output")
```
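`MockResult` is referenced but not defined here. A minimal stub plus a usage sketch (the `client` keyword on `BenchmarkEngine` is an assumed injection point):

```python
# tests/mocks/results.py — hypothetical result stub
from dataclasses import dataclass

@dataclass
class MockResult:
    success: bool
    output: str

# Usage sketch: inject the mock, then assert on the recorded calls.
@pytest.mark.asyncio
async def test_engine_calls_claude_flow_once_per_task():
    client = MockClaudeFlowClient()
    engine = BenchmarkEngine(client=client)  # injection point assumed
    await engine.execute_batch(create_test_tasks(3))
    assert len(client.calls) == 3
```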
## Test Utilities (`tests/utils/`)

Helper functions for testing.
```python
# tests/utils/assertions.py
from typing import Any, Dict

from swarm_benchmark.core import Result, ResultStatus  # import path assumed

def assert_result_valid(result: Result):
    assert result.task_id is not None
    assert result.status in [ResultStatus.SUCCESS, ResultStatus.FAILURE]
    assert result.execution_time >= 0
    assert isinstance(result.metrics, dict)

def assert_metrics_complete(metrics: Dict[str, Any]):
    required_keys = ["execution_time", "resource_usage", "quality_score"]
    for key in required_keys:
        assert key in metrics
```
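In a test, the helpers reduce repeated shape checks to one line each, e.g.:

```python
# Hypothetical usage of the shared assertion helpers
@pytest.mark.asyncio
async def test_result_shape():
    result = await AutoStrategy().execute(create_test_task())
    assert_result_valid(result)
    assert_metrics_complete(result.metrics)
```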
## Coverage

Run the suite with coverage reporting:

```bash
pytest --cov=src --cov-report=html --cov-report=term
```

Enforce the coverage floor in `pytest.ini` (`pytest.ini` uses a `[pytest]` section; the `[tool:pytest]` form belongs in `setup.cfg`):

```ini
[pytest]
addopts = --cov=src --cov-fail-under=95
```
## Pre-commit Hooks

```yaml
# .pre-commit-config.yaml
repos:
  - repo: local
    hooks:
      - id: pytest
        name: pytest
        entry: pytest
        language: system        # use the project environment's pytest
        pass_filenames: false   # run the whole suite, not per-file
        stages: [commit]
        types: [python]
```
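Activating the hook is a one-time step per clone:

```bash
pip install pre-commit
pre-commit install          # register the git hook
pre-commit run --all-files  # optional: check the whole tree once
```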
## Continuous Integration

```yaml
# .github/workflows/test.yml
name: Test Suite
on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions must be quoted: unquoted 3.10 parses as the number 3.1
        python-version: ["3.8", "3.9", "3.10", "3.11"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install -r requirements-dev.txt
      - name: Run tests
        run: pytest --cov=src --cov-report=xml
      - name: Upload coverage
        uses: codecov/codecov-action@v4
```
## Test Data Generation

```python
# tests/data/generators.py
from typing import List

from swarm_benchmark.core import Task  # import path assumed

class TaskGenerator:
    @staticmethod
    def simple_task() -> Task:
        return Task(
            id="simple-001",
            objective="Simple test task",
            strategy="auto",
            mode="centralized",
        )

    @staticmethod
    def complex_workflow() -> List[Task]:
        return [
            Task(id=f"complex-{i}", objective=f"Step {i}")
            for i in range(10)
        ]
```
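The generators pair naturally with parametrization; a usage sketch (assuming the module is importable as `tests.data.generators`):

```python
import pytest

from tests.data.generators import TaskGenerator

@pytest.mark.parametrize("task", TaskGenerator.complex_workflow(), ids=lambda t: t.id)
def test_each_workflow_step_has_an_objective(task):
    assert task.objective.startswith("Step")
```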
## Running Tests Locally

```bash
# Set up the test environment
python -m venv venv
source venv/bin/activate
pip install -r requirements-dev.txt

# Run the full suite
pytest

# Run with coverage
pytest --cov=src

# Run a specific test category
pytest tests/unit/
pytest tests/integration/
pytest tests/performance/
```
## Containerized Testing

```dockerfile
# Dockerfile.test
FROM python:3.9-slim

WORKDIR /app

COPY requirements*.txt ./
RUN pip install -r requirements-dev.txt

COPY . .
CMD ["pytest", "--cov=src", "--cov-report=html"]
```
## Code Quality Checks

```bash
# Linting
flake8 tests/
pylint tests/

# Type checking
mypy tests/

# Security scanning
bandit -r tests/
```
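A small wrapper script (a convenience sketch, not prescribed by this plan) chains the checks so one command gates a commit:

```bash
#!/usr/bin/env bash
# scripts/quality.sh — hypothetical aggregate quality gate
set -euo pipefail

flake8 tests/
pylint tests/
mypy tests/
bandit -r tests/
pytest --cov=src --cov-fail-under=95
```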
This comprehensive testing strategy ensures robust, reliable, and maintainable code through systematic test-driven development practices.