feat: implement v0.4.0 - Reports, Charts, Comparison, Dark Mode, E2E Testing
Some checks failed
E2E Tests / Run E2E Tests (push) Has been cancelled
E2E Tests / Visual Regression Tests (push) Has been cancelled
E2E Tests / Smoke Tests (push) Has been cancelled

Backend (@backend-dev):
- Add ReportService with PDF/CSV generation (reportlab, pandas)
- Implement Report API endpoints (POST, GET, DELETE, download)
- Add ReportRepository and schemas
- Configure storage with auto-cleanup (30 days)
- Rate limiting: 10 downloads/minute
- Professional PDF templates with charts support

Frontend (@frontend-dev):
- Integrate Recharts for data visualization
- Add CostBreakdown, TimeSeries, ComparisonBar charts
- Implement scenario comparison page with multi-select
- Add dark/light mode toggle with ThemeProvider
- Create Reports page with generation form and list
- Add new UI components: checkbox, dialog, tabs, label, skeleton
- Implement useComparison and useReports hooks

QA (@qa-engineer):
- Setup Playwright E2E testing framework
- Create 7 test spec files with 94 test cases
- Add visual regression testing with baselines
- Configure multi-browser testing (Chrome, Firefox, WebKit)
- Add mobile responsive tests
- Create test fixtures and helpers
- Setup GitHub Actions CI workflow

Documentation (@spec-architect):
- Create detailed kanban-v0.4.0.md with 27 tasks
- Update progress.md with v0.4.0 tracking
- Create v0.4.0 planning prompt

Features:
- PDF/CSV Report Generation
- Interactive Charts (Pie, Area, Bar)
- Scenario Comparison (2-4 scenarios)
- Dark/Light Mode Toggle
- E2E Test Suite (94 tests)

Dependencies added:
- Backend: reportlab, pandas, slowapi
- Frontend: recharts, date-fns, @radix-ui/react-checkbox/dialog/tabs
- Testing: @playwright/test

27 tasks completed, 100% v0.4.0 implementation
This commit is contained in:
Luca Sacchi Ricciardi
2026-04-07 16:11:47 +02:00
parent 311a576f40
commit a5fc85897b
63 changed files with 9218 additions and 246 deletions

View File

@@ -3,6 +3,7 @@
from src.services.pii_detector import PIIDetector, pii_detector, PIIDetectionResult
from src.services.cost_calculator import CostCalculator, cost_calculator
from src.services.ingest_service import IngestService, ingest_service
from src.services.report_service import ReportService, report_service
__all__ = [
"PIIDetector",
@@ -12,4 +13,6 @@ __all__ = [
"cost_calculator",
"IngestService",
"ingest_service",
"ReportService",
"report_service",
]

View File

@@ -0,0 +1,621 @@
"""Report generation service."""
import os
import uuid
from datetime import datetime, timedelta
from decimal import Decimal
from pathlib import Path
from typing import Optional, List, Dict, Any
from uuid import UUID
import pandas as pd
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter, A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import (
SimpleDocTemplate,
Paragraph,
Spacer,
Table,
TableStyle,
PageBreak,
)
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, func, desc
from src.core.config import settings
from src.core.exceptions import NotFoundException, ValidationException
from src.models.report import Report
from src.models.scenario import Scenario
from src.models.scenario_log import ScenarioLog
from src.models.scenario_metric import ScenarioMetric
class ReportStatus:
    """String constants describing the lifecycle of a report generation job."""

    PENDING = "pending"        # queued, not yet started
    PROCESSING = "processing"  # generation in progress
    COMPLETED = "completed"    # file written successfully
    FAILED = "failed"          # generation raised an error
class ReportService:
    """Service for generating scenario reports in PDF and CSV formats."""

    def __init__(self):
        """Read storage settings and ensure the report root directory exists."""
        self.storage_path = Path(settings.reports_storage_path)
        self.max_file_size_mb = settings.reports_max_file_size_mb
        # Create the root storage directory up front so later writes cannot fail
        # on a missing parent.
        self.storage_path.mkdir(parents=True, exist_ok=True)
def _get_scenario_path(self, scenario_id: UUID) -> Path:
"""Get storage path for a scenario's reports."""
path = self.storage_path / str(scenario_id)
path.mkdir(parents=True, exist_ok=True)
return path
def _get_file_path(self, scenario_id: UUID, report_id: UUID, format: str) -> Path:
"""Get file path for a report."""
return self._get_scenario_path(scenario_id) / f"{report_id}.{format}"
async def compile_metrics(
    self,
    db: AsyncSession,
    scenario_id: UUID,
    date_from: Optional[datetime] = None,
    date_to: Optional[datetime] = None,
) -> Dict[str, Any]:
    """Compile all metrics for a scenario.

    Args:
        db: Database session.
        scenario_id: Scenario UUID.
        date_from: Optional start date filter (inclusive).
        date_to: Optional end date filter (inclusive).

    Returns:
        Dictionary containing scenario info, summary totals, cost breakdown,
        per-source counts, and the 10 largest logs.

    Raises:
        NotFoundException: If the scenario does not exist.
    """

    def _with_date_range(query, column):
        """Apply the optional date_from/date_to bounds to *query* on *column*."""
        if date_from:
            query = query.where(column >= date_from)
        if date_to:
            query = query.where(column <= date_to)
        return query

    scenario = await db.get(Scenario, scenario_id)
    if not scenario:
        raise NotFoundException("Scenario")

    # All logs / metrics within the optional date window.
    logs_query = _with_date_range(
        select(ScenarioLog).where(ScenarioLog.scenario_id == scenario_id),
        ScenarioLog.received_at,
    )
    metrics_query = _with_date_range(
        select(ScenarioMetric).where(ScenarioMetric.scenario_id == scenario_id),
        ScenarioMetric.timestamp,
    )
    logs = (await db.execute(logs_query)).scalars().all()
    metrics = (await db.execute(metrics_query)).scalars().all()

    # Aggregate summary totals over the fetched logs.
    total_logs = len(logs)
    total_size_bytes = sum(log.size_bytes for log in logs)
    logs_with_pii = sum(1 for log in logs if log.has_pii)
    total_tokens = sum(log.token_count for log in logs)
    total_sqs_blocks = sum(log.sqs_blocks for log in logs)

    # Cost breakdown by metric type (Decimal to avoid float accumulation error).
    cost_breakdown: Dict[str, Decimal] = {}
    for metric in metrics:
        cost_breakdown[metric.metric_type] = (
            cost_breakdown.get(metric.metric_type, Decimal("0")) + metric.value
        )

    # Top 10 largest logs (by size) within the same date window.
    top_logs_query = _with_date_range(
        select(ScenarioLog)
        .where(ScenarioLog.scenario_id == scenario_id)
        .order_by(desc(ScenarioLog.size_bytes))
        .limit(10),
        ScenarioLog.received_at,
    )
    top_logs = (await db.execute(top_logs_query)).scalars().all()

    # Log counts grouped by source.
    sources_query = _with_date_range(
        select(ScenarioLog.source, func.count(ScenarioLog.id).label("count"))
        .where(ScenarioLog.scenario_id == scenario_id)
        .group_by(ScenarioLog.source),
        ScenarioLog.received_at,
    )
    sources_result = await db.execute(sources_query)
    sources = {row.source: row.count for row in sources_result.all()}

    return {
        "scenario": {
            "id": str(scenario.id),
            "name": scenario.name,
            "description": scenario.description,
            "region": scenario.region,
            "status": scenario.status,
            "created_at": scenario.created_at.isoformat()
            if scenario.created_at
            else None,
            "started_at": scenario.started_at.isoformat()
            if scenario.started_at
            else None,
            "completed_at": scenario.completed_at.isoformat()
            if scenario.completed_at
            else None,
            "total_cost_estimate": float(scenario.total_cost_estimate),
        },
        "summary": {
            "total_logs": total_logs,
            "total_size_bytes": total_size_bytes,
            "total_size_mb": round(total_size_bytes / (1024 * 1024), 2),
            "logs_with_pii": logs_with_pii,
            "total_tokens": total_tokens,
            "total_sqs_blocks": total_sqs_blocks,
            "date_range": {
                "from": date_from.isoformat() if date_from else None,
                "to": date_to.isoformat() if date_to else None,
            },
        },
        "cost_breakdown": {k: float(v) for k, v in cost_breakdown.items()},
        "sources": sources,
        "top_logs": [
            {
                "id": str(log.id),
                "received_at": log.received_at.isoformat()
                if log.received_at
                else None,
                "source": log.source,
                "size_bytes": log.size_bytes,
                "size_kb": round(log.size_bytes / 1024, 2),
                "has_pii": log.has_pii,
                "token_count": log.token_count,
                "sqs_blocks": log.sqs_blocks,
                "message_preview": log.message_preview,
            }
            for log in top_logs
        ],
    }
def _report_table_style(self, header_font_size: int = 12) -> TableStyle:
    """Return the shared report table look: blue header, black grid, zebra body.

    Args:
        header_font_size: Font size for the header row (default 12).

    Returns:
        A TableStyle that callers may extend further with ``.add(...)``.
    """
    return TableStyle(
        [
            ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#0066CC")),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.whitesmoke),
            ("ALIGN", (0, 0), (-1, -1), "LEFT"),
            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
            ("FONTSIZE", (0, 0), (-1, 0), header_font_size),
            ("BOTTOMPADDING", (0, 0), (-1, 0), 12),
            ("GRID", (0, 0), (-1, -1), 1, colors.black),
            ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.lightgrey]),
        ]
    )

async def generate_pdf(
    self,
    db: AsyncSession,
    scenario_id: UUID,
    report_id: UUID,
    include_sections: Optional[List[str]] = None,
    date_from: Optional[datetime] = None,
    date_to: Optional[datetime] = None,
) -> Path:
    """Generate a PDF report for a scenario.

    Args:
        db: Database session.
        scenario_id: Scenario UUID.
        report_id: Report UUID.
        include_sections: Sections to include; defaults to all of
            "summary", "costs", "metrics", "logs", "pii".
        date_from: Optional start date filter.
        date_to: Optional end date filter.

    Returns:
        Path to the generated PDF file.

    Raises:
        ValidationException: If the generated file exceeds the size limit.
    """
    include_sections = include_sections or [
        "summary",
        "costs",
        "metrics",
        "logs",
        "pii",
    ]
    metrics = await self.compile_metrics(db, scenario_id, date_from, date_to)
    file_path = self._get_file_path(scenario_id, report_id, "pdf")

    doc = SimpleDocTemplate(
        str(file_path),
        pagesize=A4,
        rightMargin=72,
        leftMargin=72,
        topMargin=72,
        bottomMargin=18,
    )
    elements = []
    styles = getSampleStyleSheet()
    title_style = ParagraphStyle(
        "CustomTitle",
        parent=styles["Heading1"],
        fontSize=24,
        spaceAfter=30,
        textColor=colors.HexColor("#0066CC"),
    )
    heading_style = ParagraphStyle(
        "CustomHeading",
        parent=styles["Heading2"],
        fontSize=14,
        spaceAfter=12,
        textColor=colors.HexColor("#0066CC"),
    )

    # Title and report metadata header.
    elements.append(Paragraph("mockupAWS Report", title_style))
    elements.append(Spacer(1, 0.2 * inch))
    elements.append(
        Paragraph(f"<b>Scenario:</b> {metrics['scenario']['name']}", styles["Normal"])
    )
    elements.append(
        Paragraph(f"<b>Region:</b> {metrics['scenario']['region']}", styles["Normal"])
    )
    elements.append(
        Paragraph(f"<b>Status:</b> {metrics['scenario']['status']}", styles["Normal"])
    )
    elements.append(
        Paragraph(
            f"<b>Generated:</b> {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            styles["Normal"],
        )
    )
    elements.append(Spacer(1, 0.3 * inch))

    # Summary section.
    if "summary" in include_sections:
        elements.append(Paragraph("Scenario Summary", heading_style))
        summary_data = [
            ["Metric", "Value"],
            ["Total Logs", str(metrics["summary"]["total_logs"])],
            ["Total Size", f"{metrics['summary']['total_size_mb']} MB"],
            ["Total Tokens", str(metrics["summary"]["total_tokens"])],
            ["SQS Blocks", str(metrics["summary"]["total_sqs_blocks"])],
        ]
        summary_table = Table(summary_data, colWidths=[2.5 * inch, 2.5 * inch])
        summary_table.setStyle(self._report_table_style())
        elements.append(summary_table)
        elements.append(Spacer(1, 0.3 * inch))

    # Cost breakdown section (skipped when no costs were recorded).
    if "costs" in include_sections and metrics["cost_breakdown"]:
        elements.append(Paragraph("Cost Breakdown", heading_style))
        cost_data = [["Service", "Cost (USD)"]]
        for service, cost in metrics["cost_breakdown"].items():
            cost_data.append([service.capitalize(), f"${cost:.6f}"])
        cost_data.append(
            [
                "Total Estimated",
                f"${metrics['scenario']['total_cost_estimate']:.6f}",
            ]
        )
        cost_table = Table(cost_data, colWidths=[2.5 * inch, 2.5 * inch])
        cost_style = self._report_table_style()
        # Highlight the final "Total Estimated" row.
        cost_style.add("FONTNAME", (0, -1), (-1, -1), "Helvetica-Bold")
        cost_style.add("BACKGROUND", (0, -1), (-1, -1), colors.lightblue)
        cost_table.setStyle(cost_style)
        elements.append(cost_table)
        elements.append(Spacer(1, 0.3 * inch))

    # PII summary section.
    if "pii" in include_sections:
        elements.append(Paragraph("PII Summary", heading_style))
        total_logs = metrics["summary"]["total_logs"]
        logs_with_pii = metrics["summary"]["logs_with_pii"]
        # Guard against division by zero for scenarios with no logs.
        pii_pct = (logs_with_pii / total_logs * 100) if total_logs > 0 else 0
        pii_data = [
            ["Metric", "Value"],
            ["Logs with PII", str(logs_with_pii)],
            ["PII Percentage", f"{pii_pct:.1f}%"],
        ]
        pii_table = Table(pii_data, colWidths=[2.5 * inch, 2.5 * inch])
        pii_table.setStyle(self._report_table_style())
        elements.append(pii_table)
        elements.append(Spacer(1, 0.3 * inch))

    # Log sources section.
    if "metrics" in include_sections and metrics["sources"]:
        elements.append(PageBreak())
        elements.append(Paragraph("Log Sources", heading_style))
        source_data = [["Source", "Count"]]
        for source, count in metrics["sources"].items():
            source_data.append([source, str(count)])
        source_table = Table(source_data, colWidths=[2.5 * inch, 2.5 * inch])
        source_table.setStyle(self._report_table_style())
        elements.append(source_table)
        elements.append(Spacer(1, 0.3 * inch))

    # Top-10 largest logs section.
    if "logs" in include_sections and metrics["top_logs"]:
        elements.append(PageBreak())
        elements.append(Paragraph("Top 10 Largest Logs", heading_style))
        log_data = [["Source", "Size (KB)", "Tokens", "PII"]]
        for log in metrics["top_logs"]:
            log_data.append(
                [
                    log["source"][:20],
                    f"{log['size_kb']:.2f}",
                    str(log["token_count"]),
                    "Yes" if log["has_pii"] else "No",
                ]
            )
        log_table = Table(
            log_data, colWidths=[2 * inch, 1.2 * inch, 1.2 * inch, 0.8 * inch]
        )
        log_style = self._report_table_style(header_font_size=10)
        log_style.add("FONTSIZE", (0, 1), (-1, -1), 9)
        log_table.setStyle(log_style)
        elements.append(log_table)

    def add_page_number(canvas, doc):
        """Draw a grey page number in the bottom-right footer."""
        canvas.saveState()
        canvas.setFont("Helvetica", 9)
        canvas.setFillColor(colors.grey)
        canvas.drawRightString(7.5 * inch, 0.5 * inch, f"Page {doc.page}")
        canvas.restoreState()

    doc.build(elements, onFirstPage=add_page_number, onLaterPages=add_page_number)

    # Enforce the configured maximum report size; delete oversized output.
    file_size_mb = file_path.stat().st_size / (1024 * 1024)
    if file_size_mb > self.max_file_size_mb:
        file_path.unlink()
        raise ValidationException(
            f"Generated file exceeds maximum size of {self.max_file_size_mb}MB"
        )
    return file_path
async def generate_csv(
    self,
    db: AsyncSession,
    scenario_id: UUID,
    report_id: UUID,
    include_logs: bool = True,
    date_from: Optional[datetime] = None,
    date_to: Optional[datetime] = None,
) -> Path:
    """Generate a CSV report for a scenario.

    Args:
        db: Database session.
        scenario_id: Scenario UUID.
        report_id: Report UUID.
        include_logs: If True, one row per log entry; otherwise a single
            summary row.
        date_from: Optional start date filter.
        date_to: Optional end date filter.

    Returns:
        Path to the generated CSV file.

    Raises:
        ValidationException: If the generated file exceeds the size limit.
    """
    file_path = self._get_file_path(scenario_id, report_id, "csv")
    metrics = await self.compile_metrics(db, scenario_id, date_from, date_to)

    if include_logs:
        # One CSV row per log entry within the optional date window.
        logs_query = select(ScenarioLog).where(
            ScenarioLog.scenario_id == scenario_id
        )
        if date_from:
            logs_query = logs_query.where(ScenarioLog.received_at >= date_from)
        if date_to:
            logs_query = logs_query.where(ScenarioLog.received_at <= date_to)
        logs_result = await db.execute(logs_query)
        logs = logs_result.scalars().all()

        columns = [
            "log_id",
            "scenario_id",
            "received_at",
            "source",
            "size_bytes",
            "size_kb",
            "has_pii",
            "token_count",
            "sqs_blocks",
            "message_preview",
        ]
        logs_data = [
            {
                "log_id": str(log.id),
                "scenario_id": str(scenario_id),
                "received_at": log.received_at,
                "source": log.source,
                "size_bytes": log.size_bytes,
                "size_kb": round(log.size_bytes / 1024, 2),
                "has_pii": log.has_pii,
                "token_count": log.token_count,
                "sqs_blocks": log.sqs_blocks,
                "message_preview": log.message_preview,
            }
            for log in logs
        ]
        # Pass columns explicitly so an empty result set still produces a
        # header row instead of a completely empty file.
        df = pd.DataFrame(logs_data, columns=columns)
        df.to_csv(file_path, index=False)
    else:
        # Single summary row with one column per aggregate.
        summary_data = {
            "scenario_id": [str(scenario_id)],
            "scenario_name": [metrics["scenario"]["name"]],
            "region": [metrics["scenario"]["region"]],
            "status": [metrics["scenario"]["status"]],
            "total_logs": [metrics["summary"]["total_logs"]],
            "total_size_mb": [metrics["summary"]["total_size_mb"]],
            "total_tokens": [metrics["summary"]["total_tokens"]],
            "total_sqs_blocks": [metrics["summary"]["total_sqs_blocks"]],
            "logs_with_pii": [metrics["summary"]["logs_with_pii"]],
            "total_cost_estimate": [metrics["scenario"]["total_cost_estimate"]],
        }
        # One extra column per cost-breakdown service.
        for service, cost in metrics["cost_breakdown"].items():
            summary_data[f"cost_{service}"] = [cost]
        df = pd.DataFrame(summary_data)
        df.to_csv(file_path, index=False)

    # Enforce the configured maximum report size; delete oversized output.
    file_size_mb = file_path.stat().st_size / (1024 * 1024)
    if file_size_mb > self.max_file_size_mb:
        file_path.unlink()
        raise ValidationException(
            f"Generated file exceeds maximum size of {self.max_file_size_mb}MB"
        )
    return file_path
async def cleanup_old_reports(self, max_age_days: int = 30) -> int:
"""Clean up reports older than specified days.
Args:
max_age_days: Maximum age of reports in days
Returns:
Number of files deleted
"""
cutoff_date = datetime.now() - timedelta(days=max_age_days)
deleted_count = 0
if self.storage_path.exists():
for scenario_dir in self.storage_path.iterdir():
if scenario_dir.is_dir():
for file_path in scenario_dir.iterdir():
if file_path.is_file():
file_stat = file_path.stat()
file_mtime = datetime.fromtimestamp(file_stat.st_mtime)
if file_mtime < cutoff_date:
file_path.unlink()
deleted_count += 1
# Remove empty directories
if not any(scenario_dir.iterdir()):
scenario_dir.rmdir()
return deleted_count
# Module-level singleton shared by importers of this module (re-exported via
# src.services); constructing it creates the report storage directory.
report_service = ReportService()