feat: initial project setup with scenarios, database and web ui

Add complete mockupAWS platform for AWS cost estimation:
- FastAPI backend with scenario management
- PostgreSQL database schema for scenarios, metrics, logs
- AWS pricing table with real pricing data
- React frontend dashboard (planned)
- PII detection and token counting
- Report generation (PDF/CSV)
- Complete test suite with pytest
- Docker Compose setup
- Documentation: README, PRD, Architecture
- OpenCode configuration (.opencode/)
2026-04-07 12:52:18 +02:00
parent b539134280
commit 59e5cf48f0
23 changed files with 2982 additions and 29 deletions
--- a/test/test_ingest.py
+++ b/test/test_ingest.py
@@ -0,0 +1,97 @@
+import pytest
+from fastapi.testclient import TestClient
+from src.main import app
+
+# Shared FastAPI test client, created once at import time and used by every test
+client = TestClient(app)
+
+
+@pytest.fixture(autouse=True)
+def reset_metrics():
+    """Reset the counters before every test to guarantee isolation."""
+    client.post("/reset")
+
+
+def test_ingest_returns_200():
+    """Verifica che il webhook accetti il payload e risponda velocemente."""
+    payload = {"message": "Standard error log", "source": "nginx"}
+    response = client.post("/ingest", json=payload)
+    assert response.status_code == 200
+
+
+def test_sqs_billing_block_calculation():
+    """
+    Simula l'invio di un payload > 64KB.
+    AWS SQS fattura a blocchi di 64KB. 65KB = 2 blocchi.
+    """
+    # 65 * 1024 bytes = 66560 caratteri (circa 65KB)
+    large_payload = {"message": "A" * 66560}
+    client.post("/ingest", json=large_payload)
+
+    response = client.get("/metrics")
+    metrics = response.json()
+
+    # Ci aspettiamo 2 blocchi fatturabili
+    assert metrics["sqs_billing_blocks"] == 2
+
+
+def test_safety_first_leak_detection():
+    """
+    Verifica che se Logstash fallisce la sanitizzazione,
+    il mock lo rilevi e incrementi il contatore delle violazioni (Safety First).
+    """
+    leaky_payload = {"message": "User luca@example.com failed login"}
+    client.post("/ingest", json=leaky_payload)
+
+    response = client.get("/metrics")
+    metrics = response.json()
+
+    assert metrics["safety_violations_detected"] == 1
+
+
+def test_double_check_token_count():
+    """
+    Verifica che il tokenizer calcoli esattamente il numero di token.
+    La stringa 'Hello, world!' corrisponde a 4 token in cl100k_base.
+    (Double Check)
+    """
+    payload = {"message": "Hello, world!"}
+
+    client.post("/ingest", json=payload)
+
+    # Forziamo il processamento della coda
+    client.post("/flush")
+
+    response = client.get("/metrics")
+    metrics = response.json()
+
+    assert metrics["llm_estimated_input_tokens"] == 4
+
+
+def test_little_often_batch_deduplication():
+    """
+    Simula l'invio di 10 log identici in rapida successione.
+    Verifica che vengano deduplicati e che i token LLM
+    siano contati per 1 solo log, simulando il batching (Little Often).
+    """
+    payload = {"message": "Hello, world!"}
+
+    # Inviamo 10 messaggi identici
+    for _ in range(10):
+        client.post("/ingest", json=payload)
+
+    # Forziamo il processamento della coda
+    client.post("/flush")
+
+    response = client.get("/metrics")
+    metrics = response.json()
+
+    # 1. 10 richieste ricevute dal mock SQS
+    assert metrics["total_requests"] == 10
+
+    # 2. Almeno 1 invocazione Lambda simulata
+    assert metrics.get("lambda_simulated_invocations", 0) > 0
+
+    # 3. I token stimati devono essere solo 4 (quelli di 1 singolo messaggio),
+    # NON 40 (che avremmo senza deduplicazione).
+    assert metrics["llm_estimated_input_tokens"] == 4