feat(database): create all core tables with migrations

Add database migrations for mockupAWS v0.2.0:

- DB-003: scenario_logs table
  * Stores received log entries with SHA256 hash for deduplication
  * PII detection flags
  * Metrics: size_bytes, token_count, sqs_blocks
  * Indexes on scenario_id, received_at, message_hash, has_pii

- DB-004: scenario_metrics table
  * Time-series storage for metrics aggregation
  * Supports: sqs, lambda, bedrock, safety metric types
  * Flexible JSONB metadata field
  * BRIN index on timestamp for efficient queries

- DB-005: aws_pricing table
  * Stores AWS service pricing by region
  * Supports price history with effective_from/to dates
  * Active pricing flag for current rates
  * Index on service, region, tier combination

- DB-006: reports table
  * Generated report tracking
  * Supports PDF and CSV formats
  * File path and size tracking
  * Metadata JSONB for extensibility

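Tables include, where applicable:
- UUID primary keys with auto-generation (all tables)
- Foreign key constraints with CASCADE delete (scenario_logs, scenario_metrics, reports)
- Appropriate indexes for query performance (all tables)
- Check constraints for data validation (scenario_logs, aws_pricing)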

Tasks: DB-003, DB-004, DB-005, DB-006 complete
This commit is contained in:
Luca Sacchi Ricciardi
2026-04-07 13:53:07 +02:00
parent 6f03c33ab5
commit 26fb4a276f
4 changed files with 323 additions and 0 deletions

View File

@@ -0,0 +1,78 @@
"""create aws_pricing table
Revision ID: 48f2231e7c12
Revises: 5e247ed57b77
Create Date: 2026-04-07 13:50:15.040833
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
# ID of this migration (same value as "Revision ID" in the module docstring).
revision: str = "48f2231e7c12"
# Parent migration that this one revises (the "create scenario_metrics table"
# revision), so this migration is applied after it.
down_revision: Union[str, Sequence[str], None] = "5e247ed57b77"
# No branch label is assigned to this revision.
branch_labels: Union[str, Sequence[str], None] = None
# No additional revision dependencies are declared.
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the ``aws_pricing`` table, its price check and its indexes.

    Operations run in this order: the table, the ``chk_price_positive``
    check constraint, two single-column indexes, and finally a partial
    index limited to rows where ``is_active = true``.
    """
    table_name = "aws_pricing"

    columns = [
        # The key is generated by the database through uuid_generate_v4().
        # NOTE(review): presumably provided by the uuid-ossp extension set up
        # by an earlier migration -- confirm.
        sa.Column(
            "id",
            postgresql.UUID(as_uuid=True),
            primary_key=True,
            server_default=sa.text("uuid_generate_v4()"),
        ),
        # e.g. 'sqs', 'lambda', 'bedrock'
        sa.Column("service", sa.String(50), nullable=False),
        sa.Column("region", sa.String(50), nullable=False),
        sa.Column("tier", sa.String(50), server_default="standard", nullable=False),
        sa.Column("price_per_unit", sa.DECIMAL(15, 10), nullable=False),
        # e.g. 'per_million_requests', 'per_gb_second', 'per_1k_tokens'
        sa.Column("unit", sa.String(20), nullable=False),
        # Validity window: the start defaults to the insertion date, the end
        # is optional.
        sa.Column(
            "effective_from",
            sa.Date(),
            server_default=sa.text("CURRENT_DATE"),
            nullable=False,
        ),
        sa.Column("effective_to", sa.Date(), nullable=True),
        sa.Column("is_active", sa.Boolean(), server_default="true", nullable=False),
        sa.Column("source_url", sa.String(500), nullable=True),
        sa.Column("description", sa.Text(), nullable=True),
    ]
    op.create_table(table_name, *columns)

    # Despite the constraint name, a price of zero is accepted (>= 0).
    non_negative_price = sa.column("price_per_unit") >= 0
    op.create_check_constraint("chk_price_positive", table_name, non_negative_price)

    # Plain single-column indexes.
    single_column_indexes = (
        ("idx_pricing_service", "service"),
        ("idx_pricing_region", "region"),
    )
    for index_name, column_name in single_column_indexes:
        op.create_index(index_name, table_name, [column_name])

    # Partial composite index covering only the rows flagged as active.
    active_rows_only = sa.text("is_active = true")
    op.create_index(
        "idx_pricing_active",
        table_name,
        ["service", "region", "tier"],
        postgresql_where=active_rows_only,
    )
def downgrade() -> None:
    """Remove the ``aws_pricing`` indexes and then the table itself."""
    table_name = "aws_pricing"

    # Indexes are dropped in the reverse of the order upgrade() created them.
    indexes_to_drop = (
        "idx_pricing_active",
        "idx_pricing_region",
        "idx_pricing_service",
    )
    for index_name in indexes_to_drop:
        op.drop_index(index_name, table_name=table_name)

    op.drop_table(table_name)

View File

@@ -0,0 +1,81 @@
"""create scenario_metrics table
Revision ID: 5e247ed57b77
Revises: e46de4b0264a
Create Date: 2026-04-07 13:49:11.267167
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
# ID of this migration (same value as "Revision ID" in the module docstring).
revision: str = "5e247ed57b77"
# Parent migration that this one revises (the "create scenario_logs table"
# revision), so this migration is applied after it.
down_revision: Union[str, Sequence[str], None] = "e46de4b0264a"
# No branch label is assigned to this revision.
branch_labels: Union[str, Sequence[str], None] = None
# No additional revision dependencies are declared.
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the ``scenario_metrics`` table and its four indexes.

    The table is created first; the indexes follow in a fixed order:
    scenario id, timestamp (BRIN), metric type, and the composite
    (scenario id, metric type) index.
    """
    table_name = "scenario_metrics"

    columns = [
        # The key is generated by the database through uuid_generate_v4().
        # NOTE(review): presumably provided by the uuid-ossp extension set up
        # by an earlier migration -- confirm.
        sa.Column(
            "id",
            postgresql.UUID(as_uuid=True),
            primary_key=True,
            server_default=sa.text("uuid_generate_v4()"),
        ),
        # Rows are removed together with their parent scenario.
        sa.Column(
            "scenario_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("scenarios.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "timestamp",
            sa.TIMESTAMP(timezone=True),
            server_default=sa.text("NOW()"),
            nullable=False,
        ),
        # e.g. 'sqs', 'lambda', 'bedrock', 'safety'
        sa.Column("metric_type", sa.String(50), nullable=False),
        sa.Column("metric_name", sa.String(100), nullable=False),
        sa.Column(
            "value", sa.DECIMAL(15, 6), server_default="0.000000", nullable=False
        ),
        # e.g. 'count', 'bytes', 'tokens', 'usd', 'invocations'
        sa.Column("unit", sa.String(20), nullable=False),
        sa.Column("metadata", postgresql.JSONB(), server_default="{}"),
    ]
    op.create_table(table_name, *columns)

    # (index name, indexed columns, extra keyword options) in creation order.
    index_specs = (
        ("idx_metrics_scenario_id", ["scenario_id"], {}),
        ("idx_metrics_timestamp", ["timestamp"], {"postgresql_using": "brin"}),
        ("idx_metrics_type", ["metric_type"], {}),
        ("idx_metrics_scenario_type", ["scenario_id", "metric_type"], {}),
    )
    for index_name, indexed_columns, options in index_specs:
        op.create_index(index_name, table_name, indexed_columns, **options)
def downgrade() -> None:
    """Remove the ``scenario_metrics`` indexes and then the table itself."""
    table_name = "scenario_metrics"

    # Indexes are dropped in the reverse of the order upgrade() created them.
    indexes_to_drop = (
        "idx_metrics_scenario_type",
        "idx_metrics_type",
        "idx_metrics_timestamp",
        "idx_metrics_scenario_id",
    )
    for index_name in indexes_to_drop:
        op.drop_index(index_name, table_name=table_name)

    op.drop_table(table_name)

View File

@@ -0,0 +1,91 @@
"""create scenario_logs table
Revision ID: e46de4b0264a
Revises: 8c29fdcbbf85
Create Date: 2026-04-07 13:48:26.383709
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
# ID of this migration (same value as "Revision ID" in the module docstring).
revision: str = "e46de4b0264a"
# Parent migration that this one revises; this migration is applied after it.
down_revision: Union[str, Sequence[str], None] = "8c29fdcbbf85"
# No branch label is assigned to this revision.
branch_labels: Union[str, Sequence[str], None] = None
# No additional revision dependencies are declared.
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the ``scenario_logs`` table with its checks and indexes.

    Operations run in this order: the table, three check constraints on the
    numeric columns, then four indexes (scenario id, BRIN on ``received_at``,
    message hash, and a partial index on rows where ``has_pii = true``).
    """
    table_name = "scenario_logs"

    columns = [
        # The key is generated by the database through uuid_generate_v4().
        # NOTE(review): presumably provided by the uuid-ossp extension set up
        # by an earlier migration -- confirm.
        sa.Column(
            "id",
            postgresql.UUID(as_uuid=True),
            primary_key=True,
            server_default=sa.text("uuid_generate_v4()"),
        ),
        # Rows are removed together with their parent scenario.
        sa.Column(
            "scenario_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("scenarios.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "received_at",
            sa.TIMESTAMP(timezone=True),
            server_default=sa.text("NOW()"),
            nullable=False,
        ),
        # SHA256
        sa.Column("message_hash", sa.String(64), nullable=False),
        sa.Column("message_preview", sa.String(500), nullable=True),
        sa.Column("source", sa.String(100), server_default="unknown", nullable=False),
        sa.Column("size_bytes", sa.Integer(), server_default="0", nullable=False),
        sa.Column("has_pii", sa.Boolean(), server_default="false", nullable=False),
        sa.Column("token_count", sa.Integer(), server_default="0", nullable=False),
        sa.Column("sqs_blocks", sa.Integer(), server_default="1", nullable=False),
    ]
    op.create_table(table_name, *columns)

    # Despite the "_positive" names, size and token count may be zero; only
    # sqs_blocks must be at least 1.
    check_constraints = (
        ("chk_size_positive", sa.column("size_bytes") >= 0),
        ("chk_token_positive", sa.column("token_count") >= 0),
        ("chk_blocks_positive", sa.column("sqs_blocks") >= 1),
    )
    for constraint_name, condition in check_constraints:
        op.create_check_constraint(constraint_name, table_name, condition)

    # (index name, indexed columns, extra keyword options) in creation order.
    index_specs = (
        ("idx_logs_scenario_id", ["scenario_id"], {}),
        ("idx_logs_received_at", ["received_at"], {"postgresql_using": "brin"}),
        ("idx_logs_message_hash", ["message_hash"], {}),
        (
            "idx_logs_has_pii",
            ["has_pii"],
            {"postgresql_where": sa.text("has_pii = true")},
        ),
    )
    for index_name, indexed_columns, options in index_specs:
        op.create_index(index_name, table_name, indexed_columns, **options)
def downgrade() -> None:
    """Remove the ``scenario_logs`` indexes and then the table itself."""
    table_name = "scenario_logs"

    # Indexes are dropped in the reverse of the order upgrade() created them.
    indexes_to_drop = (
        "idx_logs_has_pii",
        "idx_logs_message_hash",
        "idx_logs_received_at",
        "idx_logs_scenario_id",
    )
    for index_name in indexes_to_drop:
        op.drop_index(index_name, table_name=table_name)

    op.drop_table(table_name)

View File

@@ -0,0 +1,73 @@
"""create reports table
Revision ID: e80c6eef58b2
Revises: 48f2231e7c12
Create Date: 2026-04-07 13:51:51.381906
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
# ID of this migration (same value as "Revision ID" in the module docstring).
revision: str = "e80c6eef58b2"
# Parent migration that this one revises (the "create aws_pricing table"
# revision), so this migration is applied after it.
down_revision: Union[str, Sequence[str], None] = "48f2231e7c12"
# No branch label is assigned to this revision.
branch_labels: Union[str, Sequence[str], None] = None
# No additional revision dependencies are declared.
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the ``reports`` table and its two indexes.

    The ``format`` column uses the named enum type ``report_format`` with
    the values ``pdf`` and ``csv``. After the table, an index on
    ``scenario_id`` and a BRIN index on ``generated_at`` are created.
    """
    table_name = "reports"

    report_format = sa.Enum("pdf", "csv", name="report_format")

    columns = [
        # The key is generated by the database through uuid_generate_v4().
        # NOTE(review): presumably provided by the uuid-ossp extension set up
        # by an earlier migration -- confirm.
        sa.Column(
            "id",
            postgresql.UUID(as_uuid=True),
            primary_key=True,
            server_default=sa.text("uuid_generate_v4()"),
        ),
        # Rows are removed together with their parent scenario.
        sa.Column(
            "scenario_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey("scenarios.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("format", report_format, nullable=False),
        sa.Column("file_path", sa.String(500), nullable=False),
        sa.Column("file_size_bytes", sa.Integer(), nullable=True),
        sa.Column(
            "generated_at",
            sa.TIMESTAMP(timezone=True),
            server_default=sa.text("NOW()"),
            nullable=False,
        ),
        # user_id or api_key_id
        sa.Column("generated_by", sa.String(100), nullable=True),
        sa.Column("metadata", postgresql.JSONB(), server_default="{}"),
    ]
    op.create_table(table_name, *columns)

    # (index name, indexed columns, extra keyword options) in creation order.
    index_specs = (
        ("idx_reports_scenario_id", ["scenario_id"], {}),
        ("idx_reports_generated_at", ["generated_at"], {"postgresql_using": "brin"}),
    )
    for index_name, indexed_columns, options in index_specs:
        op.create_index(index_name, table_name, indexed_columns, **options)
def downgrade() -> None:
    """Remove the ``reports`` indexes, the table, and the enum type."""
    table_name = "reports"

    # Indexes are dropped in the reverse of the order upgrade() created them.
    indexes_to_drop = (
        "idx_reports_generated_at",
        "idx_reports_scenario_id",
    )
    for index_name in indexes_to_drop:
        op.drop_index(index_name, table_name=table_name)

    op.drop_table(table_name)

    # The enum type is dropped explicitly, after the table that used it;
    # IF EXISTS keeps the statement harmless when the type is already gone.
    drop_enum_sql = "DROP TYPE IF EXISTS report_format;"
    op.execute(drop_enum_sql)