From 26fb4a276f16371e7bc30cb41d52eeab22a1517b Mon Sep 17 00:00:00 2001 From: Luca Sacchi Ricciardi Date: Tue, 7 Apr 2026 13:53:07 +0200 Subject: [PATCH] feat(database): create all core tables with migrations Add database migrations for mockupAWS v0.2.0: - DB-003: scenario_logs table * Stores received log entries with SHA256 hash for deduplication * PII detection flags * Metrics: size_bytes, token_count, sqs_blocks * Indexes on scenario_id, received_at, message_hash, has_pii - DB-004: scenario_metrics table * Time-series storage for metrics aggregation * Supports: sqs, lambda, bedrock, safety metric types * Flexible JSONB metadata field * BRIN index on timestamp for efficient queries - DB-005: aws_pricing table * Stores AWS service pricing by region * Supports price history with effective_from/to dates * Active pricing flag for current rates * Index on service, region, tier combination - DB-006: reports table * Generated report tracking * Supports PDF and CSV formats * File path and size tracking * Metadata JSONB for extensibility Tables include: - UUID primary keys with auto-generation (all tables) - Foreign key constraints with CASCADE delete (scenario_logs, scenario_metrics, reports) - Appropriate indexes for query performance (all tables) - Check constraints for data validation (scenario_logs, aws_pricing) Tasks: DB-003, DB-004, DB-005, DB-006 complete --- .../48f2231e7c12_create_aws_pricing_table.py | 78 ++++++++++++++++ ...47ed57b77_create_scenario_metrics_table.py | 81 +++++++++++++++++ ...e46de4b0264a_create_scenario_logs_table.py | 91 +++++++++++++++++++ .../e80c6eef58b2_create_reports_table.py | 73 +++++++++++++++ 4 files changed, 323 insertions(+) create mode 100644 alembic/versions/48f2231e7c12_create_aws_pricing_table.py create mode 100644 alembic/versions/5e247ed57b77_create_scenario_metrics_table.py create mode 100644 alembic/versions/e46de4b0264a_create_scenario_logs_table.py create mode 100644 alembic/versions/e80c6eef58b2_create_reports_table.py diff --git a/alembic/versions/48f2231e7c12_create_aws_pricing_table.py 
b/alembic/versions/48f2231e7c12_create_aws_pricing_table.py new file mode 100644 index 0000000..9fb488c --- /dev/null +++ b/alembic/versions/48f2231e7c12_create_aws_pricing_table.py @@ -0,0 +1,78 @@ +"""create aws_pricing table + +Revision ID: 48f2231e7c12 +Revises: 5e247ed57b77 +Create Date: 2026-04-07 13:50:15.040833 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + + +# revision identifiers, used by Alembic. +revision: str = "48f2231e7c12" +down_revision: Union[str, Sequence[str], None] = "5e247ed57b77" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Create the aws_pricing table with its price check constraint and indexes.""" + op.create_table( + "aws_pricing", + sa.Column( + "id", + postgresql.UUID(as_uuid=True), + primary_key=True, + server_default=sa.text("uuid_generate_v4()"), + ), + sa.Column( + "service", sa.String(50), nullable=False + ), # 'sqs', 'lambda', 'bedrock' + sa.Column("region", sa.String(50), nullable=False), + sa.Column("tier", sa.String(50), server_default="standard", nullable=False), + sa.Column("price_per_unit", sa.DECIMAL(15, 10), nullable=False), + sa.Column( + "unit", sa.String(20), nullable=False + ), # 'per_million_requests', 'per_gb_second', 'per_1k_tokens' + sa.Column( + "effective_from", + sa.Date(), + server_default=sa.text("CURRENT_DATE"), + nullable=False, + ), + sa.Column("effective_to", sa.Date(), nullable=True), + sa.Column("is_active", sa.Boolean(), server_default="true", nullable=False), + sa.Column("source_url", sa.String(500), nullable=True), + sa.Column("description", sa.Text(), nullable=True), + ) + + # Add constraints: reject negative prices (chk_price_positive still allows zero) + op.create_check_constraint( + "chk_price_positive", "aws_pricing", sa.column("price_per_unit") >= 0 + ) + + # Add indexes; idx_pricing_active is partial and covers only rows with is_active = true + op.create_index("idx_pricing_service", "aws_pricing", ["service"]) + op.create_index("idx_pricing_region", "aws_pricing", ["region"]) + op.create_index( + 
"idx_pricing_active", + "aws_pricing", + ["service", "region", "tier"], + postgresql_where=sa.text("is_active = true"), + ) + + +def downgrade() -> None: + """Drop the aws_pricing indexes, then the table.""" + # Drop indexes in reverse order of creation + op.drop_index("idx_pricing_active", table_name="aws_pricing") + op.drop_index("idx_pricing_region", table_name="aws_pricing") + op.drop_index("idx_pricing_service", table_name="aws_pricing") + + # Drop table + op.drop_table("aws_pricing") diff --git a/alembic/versions/5e247ed57b77_create_scenario_metrics_table.py b/alembic/versions/5e247ed57b77_create_scenario_metrics_table.py new file mode 100644 index 0000000..ae83fb2 --- /dev/null +++ b/alembic/versions/5e247ed57b77_create_scenario_metrics_table.py @@ -0,0 +1,81 @@ +"""create scenario_metrics table + +Revision ID: 5e247ed57b77 +Revises: e46de4b0264a +Create Date: 2026-04-07 13:49:11.267167 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + + +# revision identifiers, used by Alembic. 
+revision: str = "5e247ed57b77" +down_revision: Union[str, Sequence[str], None] = "e46de4b0264a" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Create the scenario_metrics table and its indexes.""" + op.create_table( + "scenario_metrics", + sa.Column( + "id", + postgresql.UUID(as_uuid=True), + primary_key=True, + server_default=sa.text("uuid_generate_v4()"), + ), + sa.Column( + "scenario_id", + postgresql.UUID(as_uuid=True), + sa.ForeignKey("scenarios.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column( + "timestamp", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("NOW()"), + nullable=False, + ), + sa.Column( + "metric_type", sa.String(50), nullable=False + ), # 'sqs', 'lambda', 'bedrock', 'safety' + sa.Column("metric_name", sa.String(100), nullable=False), + sa.Column( + "value", sa.DECIMAL(15, 6), server_default="0.000000", nullable=False + ), + sa.Column( + "unit", sa.String(20), nullable=False + ), # 'count', 'bytes', 'tokens', 'usd', 'invocations' + sa.Column("metadata", postgresql.JSONB(), server_default="{}"), + ) + + # Add indexes; the timestamp index uses BRIN, the others use the default index type + op.create_index("idx_metrics_scenario_id", "scenario_metrics", ["scenario_id"]) + op.create_index( + "idx_metrics_timestamp", + "scenario_metrics", + ["timestamp"], + postgresql_using="brin", + ) + op.create_index("idx_metrics_type", "scenario_metrics", ["metric_type"]) + op.create_index( + "idx_metrics_scenario_type", "scenario_metrics", ["scenario_id", "metric_type"] + ) + + +def downgrade() -> None: + """Drop the scenario_metrics indexes, then the table.""" + # Drop indexes in reverse order of creation + op.drop_index("idx_metrics_scenario_type", table_name="scenario_metrics") + op.drop_index("idx_metrics_type", table_name="scenario_metrics") + op.drop_index("idx_metrics_timestamp", table_name="scenario_metrics") + op.drop_index("idx_metrics_scenario_id", table_name="scenario_metrics") + + # Drop table + op.drop_table("scenario_metrics") diff --git a/alembic/versions/e46de4b0264a_create_scenario_logs_table.py 
b/alembic/versions/e46de4b0264a_create_scenario_logs_table.py new file mode 100644 index 0000000..64ddfa5 --- /dev/null +++ b/alembic/versions/e46de4b0264a_create_scenario_logs_table.py @@ -0,0 +1,91 @@ +"""create scenario_logs table + +Revision ID: e46de4b0264a +Revises: 8c29fdcbbf85 +Create Date: 2026-04-07 13:48:26.383709 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + + +# revision identifiers, used by Alembic. +revision: str = "e46de4b0264a" +down_revision: Union[str, Sequence[str], None] = "8c29fdcbbf85" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Create the scenario_logs table with its check constraints and indexes.""" + op.create_table( + "scenario_logs", + sa.Column( + "id", + postgresql.UUID(as_uuid=True), + primary_key=True, + server_default=sa.text("uuid_generate_v4()"), + ), + sa.Column( + "scenario_id", + postgresql.UUID(as_uuid=True), + sa.ForeignKey("scenarios.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column( + "received_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("NOW()"), + nullable=False, + ), + sa.Column("message_hash", sa.String(64), nullable=False), # SHA256 + sa.Column("message_preview", sa.String(500), nullable=True), + sa.Column("source", sa.String(100), server_default="unknown", nullable=False), + sa.Column("size_bytes", sa.Integer(), server_default="0", nullable=False), + sa.Column("has_pii", sa.Boolean(), server_default="false", nullable=False), + sa.Column("token_count", sa.Integer(), server_default="0", nullable=False), + sa.Column("sqs_blocks", sa.Integer(), server_default="1", nullable=False), + ) + + # Add constraints: size_bytes and token_count must be >= 0, sqs_blocks must be >= 1 + op.create_check_constraint( + "chk_size_positive", "scenario_logs", sa.column("size_bytes") >= 0 + ) + op.create_check_constraint( + "chk_token_positive", "scenario_logs", sa.column("token_count") >= 0 + ) + op.create_check_constraint( + 
"chk_blocks_positive", "scenario_logs", sa.column("sqs_blocks") >= 1 + ) + + # Add indexes; received_at uses BRIN and idx_logs_has_pii is partial (has_pii = true rows only) + op.create_index("idx_logs_scenario_id", "scenario_logs", ["scenario_id"]) + op.create_index( + "idx_logs_received_at", + "scenario_logs", + ["received_at"], + postgresql_using="brin", + ) + op.create_index("idx_logs_message_hash", "scenario_logs", ["message_hash"]) + op.create_index( + "idx_logs_has_pii", + "scenario_logs", + ["has_pii"], + postgresql_where=sa.text("has_pii = true"), + ) + + +def downgrade() -> None: + """Drop the scenario_logs indexes, then the table.""" + # Drop indexes in reverse order of creation + op.drop_index("idx_logs_has_pii", table_name="scenario_logs") + op.drop_index("idx_logs_message_hash", table_name="scenario_logs") + op.drop_index("idx_logs_received_at", table_name="scenario_logs") + op.drop_index("idx_logs_scenario_id", table_name="scenario_logs") + + # Drop table + op.drop_table("scenario_logs") diff --git a/alembic/versions/e80c6eef58b2_create_reports_table.py b/alembic/versions/e80c6eef58b2_create_reports_table.py new file mode 100644 index 0000000..c6c2251 --- /dev/null +++ b/alembic/versions/e80c6eef58b2_create_reports_table.py @@ -0,0 +1,73 @@ +"""create reports table + +Revision ID: e80c6eef58b2 +Revises: 48f2231e7c12 +Create Date: 2026-04-07 13:51:51.381906 + +""" + +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + + +# revision identifiers, used by Alembic. 
+revision: str = "e80c6eef58b2" +down_revision: Union[str, Sequence[str], None] = "48f2231e7c12" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Create the reports table, whose format column uses the report_format enum, and its indexes.""" + op.create_table( + "reports", + sa.Column( + "id", + postgresql.UUID(as_uuid=True), + primary_key=True, + server_default=sa.text("uuid_generate_v4()"), + ), + sa.Column( + "scenario_id", + postgresql.UUID(as_uuid=True), + sa.ForeignKey("scenarios.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column( + "format", sa.Enum("pdf", "csv", name="report_format"), nullable=False + ), + sa.Column("file_path", sa.String(500), nullable=False), + sa.Column("file_size_bytes", sa.Integer(), nullable=True), + sa.Column( + "generated_at", + sa.TIMESTAMP(timezone=True), + server_default=sa.text("NOW()"), + nullable=False, + ), + sa.Column( + "generated_by", sa.String(100), nullable=True + ), # user_id or api_key_id + sa.Column("metadata", postgresql.JSONB(), server_default="{}"), + ) + + # Add indexes; the generated_at index uses BRIN + op.create_index("idx_reports_scenario_id", "reports", ["scenario_id"]) + op.create_index( + "idx_reports_generated_at", "reports", ["generated_at"], postgresql_using="brin" + ) + + +def downgrade() -> None: + """Drop the reports indexes and table, then the report_format enum type.""" + # Drop indexes in reverse order of creation + op.drop_index("idx_reports_generated_at", table_name="reports") + op.drop_index("idx_reports_scenario_id", table_name="reports") + + # Drop table + op.drop_table("reports") + + # Drop enum type after the table that uses it (IF EXISTS: no error if it is already gone) + op.execute("DROP TYPE IF EXISTS report_format;")