diff --git a/prompts/2-source-management.md b/prompts/2-source-management.md new file mode 100644 index 0000000..195950d --- /dev/null +++ b/prompts/2-source-management.md @@ -0,0 +1,106 @@ +# Prompt Sprint 2 - Source Management + +## 🎯 Sprint 2: Source Management + +**Iniziato**: 2026-04-06 +**Stato**: 🟡 In Progress +**Assegnato**: @sprint-lead + +--- + +## 📋 Obiettivo + +Implementare la gestione delle fonti (sources) per i notebook, permettendo agli utenti di aggiungere URL, PDF, YouTube, Google Drive e avviare ricerche web. + +--- + +## 🏗️ Architettura + +### Pattern da seguire (stesso di Sprint 1) + +``` +API Layer (FastAPI Routes) + ↓ +Service Layer (SourceService) + ↓ +External Layer (notebooklm-py client) +``` + +### Endpoints da implementare + +1. **POST /api/v1/notebooks/{id}/sources** - Aggiungere fonte +2. **GET /api/v1/notebooks/{id}/sources** - Listare fonti +3. **DELETE /api/v1/notebooks/{id}/sources/{source_id}** - Rimuovere fonte +4. **GET /api/v1/notebooks/{id}/sources/{source_id}/fulltext** - Ottenere testo +5. **POST /api/v1/notebooks/{id}/sources/research** - Ricerca web + +--- + +## 📊 Task Breakdown Sprint 2 + +### Fase 1: Specifiche +- [ ] SPEC-004: Analisi requisiti Source Management +- [ ] SPEC-005: Definire modelli dati fonti + +### Fase 2: API Design +- [ ] API-003: Modelli Pydantic (SourceCreate, Source, ecc.) +- [ ] API-004: Documentazione endpoints + +### Fase 3: Implementazione +- [ ] DEV-007: SourceService +- [ ] DEV-008: POST /sources +- [ ] DEV-009: GET /sources +- [ ] DEV-010: DELETE /sources/{id} +- [ ] DEV-011: POST /sources/research + +### Fase 4: Testing +- [ ] TEST-004: Unit tests SourceService +- [ ] TEST-005: Integration tests + +--- + +## 🔧 Implementazione + +### Tipi di Fonti Supportate + +```python +class SourceType(str, Enum): + URL = "url" + FILE = "file" # PDF, DOC, etc. 
+ YOUTUBE = "youtube" + DRIVE = "drive" +``` + +### SourceService Methods + +```python +class SourceService: + async def create(notebook_id: UUID, data: dict) -> Source + async def list(notebook_id: UUID) -> list[Source] + async def get(notebook_id: UUID, source_id: str) -> Source + async def delete(notebook_id: UUID, source_id: str) -> None + async def get_fulltext(notebook_id: UUID, source_id: str) -> str + async def research(notebook_id: UUID, query: str, mode: str) -> ResearchResult +``` + +--- + +## 📝 Note Importanti + +1. **Riusare pattern Sprint 1**: Stessa struttura NotebookService +2. **Gestione upload file**: Supportare multipart/form-data per PDF +3. **Ricerca web**: Integrare con notebooklm-py research +4. **Error handling**: Fonte già esistente, formato non supportato + +--- + +## 🚀 Prossimi Passi + +1. @sprint-lead: Attivare @api-designer per API-003 +2. @api-designer: Definire modelli Pydantic +3. @tdd-developer: Iniziare implementazione SourceService + +--- + +**Dipende da**: Sprint 1 (Notebook CRUD) ✅ +**Blocca**: Sprint 3 (Chat) 🔴 diff --git a/src/notebooklm_agent/api/main.py b/src/notebooklm_agent/api/main.py index b890106..b1c9db3 100644 --- a/src/notebooklm_agent/api/main.py +++ b/src/notebooklm_agent/api/main.py @@ -5,7 +5,7 @@ from contextlib import asynccontextmanager from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware -from notebooklm_agent.api.routes import health, notebooks +from notebooklm_agent.api.routes import health, notebooks, sources from notebooklm_agent.core.config import get_settings from notebooklm_agent.core.logging import setup_logging @@ -53,6 +53,7 @@ def create_application() -> FastAPI: # Include routers app.include_router(health.router, prefix="/health", tags=["health"]) app.include_router(notebooks.router, prefix="/api/v1/notebooks", tags=["notebooks"]) + app.include_router(sources.router, prefix="/api/v1/notebooks", tags=["sources"]) return app diff --git 
a/src/notebooklm_agent/api/models/requests.py b/src/notebooklm_agent/api/models/requests.py
index eb76a45..856cb02 100644
--- a/src/notebooklm_agent/api/models/requests.py
+++ b/src/notebooklm_agent/api/models/requests.py
@@ -227,3 +227,58 @@ class SourceCreate(BaseModel):
         if info.data.get("type") == "url" and not v:
             raise ValueError("URL is required for type 'url'")
         return v
+
+
+class ResearchRequest(BaseModel):
+    """Request model for web research.
+
+    Attributes:
+        query: Search query string. Surrounding whitespace is stripped
+            before the length limits are checked.
+        mode: Research mode (fast or deep).
+        auto_import: Whether to auto-import found sources.
+    """
+
+    model_config = ConfigDict(
+        json_schema_extra={
+            "example": {
+                "query": "artificial intelligence trends 2026",
+                "mode": "deep",
+                "auto_import": True,
+            }
+        }
+    )
+
+    query: str = Field(
+        ...,
+        min_length=3,
+        max_length=500,
+        description="Search query string",
+        examples=["artificial intelligence trends 2026"],
+    )
+    mode: str = Field(
+        "fast",
+        description="Research mode",
+        examples=["fast", "deep"],
+    )
+    auto_import: bool = Field(
+        True,
+        description="Auto-import found sources",
+        examples=[True, False],
+    )
+
+    @field_validator("mode")
+    @classmethod
+    def validate_mode(cls, v: str) -> str:
+        """Reject any research mode other than 'fast' or 'deep'.
+
+        Raises:
+            ValueError: If the mode is not one of the allowed values.
+        """
+        allowed = {"fast", "deep"}
+        if v not in allowed:
+            # sorted() keeps the message stable; set order varies between runs.
+            raise ValueError(f"Mode must be one of: {sorted(allowed)}")
+        return v
+
+    @field_validator("query", mode="before")
+    @classmethod
+    def validate_query(cls, v: object) -> object:
+        """Strip the query and reject blank input.
+
+        Runs before the field's own validation so that min_length and
+        max_length are applied to the stripped text, not the padded input.
+
+        Raises:
+            ValueError: If the query is empty or whitespace only.
+        """
+        if not isinstance(v, str):
+            # Leave non-strings for the normal str validation to reject.
+            return v
+        stripped = v.strip()
+        if not stripped:
+            raise ValueError("Query cannot be empty")
+        return stripped
diff --git a/src/notebooklm_agent/api/routes/sources.py b/src/notebooklm_agent/api/routes/sources.py
new file mode 100644
index 0000000..f53992e
--- /dev/null
+++ b/src/notebooklm_agent/api/routes/sources.py
@@ -0,0 +1,419 @@
+"""Source API routes.
+
+This module contains API endpoints for source management.
+"""
+
+from datetime import datetime
+from uuid import UUID, uuid4
+
+from fastapi import APIRouter, HTTPException, status
+
+from notebooklm_agent.api.models.requests import ResearchRequest, SourceCreate
+from notebooklm_agent.api.models.responses import (
+    ApiResponse,
+    PaginatedSources,
+    PaginationMeta,
+    ResponseMeta,
+    Source,
+)
+from notebooklm_agent.core.exceptions import NotebookLMError, NotFoundError, ValidationError
+from notebooklm_agent.services.source_service import SourceService
+
+router = APIRouter(tags=["sources"])
+
+
+async def get_source_service() -> SourceService:
+    """Get source service instance.
+
+    Returns:
+        A newly constructed SourceService.
+    """
+    return SourceService()
+
+
+def _error_detail(code: str, message: str, details: list | None = None) -> dict:
+    """Build the error envelope used as the HTTPException detail.
+
+    Args:
+        code: Machine-readable error code.
+        message: Human-readable error message.
+        details: Optional list of detail entries; falsy values become [].
+
+    Returns:
+        Dict with "success", "error" and "meta" keys.
+    """
+    return {
+        "success": False,
+        "error": {
+            "code": code,
+            "message": message,
+            "details": details or [],
+        },
+        "meta": {
+            "timestamp": datetime.utcnow().isoformat(),
+            "request_id": str(uuid4()),
+        },
+    }
+
+
+def _parse_notebook_id(notebook_id: str) -> UUID:
+    """Parse the notebook path parameter into a UUID.
+
+    Raises:
+        HTTPException: 400 if the value is not a valid UUID string.
+    """
+    try:
+        return UUID(notebook_id)
+    except ValueError as exc:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=_error_detail("VALIDATION_ERROR", "Invalid notebook ID format"),
+        ) from exc
+
+
+def _to_http_exception(exc: Exception) -> HTTPException:
+    """Map a service-layer exception to the matching HTTPException.
+
+    ValidationError becomes 400 (with its details), NotFoundError becomes
+    404, and anything else passed in becomes 502. The checks run in that
+    order.
+    """
+    if isinstance(exc, ValidationError):
+        return HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=_error_detail(exc.code, exc.message, exc.details),
+        )
+    if isinstance(exc, NotFoundError):
+        status_code = status.HTTP_404_NOT_FOUND
+    else:
+        status_code = status.HTTP_502_BAD_GATEWAY
+    return HTTPException(
+        status_code=status_code,
+        detail=_error_detail(exc.code, exc.message),
+    )
+
+
+def _success(data):
+    """Wrap a payload in the standard success ApiResponse envelope."""
+    return ApiResponse(
+        success=True,
+        data=data,
+        error=None,
+        meta=ResponseMeta(
+            timestamp=datetime.utcnow(),
+            request_id=uuid4(),
+        ),
+    )
+
+
+@router.post(
+    "/{notebook_id}/sources",
+    response_model=ApiResponse[Source],
+    status_code=status.HTTP_201_CREATED,
+    summary="Add source to notebook",
+    description="Add a new source (URL, YouTube, etc.) to a notebook.",
+)
+async def create_source(notebook_id: str, data: SourceCreate):
+    """Add a source to a notebook.
+
+    Args:
+        notebook_id: Notebook UUID.
+        data: Source creation data.
+
+    Returns:
+        Created source.
+
+    Raises:
+        HTTPException: 400 for validation errors, 404 for not found,
+            502 for external API errors.
+    """
+    notebook_uuid = _parse_notebook_id(notebook_id)
+    try:
+        service = await get_source_service()
+        source = await service.create(notebook_uuid, data.model_dump())
+    except (ValidationError, NotFoundError, NotebookLMError) as exc:
+        raise _to_http_exception(exc) from exc
+    return _success(source)
+
+
+@router.get(
+    "/{notebook_id}/sources",
+    response_model=ApiResponse[PaginatedSources],
+    summary="List notebook sources",
+    description="List all sources for a notebook with optional filtering.",
+)
+async def list_sources(
+    notebook_id: str,
+    source_type: str | None = None,
+    status: str | None = None,
+):
+    """List sources for a notebook.
+
+    Args:
+        notebook_id: Notebook UUID.
+        source_type: Optional filter by source type.
+        status: Optional filter by processing status.
+
+    Returns:
+        All matching sources in a single page (no real pagination).
+
+    Raises:
+        HTTPException: 400 for invalid ID, 404 for not found,
+            502 for external API errors.
+    """
+    # The `status` query parameter shadows the fastapi `status` module inside
+    # this function, so HTTP status codes are resolved only in the
+    # module-level helpers, never here.
+    notebook_uuid = _parse_notebook_id(notebook_id)
+    try:
+        service = await get_source_service()
+        sources = await service.list(notebook_uuid, source_type, status)
+    except (NotFoundError, NotebookLMError) as exc:
+        raise _to_http_exception(exc) from exc
+
+    page = PaginatedSources(
+        items=sources,
+        pagination=PaginationMeta(
+            total=len(sources),
+            limit=len(sources),
+            offset=0,
+            has_more=False,
+        ),
+    )
+    return _success(page)
+
+
+@router.delete(
+    "/{notebook_id}/sources/{source_id}",
+    status_code=status.HTTP_204_NO_CONTENT,
+    summary="Delete source",
+    description="Delete a source from a notebook.",
+)
+async def delete_source(notebook_id: str, source_id: str):
+    """Delete a source from a notebook.
+
+    Args:
+        notebook_id: Notebook UUID.
+        source_id: Source ID to delete.
+
+    Raises:
+        HTTPException: 400 for invalid ID, 404 for not found,
+            502 for external API errors.
+    """
+    notebook_uuid = _parse_notebook_id(notebook_id)
+    try:
+        service = await get_source_service()
+        await service.delete(notebook_uuid, source_id)
+        # 204 No Content - no body
+    except (NotFoundError, NotebookLMError) as exc:
+        raise _to_http_exception(exc) from exc
+
+
+@router.post(
+    "/{notebook_id}/sources/research",
+    response_model=ApiResponse[dict],
+    status_code=status.HTTP_202_ACCEPTED,
+    summary="Start web research",
+    description="Start web research and optionally auto-import sources.",
+)
+async def research_sources(notebook_id: str, data: ResearchRequest):
+    """Start web research for a notebook.
+
+    Args:
+        notebook_id: Notebook UUID.
+        data: Research request with query and options.
+
+    Returns:
+        Research job information.
+
+    Raises:
+        HTTPException: 400 for invalid data, 404 for not found,
+            502 for external API errors.
+    """
+    notebook_uuid = _parse_notebook_id(notebook_id)
+    try:
+        service = await get_source_service()
+        result = await service.research(
+            notebook_uuid,
+            data.query,
+            data.mode,
+            data.auto_import,
+        )
+    except (ValidationError, NotFoundError, NotebookLMError) as exc:
+        raise _to_http_exception(exc) from exc
+    return _success(result)
diff --git a/src/notebooklm_agent/services/source_service.py b/src/notebooklm_agent/services/source_service.py
new file mode 100644
index 0000000..b72826e
--- /dev/null
+++ b/src/notebooklm_agent/services/source_service.py
@@ -0,0 +1,331 @@
+"""Source service for business logic.
+
+This module contains the SourceService class which handles
+all business logic for source operations.
+"""
+
+from datetime import datetime
+from typing import Any
+from uuid import UUID
+
+from notebooklm_agent.api.models.responses import Source
+from notebooklm_agent.core.exceptions import NotebookLMError, NotFoundError, ValidationError
+
+
+class SourceService:
+    """Service for source operations.
+
+    Handles the business logic for source management: adding sources to a
+    notebook, listing, deleting, fetching full text and starting web research.
+
+    Attributes:
+        _client: The notebooklm-py client instance, or None until first use.
+    """
+
+    _ALLOWED_TYPES = frozenset({"url", "file", "youtube", "drive"})
+    _URL_REQUIRED_TYPES = frozenset({"url", "youtube"})
+    _RESEARCH_MODES = frozenset({"fast", "deep"})
+
+    def __init__(self, client: Any = None) -> None:
+        """Initialize the source service.
+
+        Args:
+            client: Optional notebooklm-py client instance.
+                If not provided, it is created on first use.
+        """
+        self._client = client
+
+    async def _get_client(self) -> Any:
+        """Get or create the notebooklm-py client.
+
+        Returns:
+            The notebooklm-py client instance.
+        """
+        if self._client is None:
+            # Lazy initialization - import here to avoid circular imports
+            from notebooklm import NotebookLMClient
+
+            self._client = await NotebookLMClient.from_storage()
+        return self._client
+
+    @staticmethod
+    def _translate_error(
+        exc: Exception, action: str, resource: str, resource_id: str
+    ) -> Exception:
+        """Convert an arbitrary client failure into a domain exception.
+
+        Args:
+            exc: The exception that was caught.
+            action: Verb phrase for the message, e.g. "add source".
+            resource: Resource name reported when the error is a "not found".
+            resource_id: Identifier reported alongside the resource name.
+
+        Returns:
+            NotFoundError if the error text contains "not found"
+            (case-insensitive), otherwise NotebookLMError.
+        """
+        if "not found" in str(exc).lower():
+            return NotFoundError(resource, resource_id)
+        return NotebookLMError(
+            message=f"Failed to {action}: {exc}",
+            code="NOTEBOOKLM_ERROR",
+        )
+
+    def _validate_source_type(self, source_type: str) -> str:
+        """Validate source type.
+
+        Args:
+            source_type: The source type to validate.
+
+        Returns:
+            The validated source type.
+
+        Raises:
+            ValidationError: If source type is invalid.
+        """
+        if source_type not in self._ALLOWED_TYPES:
+            # sorted() keeps the message stable; set order varies between runs.
+            raise ValidationError(
+                message=(
+                    "Invalid source type. Must be one of: "
+                    f"{sorted(self._ALLOWED_TYPES)}"
+                ),
+                code="VALIDATION_ERROR",
+            )
+        return source_type
+
+    def _validate_url(self, url: str | None, source_type: str) -> str | None:
+        """Validate URL for source.
+
+        Args:
+            url: The URL to validate.
+            source_type: The type of source.
+
+        Returns:
+            The URL unchanged, or None.
+
+        Raises:
+            ValidationError: If URL is required but not provided.
+        """
+        if source_type in self._URL_REQUIRED_TYPES and not url:
+            raise ValidationError(
+                message=f"URL is required for source type '{source_type}'",
+                code="VALIDATION_ERROR",
+            )
+        return url
+
+    async def create(self, notebook_id: UUID, data: dict) -> Source:
+        """Add a source to a notebook.
+
+        Args:
+            notebook_id: The notebook ID to add the source to.
+            data: Source data including type, url, and optional title.
+
+        Returns:
+            The created source.
+
+        Raises:
+            ValidationError: If source data is invalid.
+            NotFoundError: If notebook not found.
+            NotebookLMError: If external API fails.
+        """
+        source_type = data.get("type", "url")
+        self._validate_source_type(source_type)
+
+        url = data.get("url")
+        self._validate_url(url, source_type)
+
+        # File uploads need a multipart endpoint. Reject them before touching
+        # the client so no network call is made and a missing notebook cannot
+        # mask this error.
+        if source_type == "file":
+            raise ValidationError(
+                message="File upload not supported via this method. Use file upload endpoint.",
+                code="VALIDATION_ERROR",
+            )
+
+        title = data.get("title")
+
+        try:
+            client = await self._get_client()
+            notebook = await client.notebooks.get(str(notebook_id))
+
+            if source_type == "url":
+                result = await notebook.sources.add_url(url, title=title)
+            elif source_type == "youtube":
+                result = await notebook.sources.add_youtube(url, title=title)
+            else:
+                # "drive" is the only validated type left at this point.
+                result = await notebook.sources.add_drive(url, title=title)
+
+            return Source(
+                id=getattr(result, "id", str(notebook_id)),
+                notebook_id=notebook_id,
+                type=source_type,
+                title=title or getattr(result, "title", "Untitled"),
+                url=url,
+                status=getattr(result, "status", "processing"),
+                created_at=getattr(result, "created_at", datetime.utcnow()),
+            )
+
+        except ValidationError:
+            raise
+        except Exception as e:
+            raise self._translate_error(
+                e, "add source", "Notebook", str(notebook_id)
+            ) from e
+
+    async def list(
+        self,
+        notebook_id: UUID,
+        source_type: str | None = None,
+        status: str | None = None,
+    ) -> list[Source]:
+        """List sources for a notebook.
+
+        Args:
+            notebook_id: The notebook ID.
+            source_type: Optional filter by source type.
+            status: Optional filter by status.
+
+        Returns:
+            List of sources matching the filters.
+
+        Raises:
+            NotFoundError: If notebook not found.
+            NotebookLMError: If external API fails.
+        """
+        try:
+            client = await self._get_client()
+            notebook = await client.notebooks.get(str(notebook_id))
+
+            sources = await notebook.sources.list()
+
+            # A filter that is None or empty matches every source.
+            return [
+                Source(
+                    id=getattr(src, "id", str(notebook_id)),
+                    notebook_id=notebook_id,
+                    type=getattr(src, "type", "url"),
+                    title=getattr(src, "title", "Untitled"),
+                    url=getattr(src, "url", None),
+                    status=getattr(src, "status", "ready"),
+                    created_at=getattr(src, "created_at", datetime.utcnow()),
+                )
+                for src in sources
+                if (not source_type or getattr(src, "type", "") == source_type)
+                and (not status or getattr(src, "status", "") == status)
+            ]
+
+        except NotFoundError:
+            raise
+        except Exception as e:
+            raise self._translate_error(
+                e, "list sources", "Notebook", str(notebook_id)
+            ) from e
+
+    async def delete(self, notebook_id: UUID, source_id: str) -> None:
+        """Delete a source from a notebook.
+
+        Args:
+            notebook_id: The notebook ID.
+            source_id: The source ID to delete.
+
+        Raises:
+            NotFoundError: If notebook or source not found (always reported
+                against the source ID).
+            NotebookLMError: If external API fails.
+        """
+        try:
+            client = await self._get_client()
+            notebook = await client.notebooks.get(str(notebook_id))
+            await notebook.sources.delete(source_id)
+        except Exception as e:
+            raise self._translate_error(
+                e, "delete source", "Source", source_id
+            ) from e
+
+    async def get_fulltext(self, notebook_id: UUID, source_id: str) -> str:
+        """Get the full text content of a source.
+
+        Args:
+            notebook_id: The notebook ID.
+            source_id: The source ID.
+
+        Returns:
+            The full text content.
+
+        Raises:
+            NotFoundError: If notebook or source not found (always reported
+                against the source ID).
+            NotebookLMError: If external API fails.
+        """
+        try:
+            client = await self._get_client()
+            notebook = await client.notebooks.get(str(notebook_id))
+
+            source = await notebook.sources.get(source_id)
+            return await source.get_fulltext()
+        except Exception as e:
+            raise self._translate_error(
+                e, "get source fulltext", "Source", source_id
+            ) from e
+
+    async def research(
+        self,
+        notebook_id: UUID,
+        query: str,
+        mode: str = "fast",
+        auto_import: bool = True,
+    ) -> dict:
+        """Start web research for a notebook.
+
+        Args:
+            notebook_id: The notebook ID.
+            query: The search query.
+            mode: Research mode (fast or deep).
+            auto_import: Whether to auto-import found sources.
+
+        Returns:
+            Dict with research_id, status, query, mode and sources_found.
+
+        Raises:
+            NotFoundError: If notebook not found.
+            ValidationError: If query or mode is invalid.
+            NotebookLMError: If external API fails.
+        """
+        if not query or not query.strip():
+            raise ValidationError(
+                message="Query cannot be empty",
+                code="VALIDATION_ERROR",
+            )
+
+        if mode not in self._RESEARCH_MODES:
+            raise ValidationError(
+                message="Mode must be 'fast' or 'deep'",
+                code="VALIDATION_ERROR",
+            )
+
+        try:
+            client = await self._get_client()
+            notebook = await client.notebooks.get(str(notebook_id))
+
+            result = await notebook.sources.research(
+                query=query,
+                mode=mode,
+                auto_import=auto_import,
+            )
+
+            return {
+                "research_id": getattr(result, "id", str(notebook_id)),
+                "status": getattr(result, "status", "pending"),
+                "query": query,
+                "mode": mode,
+                "sources_found": getattr(result, "sources_found", 0),
+            }
+
+        except ValidationError:
+            raise
+        except Exception as e:
+            raise self._translate_error(
+                e, "start research", "Notebook", str(notebook_id)
+            ) from e