feat(api): add source management endpoints (Sprint 2)

Implement Sprint 2: Source Management

- Add SourceService with create, list, delete, get_fulltext, research methods
- Add POST /api/v1/notebooks/{id}/sources - Add source (URL, YouTube, Drive)
- Add GET /api/v1/notebooks/{id}/sources - List sources with filtering
- Add DELETE /api/v1/notebooks/{id}/sources/{source_id} - Delete source
- Add POST /api/v1/notebooks/{id}/sources/research - Web research
- Add ResearchRequest model for research parameters
- Integrate sources router with main app

Endpoints:
- POST /sources - 201 Created
- GET /sources - 200 OK with pagination metadata (all matches returned in a single page; no limit/offset parameters yet)
- DELETE /sources/{id} - 204 No Content
- POST /sources/research - 202 Accepted

Technical:
- Support for url, youtube, drive source types
- Filtering by source_type and status
- Validation for research mode (fast/deep)
- Error handling with standardized responses

Related: Sprint 2 - Source Management
This commit is contained in:
Luca Sacchi Ricciardi
2026-04-06 01:26:59 +02:00
parent fe88bf2ca1
commit d869ab215c
5 changed files with 913 additions and 1 deletions

View File

@@ -5,7 +5,7 @@ from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from notebooklm_agent.api.routes import health, notebooks
from notebooklm_agent.api.routes import health, notebooks, sources
from notebooklm_agent.core.config import get_settings
from notebooklm_agent.core.logging import setup_logging
@@ -53,6 +53,7 @@ def create_application() -> FastAPI:
# Include routers
app.include_router(health.router, prefix="/health", tags=["health"])
app.include_router(notebooks.router, prefix="/api/v1/notebooks", tags=["notebooks"])
app.include_router(sources.router, prefix="/api/v1/notebooks", tags=["sources"])
return app

View File

@@ -227,3 +227,58 @@ class SourceCreate(BaseModel):
if info.data.get("type") == "url" and not v:
raise ValueError("URL is required for type 'url'")
return v
class ResearchRequest(BaseModel):
    """Request model for web research.

    Attributes:
        query: Search query string. Surrounding whitespace is stripped, and
            the stripped text must still be at least 3 characters long.
        mode: Research mode (fast or deep).
        auto_import: Whether to auto-import found sources.
    """

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "query": "artificial intelligence trends 2026",
                "mode": "deep",
                "auto_import": True,
            }
        }
    )

    query: str = Field(
        ...,
        min_length=3,
        max_length=500,
        description="Search query string",
        examples=["artificial intelligence trends 2026"],
    )
    mode: str = Field(
        "fast",
        description="Research mode",
        examples=["fast", "deep"],
    )
    auto_import: bool = Field(
        True,
        description="Auto-import found sources",
        examples=[True, False],
    )

    @field_validator("mode")
    @classmethod
    def validate_mode(cls, v: str) -> str:
        """Validate research mode.

        Raises:
            ValueError: If the mode is neither "fast" nor "deep".
        """
        allowed = {"fast", "deep"}
        if v not in allowed:
            # Sort before formatting: interpolating the set directly makes the
            # message order vary between interpreter runs.
            raise ValueError(f"Mode must be one of: {sorted(allowed)}")
        return v

    @field_validator("query")
    @classmethod
    def validate_query(cls, v: str) -> str:
        """Strip the query and validate what is left.

        The ``min_length`` constraint on the field is applied to the raw
        string before this validator runs, so a whitespace-padded value such
        as ``"  a  "`` would otherwise be accepted and then shrink to a
        one-character query. The length is therefore re-checked here.

        Raises:
            ValueError: If the stripped query is empty or too short.
        """
        min_len = 3  # keep in sync with min_length on the ``query`` field
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("Query cannot be empty")
        if len(stripped) < min_len:
            raise ValueError(f"Query must be at least {min_len} characters long")
        return stripped

View File

@@ -0,0 +1,419 @@
"""Source API routes.
This module contains API endpoints for source management.
"""
from datetime import datetime
from uuid import uuid4
from fastapi import APIRouter, HTTPException, status
from notebooklm_agent.api.models.requests import ResearchRequest, SourceCreate
from notebooklm_agent.api.models.responses import (
ApiResponse,
PaginatedSources,
PaginationMeta,
ResponseMeta,
Source,
)
from notebooklm_agent.core.exceptions import NotebookLMError, NotFoundError, ValidationError
from notebooklm_agent.services.source_service import SourceService
# Router collecting the source endpoints defined in this module; each route
# registered on it carries the "sources" tag.
router = APIRouter(tags=["sources"])
async def get_source_service() -> SourceService:
    """Build the service object used by the source endpoints.

    Returns:
        A newly constructed SourceService (no client is passed in).
    """
    service = SourceService()
    return service
@router.post(
    "/{notebook_id}/sources",
    response_model=ApiResponse[Source],
    status_code=status.HTTP_201_CREATED,
    summary="Add source to notebook",
    description="Add a new source (URL, YouTube, etc.) to a notebook.",
)
async def create_source(notebook_id: str, data: SourceCreate):
    """Add a source to a notebook.

    Args:
        notebook_id: Notebook UUID (as a string path parameter).
        data: Source creation data.

    Returns:
        ApiResponse wrapping the created source.

    Raises:
        HTTPException: 400 for an invalid notebook ID or validation errors,
            404 for not found, 502 for external API errors.
    """
    from uuid import UUID

    def _http_error(status_code: int, code: str, message: str, details: object = None) -> HTTPException:
        """Build an HTTPException carrying the standard error envelope."""
        return HTTPException(
            status_code=status_code,
            detail={
                "success": False,
                "error": {
                    "code": code,
                    "message": message,
                    "details": details or [],
                },
                "meta": {
                    # NOTE(review): datetime.utcnow() is deprecated since
                    # Python 3.12; kept so the serialized timestamp format
                    # does not change.
                    "timestamp": datetime.utcnow().isoformat(),
                    "request_id": str(uuid4()),
                },
            },
        )

    try:
        notebook_uuid = UUID(notebook_id)
    except ValueError as exc:
        raise _http_error(
            status.HTTP_400_BAD_REQUEST,
            "VALIDATION_ERROR",
            "Invalid notebook ID format",
        ) from exc

    # Only the service call is guarded; the order of the handlers is kept
    # exactly as before (most specific domain errors first).
    try:
        service = await get_source_service()
        source = await service.create(notebook_uuid, data.model_dump())
    except ValidationError as exc:
        raise _http_error(
            status.HTTP_400_BAD_REQUEST, exc.code, exc.message, exc.details
        ) from exc
    except NotFoundError as exc:
        raise _http_error(status.HTTP_404_NOT_FOUND, exc.code, exc.message) from exc
    except NotebookLMError as exc:
        raise _http_error(status.HTTP_502_BAD_GATEWAY, exc.code, exc.message) from exc

    return ApiResponse(
        success=True,
        data=source,
        error=None,
        meta=ResponseMeta(
            timestamp=datetime.utcnow(),
            request_id=uuid4(),
        ),
    )
@router.get(
    "/{notebook_id}/sources",
    response_model=ApiResponse[PaginatedSources],
    summary="List notebook sources",
    description="List all sources for a notebook with optional filtering.",
)
async def list_sources(
    notebook_id: str,
    source_type: str | None = None,
    status: str | None = None,
):
    """List sources for a notebook.

    Args:
        notebook_id: Notebook UUID (as a string path parameter).
        source_type: Optional filter by source type.
        status: Optional filter by processing status.

    Returns:
        ApiResponse wrapping all matching sources in a single page
        (offset 0, ``has_more`` False).

    Raises:
        HTTPException: 400 for invalid ID, 404 for not found, 502 for external API errors.
    """
    # The ``status`` query parameter shadows the module-level
    # ``fastapi.status`` import inside this function, so ``status.HTTP_*``
    # would be an attribute lookup on a str/None and fail with
    # AttributeError. The parameter name is part of the HTTP interface, so
    # the status-code module is re-imported under a different local name.
    from uuid import UUID

    from fastapi import status as http_status

    def _http_error(status_code: int, code: str, message: str) -> HTTPException:
        """Build an HTTPException carrying the standard error envelope."""
        return HTTPException(
            status_code=status_code,
            detail={
                "success": False,
                "error": {
                    "code": code,
                    "message": message,
                    "details": [],
                },
                "meta": {
                    # NOTE(review): datetime.utcnow() is deprecated since
                    # Python 3.12; kept so the serialized timestamp format
                    # does not change.
                    "timestamp": datetime.utcnow().isoformat(),
                    "request_id": str(uuid4()),
                },
            },
        )

    try:
        notebook_uuid = UUID(notebook_id)
    except ValueError as exc:
        raise _http_error(
            http_status.HTTP_400_BAD_REQUEST,
            "VALIDATION_ERROR",
            "Invalid notebook ID format",
        ) from exc

    try:
        service = await get_source_service()
        sources = await service.list(notebook_uuid, source_type, status)
    except NotFoundError as exc:
        raise _http_error(http_status.HTTP_404_NOT_FOUND, exc.code, exc.message) from exc
    except NotebookLMError as exc:
        raise _http_error(http_status.HTTP_502_BAD_GATEWAY, exc.code, exc.message) from exc

    # No server-side paging: everything the service returned is one page.
    # NOTE(review): limit is 0 when there are no sources — confirm that
    # PaginationMeta accepts that.
    total = len(sources)
    return ApiResponse(
        success=True,
        data=PaginatedSources(
            items=sources,
            pagination=PaginationMeta(
                total=total,
                limit=total,
                offset=0,
                has_more=False,
            ),
        ),
        error=None,
        meta=ResponseMeta(
            timestamp=datetime.utcnow(),
            request_id=uuid4(),
        ),
    )
@router.delete(
    "/{notebook_id}/sources/{source_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    summary="Delete source",
    description="Delete a source from a notebook.",
)
async def delete_source(notebook_id: str, source_id: str):
    """Delete a source from a notebook.

    Args:
        notebook_id: Notebook UUID (as a string path parameter).
        source_id: Source ID to delete; passed to the service unchanged.

    Returns:
        None; the route is declared with 204 No Content.

    Raises:
        HTTPException: 400 for invalid ID, 404 for not found, 502 for external API errors.
    """
    from uuid import UUID

    def _http_error(status_code: int, code: str, message: str) -> HTTPException:
        """Build an HTTPException carrying the standard error envelope."""
        return HTTPException(
            status_code=status_code,
            detail={
                "success": False,
                "error": {
                    "code": code,
                    "message": message,
                    "details": [],
                },
                "meta": {
                    # NOTE(review): datetime.utcnow() is deprecated since
                    # Python 3.12; kept so the serialized timestamp format
                    # does not change.
                    "timestamp": datetime.utcnow().isoformat(),
                    "request_id": str(uuid4()),
                },
            },
        )

    try:
        notebook_uuid = UUID(notebook_id)
    except ValueError as exc:
        raise _http_error(
            status.HTTP_400_BAD_REQUEST,
            "VALIDATION_ERROR",
            "Invalid notebook ID format",
        ) from exc

    try:
        service = await get_source_service()
        await service.delete(notebook_uuid, source_id)
        # 204 No Content - no body
    except NotFoundError as exc:
        raise _http_error(status.HTTP_404_NOT_FOUND, exc.code, exc.message) from exc
    except NotebookLMError as exc:
        raise _http_error(status.HTTP_502_BAD_GATEWAY, exc.code, exc.message) from exc
@router.post(
    "/{notebook_id}/sources/research",
    response_model=ApiResponse[dict],
    status_code=status.HTTP_202_ACCEPTED,
    summary="Start web research",
    description="Start web research and optionally auto-import sources.",
)
async def research_sources(notebook_id: str, data: ResearchRequest):
    """Start web research for a notebook.

    Args:
        notebook_id: Notebook UUID (as a string path parameter).
        data: Research request with query and options.

    Returns:
        ApiResponse wrapping the dict returned by the service's research call.

    Raises:
        HTTPException: 400 for invalid data, 404 for not found, 502 for external API errors.
    """
    from uuid import UUID

    def _http_error(status_code: int, code: str, message: str, details: object = None) -> HTTPException:
        """Build an HTTPException carrying the standard error envelope."""
        return HTTPException(
            status_code=status_code,
            detail={
                "success": False,
                "error": {
                    "code": code,
                    "message": message,
                    "details": details or [],
                },
                "meta": {
                    # NOTE(review): datetime.utcnow() is deprecated since
                    # Python 3.12; kept so the serialized timestamp format
                    # does not change.
                    "timestamp": datetime.utcnow().isoformat(),
                    "request_id": str(uuid4()),
                },
            },
        )

    try:
        notebook_uuid = UUID(notebook_id)
    except ValueError as exc:
        raise _http_error(
            status.HTTP_400_BAD_REQUEST,
            "VALIDATION_ERROR",
            "Invalid notebook ID format",
        ) from exc

    # Handler order is kept exactly as before (most specific domain errors
    # first).
    try:
        service = await get_source_service()
        result = await service.research(
            notebook_uuid,
            data.query,
            data.mode,
            data.auto_import,
        )
    except ValidationError as exc:
        raise _http_error(
            status.HTTP_400_BAD_REQUEST, exc.code, exc.message, exc.details
        ) from exc
    except NotFoundError as exc:
        raise _http_error(status.HTTP_404_NOT_FOUND, exc.code, exc.message) from exc
    except NotebookLMError as exc:
        raise _http_error(status.HTTP_502_BAD_GATEWAY, exc.code, exc.message) from exc

    return ApiResponse(
        success=True,
        data=result,
        error=None,
        meta=ResponseMeta(
            timestamp=datetime.utcnow(),
            request_id=uuid4(),
        ),
    )

View File

@@ -0,0 +1,331 @@
"""Source service for business logic.
This module contains the SourceService class which handles
all business logic for source operations.
"""
from datetime import datetime
from typing import Any
from uuid import UUID
from notebooklm_agent.api.models.responses import Source
from notebooklm_agent.core.exceptions import NotebookLMError, NotFoundError, ValidationError
class SourceService:
    """Service for source operations.

    This service handles all business logic for source management,
    including adding sources to notebooks, listing, deleting, reading the
    full text of a source and starting web research.

    Attributes:
        _client: The notebooklm-py client instance, or None until first use.
    """

    def __init__(self, client: Any = None) -> None:
        """Initialize the source service.

        Args:
            client: Optional notebooklm-py client instance.
                If not provided, will be created on first use.
        """
        self._client = client

    async def _get_client(self) -> Any:
        """Get or create notebooklm-py client.

        Returns:
            The notebooklm-py client instance.
        """
        if self._client is None:
            # Lazy initialization - import here to avoid circular imports
            from notebooklm import NotebookLMClient

            self._client = await NotebookLMClient.from_storage()
        return self._client

    @staticmethod
    def _translate_error(
        exc: Exception,
        *,
        resource: str,
        resource_id: str,
        action: str,
    ) -> Exception:
        """Map an arbitrary client failure onto a domain exception.

        Args:
            exc: The exception raised while talking to the client.
            resource: Resource name reported when the failure is a "not found".
            resource_id: Identifier reported alongside ``resource``.
            action: Verb phrase used in the generic failure message.

        Returns:
            NotFoundError when the exception text contains "not found"
            (case-insensitive), otherwise NotebookLMError. The caller raises
            the returned exception, chaining it to ``exc``.
        """
        if "not found" in str(exc).lower():
            return NotFoundError(resource, resource_id)
        return NotebookLMError(
            message=f"Failed to {action}: {exc}",
            code="NOTEBOOKLM_ERROR",
        )

    def _validate_source_type(self, source_type: str) -> str:
        """Validate source type.

        Args:
            source_type: The source type to validate.

        Returns:
            The validated source type.

        Raises:
            ValidationError: If source type is invalid.
        """
        allowed_types = {"url", "file", "youtube", "drive"}
        if source_type not in allowed_types:
            # Sort before formatting: interpolating the set directly makes
            # the message order vary between interpreter runs.
            raise ValidationError(
                message=f"Invalid source type. Must be one of: {sorted(allowed_types)}",
                code="VALIDATION_ERROR",
            )
        return source_type

    def _validate_url(self, url: str | None, source_type: str) -> str | None:
        """Validate URL for source.

        Args:
            url: The URL to validate.
            source_type: The type of source.

        Returns:
            The validated URL or None.

        Raises:
            ValidationError: If URL is required but not provided.
        """
        # NOTE(review): "drive" is not in this set, so a drive source can
        # reach add_drive() with url=None — confirm that is intended.
        if source_type in {"url", "youtube"} and not url:
            raise ValidationError(
                message=f"URL is required for source type '{source_type}'",
                code="VALIDATION_ERROR",
            )
        return url

    async def create(self, notebook_id: UUID, data: dict) -> Source:
        """Add a source to a notebook.

        Args:
            notebook_id: The notebook ID to add the source to.
            data: Source data including type, url, and optional title.

        Returns:
            The created source.

        Raises:
            ValidationError: If source data is invalid, or the type is
                "file" (not supported by this method).
            NotFoundError: If notebook not found.
            NotebookLMError: If external API fails.
        """
        # Validate input
        source_type = data.get("type", "url")
        self._validate_source_type(source_type)
        url = data.get("url")
        self._validate_url(url, source_type)
        title = data.get("title")

        # Reject file sources before any remote call: the outcome does not
        # depend on the notebook, so there is no reason to fetch it first.
        if source_type == "file":
            raise ValidationError(
                message="File upload not supported via this method. Use file upload endpoint.",
                code="VALIDATION_ERROR",
            )

        try:
            client = await self._get_client()
            notebook = await client.notebooks.get(str(notebook_id))

            # Add source based on type
            if source_type == "url":
                result = await notebook.sources.add_url(url, title=title)
            elif source_type == "youtube":
                result = await notebook.sources.add_youtube(url, title=title)
            else:
                # Only "drive" can remain after the checks above.
                result = await notebook.sources.add_drive(url, title=title)

            return Source(
                id=getattr(result, "id", str(notebook_id)),
                notebook_id=notebook_id,
                type=source_type,
                title=title or getattr(result, "title", "Untitled"),
                url=url,
                status=getattr(result, "status", "processing"),
                created_at=getattr(result, "created_at", datetime.utcnow()),
            )
        except ValidationError:
            raise
        except Exception as e:
            raise self._translate_error(
                e,
                resource="Notebook",
                resource_id=str(notebook_id),
                action="add source",
            ) from e

    # NOTE: this method name shadows the builtin ``list`` for the remainder
    # of the class body, so annotations such as ``list[...]`` must not be
    # used on definitions placed after it in this class.
    async def list(
        self,
        notebook_id: UUID,
        source_type: str | None = None,
        status: str | None = None,
    ) -> list[Source]:
        """List sources for a notebook.

        Args:
            notebook_id: The notebook ID.
            source_type: Optional filter by source type.
            status: Optional filter by status.

        Returns:
            List of sources.

        Raises:
            NotFoundError: If notebook not found.
            NotebookLMError: If external API fails.
        """
        try:
            client = await self._get_client()
            notebook = await client.notebooks.get(str(notebook_id))
            sources = await notebook.sources.list()

            result = []
            for src in sources:
                # Apply filters
                if source_type and getattr(src, "type", "") != source_type:
                    continue
                if status and getattr(src, "status", "") != status:
                    continue
                result.append(
                    Source(
                        id=getattr(src, "id", str(notebook_id)),
                        notebook_id=notebook_id,
                        type=getattr(src, "type", "url"),
                        title=getattr(src, "title", "Untitled"),
                        url=getattr(src, "url", None),
                        status=getattr(src, "status", "ready"),
                        created_at=getattr(src, "created_at", datetime.utcnow()),
                    )
                )
            return result
        except NotFoundError:
            raise
        except Exception as e:
            raise self._translate_error(
                e,
                resource="Notebook",
                resource_id=str(notebook_id),
                action="list sources",
            ) from e

    async def delete(self, notebook_id: UUID, source_id: str) -> None:
        """Delete a source from a notebook.

        Args:
            notebook_id: The notebook ID.
            source_id: The source ID to delete.

        Raises:
            NotFoundError: If notebook or source not found (reported against
                the source ID in both cases).
            NotebookLMError: If external API fails.
        """
        try:
            client = await self._get_client()
            notebook = await client.notebooks.get(str(notebook_id))
            # Try to delete the source
            await notebook.sources.delete(source_id)
        except Exception as e:
            raise self._translate_error(
                e,
                resource="Source",
                resource_id=source_id,
                action="delete source",
            ) from e

    async def get_fulltext(self, notebook_id: UUID, source_id: str) -> str:
        """Get the full text content of a source.

        Args:
            notebook_id: The notebook ID.
            source_id: The source ID.

        Returns:
            The full text content.

        Raises:
            NotFoundError: If notebook or source not found (reported against
                the source ID in both cases).
            NotebookLMError: If external API fails.
        """
        try:
            client = await self._get_client()
            notebook = await client.notebooks.get(str(notebook_id))
            source = await notebook.sources.get(source_id)
            return await source.get_fulltext()
        except Exception as e:
            raise self._translate_error(
                e,
                resource="Source",
                resource_id=source_id,
                action="get source fulltext",
            ) from e

    async def research(
        self,
        notebook_id: UUID,
        query: str,
        mode: str = "fast",
        auto_import: bool = True,
    ) -> dict:
        """Start web research for a notebook.

        Args:
            notebook_id: The notebook ID.
            query: The search query.
            mode: Research mode (fast or deep).
            auto_import: Whether to auto-import found sources.

        Returns:
            Dict with research_id, status, query, mode and sources_found.

        Raises:
            NotFoundError: If notebook not found.
            ValidationError: If query is empty or mode is invalid.
            NotebookLMError: If external API fails.
        """
        if not query or not query.strip():
            raise ValidationError(
                message="Query cannot be empty",
                code="VALIDATION_ERROR",
            )
        if mode not in {"fast", "deep"}:
            raise ValidationError(
                message="Mode must be 'fast' or 'deep'",
                code="VALIDATION_ERROR",
            )

        try:
            client = await self._get_client()
            notebook = await client.notebooks.get(str(notebook_id))

            # Start research
            result = await notebook.sources.research(
                query=query,
                mode=mode,
                auto_import=auto_import,
            )
            return {
                "research_id": getattr(result, "id", str(notebook_id)),
                "status": getattr(result, "status", "pending"),
                "query": query,
                "mode": mode,
                "sources_found": getattr(result, "sources_found", 0),
            }
        except ValidationError:
            raise
        except Exception as e:
            raise self._translate_error(
                e,
                resource="Notebook",
                resource_id=str(notebook_id),
                action="start research",
            ) from e