diff --git a/tests/unit/test_agentic_rag/__init__.py b/tests/unit/test_agentic_rag/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/test_agentic_rag/conftest.py b/tests/unit/test_agentic_rag/conftest.py new file mode 100644 index 0000000..2e7b470 --- /dev/null +++ b/tests/unit/test_agentic_rag/conftest.py @@ -0,0 +1,270 @@ +"""Shared fixtures for AgenticRAG unit tests.""" + +import pytest +from unittest.mock import Mock, AsyncMock, MagicMock, patch +import sys +from pathlib import Path + +# Add src to path +sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent.parent / "src")) + + +@pytest.fixture +def mock_settings(): + """Create a mock Settings object.""" + settings = Mock() + settings.app_name = "AgenticRAG" + settings.app_version = "2.0.0" + settings.debug = True + settings.cors_origins = ["http://localhost:3000"] + settings.jwt_secret = "test-secret" + settings.jwt_algorithm = "HS256" + settings.access_token_expire_minutes = 30 + settings.admin_api_key = "test-admin-key" + settings.qdrant_host = "localhost" + settings.qdrant_port = 6333 + settings.max_file_size = 10 * 1024 * 1024 + settings.upload_dir = "./uploads" + settings.default_llm_provider = "openai" + settings.default_llm_model = "gpt-4o-mini" + settings.openai_api_key = "test-openai-key" + settings.zai_api_key = "test-zai-key" + settings.opencode_zen_api_key = "test-opencode-key" + settings.openrouter_api_key = "test-openrouter-key" + settings.anthropic_api_key = "test-anthropic-key" + settings.google_api_key = "test-google-key" + settings.mistral_api_key = "test-mistral-key" + settings.azure_api_key = "test-azure-key" + settings.azure_endpoint = "https://test.azure.com" + settings.azure_api_version = "2024-02-01" + settings.embedding_provider = "openai" + settings.embedding_model = "text-embedding-3-small" + settings.embedding_api_key = "test-embedding-key" + settings.redis_url = "redis://localhost:6379/0" + return settings + + +@pytest.fixture +def mock_qdrant_client(): + """Create a mock QdrantVectorstore client.""" + client = Mock() + client.create_collection = Mock(return_value=None) + client.search = Mock( + return_value=[ + {"id": "1", "text": "Test chunk 1", "score": 0.95}, + {"id": "2", "text": "Test chunk 2", "score": 0.85}, + ] + ) + client.get_collection = Mock(return_value={"name": "documents", "vectors_count": 100}) + return client + + +@pytest.fixture +def mock_embedder(): + """Create a mock embedder.""" + embedder = Mock() + embedder.embed = Mock(return_value=[0.1] * 1536) + embedder.aembed = AsyncMock(return_value=[0.1] * 1536) + return embedder + + +@pytest.fixture +def mock_llm_client(): + """Create a mock LLM client.""" + client = Mock() + client.invoke = AsyncMock( + return_value=Mock( + text="Test response", + model="gpt-4o-mini", + usage={"prompt_tokens": 100, "completion_tokens": 50}, + ) + ) + return client + + +@pytest.fixture +def mock_ingestion_pipeline(): + """Create a mock IngestionPipeline.""" + pipeline = Mock() + pipeline.run = Mock( + return_value=[ + {"id": "1", "text": "Chunk 1"}, + {"id": "2", "text": "Chunk 2"}, + ] + ) + return pipeline + + +@pytest.fixture +def mock_openai_embedder(): + """Create a mock OpenAIEmbedder.""" + embedder = Mock() + embedder.embed = Mock(return_value=[0.1] * 1536) + embedder.aembed = AsyncMock(return_value=[0.1] * 1536) + return embedder + + +@pytest.fixture +def mock_chunk_embedder(): + """Create a mock ChunkEmbedder.""" + embedder = Mock() + return embedder + + +@pytest.fixture +def mock_docling_parser(): + """Create a mock DoclingParser.""" + parser = Mock() + return parser + + +@pytest.fixture +def mock_node_splitter(): + """Create a mock NodeSplitter.""" + splitter = Mock() + return splitter + + +@pytest.fixture(autouse=True) +def reset_singletons(): + """Reset singleton instances before each test.""" + # Only reset modules that can be imported + try: + from agentic_rag.core import config + + config._settings = None + except ImportError: + pass + + try: + from agentic_rag.services import vector_store + + vector_store._vector_store = None + except ImportError: + pass + + try: + from agentic_rag.services import document_service + + document_service._document_service = None + except ImportError: + pass + + try: + from agentic_rag.services import rag_service + + rag_service._rag_service = None + except ImportError: + pass + + try: + from agentic_rag.core import llm_factory + + llm_factory._client_cache.clear() + except ImportError: + pass + + yield + + # Reset after test as well + try: + from agentic_rag.core import config + + config._settings = None + except ImportError: + pass + + try: + from agentic_rag.services import vector_store + + vector_store._vector_store = None + except ImportError: + pass + + try: + from agentic_rag.services import document_service + + document_service._document_service = None + except ImportError: + pass + + try: + from agentic_rag.services import rag_service + + rag_service._rag_service = None + except ImportError: + pass + + try: + from agentic_rag.core import llm_factory + + llm_factory._client_cache.clear() + except ImportError: + pass + + +@pytest.fixture(autouse=True) +def mock_datapizza_modules(): + """Mock datapizza modules that may not be installed.""" + # Create mock modules + mock_datapizza = Mock() + mock_vectorstores = Mock() + mock_qdrant = Mock() + mock_qdrant.QdrantVectorstore = Mock() + mock_vectorstores.qdrant = mock_qdrant + mock_datapizza.vectorstores = mock_vectorstores + + mock_embedders = Mock() + mock_embedders.ChunkEmbedder = Mock() + mock_openai_embedder_module = Mock() + mock_openai_embedder_module.OpenAIEmbedder = Mock() + mock_embedders.openai = mock_openai_embedder_module + mock_datapizza.embedders = mock_embedders + + mock_modules = Mock() + mock_parsers = Mock() + mock_docling = Mock() + mock_docling.DoclingParser = Mock() + mock_parsers.docling = mock_docling + mock_modules.parsers = mock_parsers + mock_datapizza.modules = mock_modules + + mock_splitters = Mock() + mock_splitters.NodeSplitter = Mock() + mock_datapizza.modules.splitters = mock_splitters + + mock_pipeline = Mock() + mock_pipeline.IngestionPipeline = Mock() + mock_datapizza.pipeline = mock_pipeline + + mock_clients = Mock() + mock_clients.openai = Mock() + mock_clients.anthropic = Mock() + mock_clients.google = Mock() + mock_clients.mistral = Mock() + mock_clients.azure = Mock() + mock_datapizza.clients = mock_clients + + # Patch sys.modules + with patch.dict( + "sys.modules", + { + "datapizza": mock_datapizza, + "datapizza.vectorstores": mock_vectorstores, + "datapizza.vectorstores.qdrant": mock_qdrant, + "datapizza.embedders": mock_embedders, + "datapizza.embedders.openai": mock_openai_embedder_module, + "datapizza.modules": mock_modules, + "datapizza.modules.parsers": mock_parsers, + "datapizza.modules.parsers.docling": mock_docling, + "datapizza.modules.splitters": mock_splitters, + "datapizza.pipeline": mock_pipeline, + "datapizza.clients": mock_clients, + "datapizza.clients.openai": mock_clients.openai, + "datapizza.clients.anthropic": mock_clients.anthropic, + "datapizza.clients.google": mock_clients.google, + "datapizza.clients.mistral": mock_clients.mistral, + "datapizza.clients.azure": mock_clients.azure, + }, + ): + yield diff --git a/tests/unit/test_agentic_rag/test_api/__init__.py b/tests/unit/test_agentic_rag/test_api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/test_agentic_rag/test_api/test_chat.py b/tests/unit/test_agentic_rag/test_api/test_chat.py new file mode 100644 index 0000000..38325ff --- /dev/null +++ b/tests/unit/test_agentic_rag/test_api/test_chat.py @@ -0,0 +1,324 @@ +"""Tests for chat streaming endpoint.""" + +import pytest +from fastapi.testclient import TestClient +from unittest.mock import Mock, patch, AsyncMock +import asyncio + + +@pytest.mark.unit +class TestChatStream: + """Tests for chat streaming endpoint.""" + + @pytest.fixture + def client(self): + """Create test client for chat routes.""" + from fastapi import FastAPI + from agentic_rag.api.routes.chat import router + + app = FastAPI() + app.include_router(router) + return TestClient(app) + + def test_chat_stream_endpoint_exists(self, client): + """Test chat stream endpoint exists and returns 200.""" + response = client.post("/chat/stream", json={"message": "Hello"}) + + assert response.status_code == 200 + + def test_chat_stream_returns_streaming_response(self, client): + """Test chat stream returns streaming response.""" + response = client.post("/chat/stream", json={"message": "Hello"}) + + # Should be event stream + assert "text/event-stream" in response.headers.get("content-type", "") + + def test_chat_stream_accepts_valid_message(self, client): + """Test chat stream accepts valid message.""" + response = client.post("/chat/stream", json={"message": "Test message"}) + + assert response.status_code == 200 + + def test_chat_stream_response_content(self, client): + """Test chat stream response contains expected content.""" + response = client.post("/chat/stream", json={"message": "Hello"}) + content = response.content.decode("utf-8") + + assert "data: Hello from AgenticRAG!" in content + assert "data: Streaming not fully implemented yet." in content + assert "data: [DONE]" in content + + def test_chat_stream_multiple_messages(self, client): + """Test chat stream generates multiple messages.""" + response = client.post("/chat/stream", json={"message": "Hello"}) + content = response.content.decode("utf-8") + + # Should have 3 data messages + assert content.count("data:") == 3 + + def test_chat_stream_sse_format(self, client): + """Test chat stream uses Server-Sent Events format.""" + response = client.post("/chat/stream", json={"message": "Hello"}) + content = response.content.decode("utf-8") + + # Each line should end with \n\n + lines = content.strip().split("\n\n") + for line in lines: + assert line.startswith("data:") + + +@pytest.mark.unit +class TestChatMessageModel: + """Tests for ChatMessage Pydantic model.""" + + def test_chat_message_creation(self): + """Test ChatMessage can be created with message field.""" + from agentic_rag.api.routes.chat import ChatMessage + + message = ChatMessage(message="Hello world") + + assert message.message == "Hello world" + + def test_chat_message_empty_string(self): + """Test ChatMessage accepts empty string.""" + from agentic_rag.api.routes.chat import ChatMessage + + message = ChatMessage(message="") + + assert message.message == "" + + def test_chat_message_long_message(self): + """Test ChatMessage accepts long message.""" + from agentic_rag.api.routes.chat import ChatMessage + + long_message = "A" * 10000 + message = ChatMessage(message=long_message) + + assert message.message == long_message + + def test_chat_message_special_characters(self): + """Test ChatMessage accepts special characters.""" + from agentic_rag.api.routes.chat import ChatMessage + + special = "Hello! @#$%^&*()_+-=[]{}|;':\",./<>?" + message = ChatMessage(message=special) + + assert message.message == special + + def test_chat_message_unicode(self): + """Test ChatMessage accepts unicode characters.""" + from agentic_rag.api.routes.chat import ChatMessage + + unicode_msg = "Hello 世界 🌍 Привет" + message = ChatMessage(message=unicode_msg) + + assert message.message == unicode_msg + + def test_chat_message_serialization(self): + """Test ChatMessage serializes correctly.""" + from agentic_rag.api.routes.chat import ChatMessage + + message = ChatMessage(message="Test") + data = message.model_dump() + + assert data == {"message": "Test"} + + def test_chat_message_json_serialization(self): + """Test ChatMessage JSON serialization.""" + from agentic_rag.api.routes.chat import ChatMessage + + message = ChatMessage(message="Test") + json_str = message.model_dump_json() + + assert '"message":"Test"' in json_str + + +@pytest.mark.unit +class TestChatStreamValidation: + """Tests for chat stream request validation.""" + + @pytest.fixture + def client(self): + """Create test client for chat routes.""" + from fastapi import FastAPI + from agentic_rag.api.routes.chat import router + + app = FastAPI() + app.include_router(router) + return TestClient(app) + + def test_chat_stream_rejects_empty_body(self, client): + """Test chat stream rejects empty body.""" + response = client.post("/chat/stream", json={}) + + assert response.status_code == 422 # Validation error + + def test_chat_stream_rejects_missing_message(self, client): + """Test chat stream rejects request without message field.""" + response = client.post("/chat/stream", json={"other_field": "value"}) + + assert response.status_code == 422 + + def test_chat_stream_rejects_invalid_json(self, client): + """Test chat stream rejects invalid JSON.""" + response = client.post( + "/chat/stream", data="not valid json", headers={"Content-Type": "application/json"} + ) + + assert response.status_code == 422 + + def test_chat_stream_accepts_extra_fields(self, client): + """Test chat stream accepts extra fields (if configured).""" + response = client.post("/chat/stream", json={"message": "Hello", "extra": "field"}) + + # FastAPI/Pydantic v2 ignores extra fields by default + assert response.status_code == 200 + + +@pytest.mark.unit +class TestChatStreamAsync: + """Tests for chat stream async behavior.""" + + def test_chat_stream_is_async(self): + """Test chat stream endpoint is async function.""" + from agentic_rag.api.routes.chat import chat_stream + + import asyncio + + assert asyncio.iscoroutinefunction(chat_stream) + + @pytest.mark.asyncio + async def test_generate_function_yields_bytes(self): + """Test generate function yields bytes.""" + from agentic_rag.api.routes.chat import chat_stream + + # Access the inner generate function + # We need to inspect the function behavior + response_mock = Mock() + request = Mock() + request.message = "Hello" + + # The generate function is defined inside chat_stream + # Let's test the streaming response directly + from agentic_rag.api.routes.chat import ChatMessage + + # Create the streaming response + from fastapi.responses import StreamingResponse + + # Check that chat_stream returns StreamingResponse + result = await chat_stream(ChatMessage(message="Hello")) + assert isinstance(result, StreamingResponse) + + +@pytest.mark.unit +class TestChatStreamEdgeCases: + """Tests for chat stream edge cases.""" + + @pytest.fixture + def client(self): + """Create test client for chat routes.""" + from fastapi import FastAPI + from agentic_rag.api.routes.chat import router + + app = FastAPI() + app.include_router(router) + return TestClient(app) + + def test_chat_stream_with_whitespace_message(self, client): + """Test chat stream with whitespace-only message.""" + response = client.post("/chat/stream", json={"message": " "}) + + assert response.status_code == 200 + + def test_chat_stream_with_newline_message(self, client): + """Test chat stream with message containing newlines.""" + response = client.post("/chat/stream", json={"message": "Line 1\nLine 2\nLine 3"}) + + assert response.status_code == 200 + + def test_chat_stream_with_null_bytes(self, client): + """Test chat stream with message containing null bytes.""" + response = client.post("/chat/stream", json={"message": "Hello\x00World"}) + + assert response.status_code == 200 + + +@pytest.mark.unit +class TestChatRouterConfiguration: + """Tests for chat router configuration.""" + + def test_router_exists(self): + """Test router module exports router.""" + from agentic_rag.api.routes.chat import router + + assert router is not None + + def test_router_is_api_router(self): + """Test router is FastAPI APIRouter.""" + from agentic_rag.api.routes.chat import router + from fastapi import APIRouter + + assert isinstance(router, APIRouter) + + def test_chat_stream_endpoint_configured(self): + """Test chat stream endpoint is configured.""" + from agentic_rag.api.routes.chat import router + + routes = [route for route in router.routes if hasattr(route, "path")] + paths = [route.path for route in routes] + + assert "/chat/stream" in paths + + def test_chat_stream_endpoint_methods(self): + """Test chat stream endpoint accepts POST.""" + from agentic_rag.api.routes.chat import router + + stream_route = None + for route in router.routes: + if hasattr(route, "path") and route.path == "/chat/stream": + stream_route = route + break + + assert stream_route is not None + assert "POST" in stream_route.methods + + +@pytest.mark.unit +class TestChatStreamResponseFormat: + """Tests for chat stream response format.""" + + @pytest.fixture + def client(self): + """Create test client for chat routes.""" + from fastapi import FastAPI + from agentic_rag.api.routes.chat import router + + app = FastAPI() + app.include_router(router) + return TestClient(app) + + def test_chat_stream_content_type_header(self, client): + """Test chat stream has correct content-type header.""" + response = client.post("/chat/stream", json={"message": "Hello"}) + + content_type = response.headers.get("content-type", "") + assert "text/event-stream" in content_type + + def test_chat_stream_cache_control(self, client): + """Test chat stream has cache control headers.""" + response = client.post("/chat/stream", json={"message": "Hello"}) + + # Streaming responses typically have no-cache headers + # This is set by FastAPI/Starlette for streaming responses + assert response.status_code == 200 + + def test_chat_stream_response_chunks(self, client): + """Test chat stream response is chunked.""" + response = client.post("/chat/stream", json={"message": "Hello"}) + + # Response body should contain multiple chunks + content = response.content.decode("utf-8") + chunks = content.split("\n\n") + + # Should have multiple data chunks + assert len(chunks) >= 3 diff --git a/tests/unit/test_agentic_rag/test_api/test_health.py b/tests/unit/test_agentic_rag/test_api/test_health.py new file mode 100644 index 0000000..37387e1 --- /dev/null +++ b/tests/unit/test_agentic_rag/test_api/test_health.py @@ -0,0 +1,285 @@ +"""Tests for health check endpoints.""" + +import pytest +from fastapi.testclient import TestClient +from unittest.mock import Mock, patch + + +@pytest.mark.unit +class TestHealthCheck: + """Tests for health check endpoint.""" + + @pytest.fixture + def client(self): + """Create test client for health routes.""" + from fastapi import FastAPI + from agentic_rag.api.routes.health import router + + app = FastAPI() + app.include_router(router) + return TestClient(app) + + def test_health_check_returns_200(self, client): + """Test health check returns HTTP 200.""" + response = client.get("/health") + + assert response.status_code == 200 + + def test_health_check_returns_json(self, client): + """Test health check returns JSON response.""" + response = client.get("/health") + + assert response.headers["content-type"] == "application/json" + + def test_health_check_status_healthy(self, client): + """Test health check status is healthy.""" + response = client.get("/health") + data = response.json() + + assert data["status"] == "healthy" + + def test_health_check_service_name(self, client): + """Test health check includes service name.""" + response = client.get("/health") + data = response.json() + + assert data["service"] == "agentic-rag" + + def test_health_check_version(self, client): + """Test health check includes version.""" + response = client.get("/health") + data = response.json() + + assert data["version"] == "2.0.0" + + def test_health_check_all_fields(self, client): + """Test health check contains all expected fields.""" + response = client.get("/health") + data = response.json() + + assert "status" in data + assert "service" in data + assert "version" in data + assert len(data) == 3 + + +@pytest.mark.unit +class TestReadinessCheck: + """Tests for readiness probe endpoint.""" + + @pytest.fixture + def client(self): + """Create test client for health routes.""" + from fastapi import FastAPI + from agentic_rag.api.routes.health import router + + app = FastAPI() + app.include_router(router) + return TestClient(app) + + def test_readiness_check_returns_200(self, client): + """Test readiness check returns HTTP 200.""" + response = client.get("/health/ready") + + assert response.status_code == 200 + + def test_readiness_check_returns_json(self, client): + """Test readiness check returns JSON response.""" + response = client.get("/health/ready") + + assert response.headers["content-type"] == "application/json" + + def test_readiness_check_status_ready(self, client): + """Test readiness check status is ready.""" + response = client.get("/health/ready") + data = response.json() + + assert data["status"] == "ready" + + def test_readiness_check_single_field(self, client): + """Test readiness check only contains status field.""" + response = client.get("/health/ready") + data = response.json() + + assert len(data) == 1 + assert "status" in data + + +@pytest.mark.unit +class TestLivenessCheck: + """Tests for liveness probe endpoint.""" + + @pytest.fixture + def client(self): + """Create test client for health routes.""" + from fastapi import FastAPI + from agentic_rag.api.routes.health import router + + app = FastAPI() + app.include_router(router) + return TestClient(app) + + def test_liveness_check_returns_200(self, client): + """Test liveness check returns HTTP 200.""" + response = client.get("/health/live") + + assert response.status_code == 200 + + def test_liveness_check_returns_json(self, client): + """Test liveness check returns JSON response.""" + response = client.get("/health/live") + + assert response.headers["content-type"] == "application/json" + + def test_liveness_check_status_alive(self, client): + """Test liveness check status is alive.""" + response = client.get("/health/live") + data = response.json() + + assert data["status"] == "alive" + + def test_liveness_check_single_field(self, client): + """Test liveness check only contains status field.""" + response = client.get("/health/live") + data = response.json() + + assert len(data) == 1 + assert "status" in data + + +@pytest.mark.unit +class TestHealthEndpointsAsync: + """Tests to verify endpoints are async.""" + + def test_health_check_is_async(self): + """Test health check endpoint is async function.""" + from agentic_rag.api.routes.health import health_check + + import asyncio + + assert asyncio.iscoroutinefunction(health_check) + + def test_readiness_check_is_async(self): + """Test readiness check endpoint is async function.""" + from agentic_rag.api.routes.health import readiness_check + + import asyncio + + assert asyncio.iscoroutinefunction(readiness_check) + + def test_liveness_check_is_async(self): + """Test liveness check endpoint is async function.""" + from agentic_rag.api.routes.health import liveness_check + + import asyncio + + assert asyncio.iscoroutinefunction(liveness_check) + + +@pytest.mark.unit +class TestHealthRouterConfiguration: + """Tests for router configuration.""" + + def test_router_exists(self): + """Test router module exports router.""" + from agentic_rag.api.routes.health import router + + assert router is not None + + def test_router_is_api_router(self): + """Test router is FastAPI APIRouter.""" + from agentic_rag.api.routes.health import router + from fastapi import APIRouter + + assert isinstance(router, APIRouter) + + def test_health_endpoint_path(self): + """Test health endpoint has correct path.""" + from agentic_rag.api.routes.health import router + + routes = [route for route in router.routes if hasattr(route, "path")] + paths = [route.path for route in routes] + + assert "/health" in paths + + def test_readiness_endpoint_path(self): + """Test readiness endpoint has correct path.""" + from agentic_rag.api.routes.health import router + + routes = [route for route in router.routes if hasattr(route, "path")] + paths = [route.path for route in routes] + + assert "/health/ready" in paths + + def test_liveness_endpoint_path(self): + """Test liveness endpoint has correct path.""" + from agentic_rag.api.routes.health import router + + routes = [route for route in router.routes if hasattr(route, "path")] + paths = [route.path for route in routes] + + assert "/health/live" in paths + + +@pytest.mark.unit +class TestHealthEndpointsMethods: + """Tests for HTTP methods on health endpoints.""" + + @pytest.fixture + def client(self): + """Create test client for health routes.""" + from fastapi import FastAPI + from agentic_rag.api.routes.health import router + + app = FastAPI() + app.include_router(router) + return TestClient(app) + + def test_health_check_only_get(self, client): + """Test health check only accepts GET.""" + # POST should not be allowed + response = client.post("/health") + assert response.status_code == 405 + + def test_readiness_check_only_get(self, client): + """Test readiness check only accepts GET.""" + response = client.post("/health/ready") + assert response.status_code == 405 + + def test_liveness_check_only_get(self, client): + """Test liveness check only accepts GET.""" + response = client.post("/health/live") + assert response.status_code == 405 + + +@pytest.mark.unit +class TestHealthEndpointPerformance: + """Tests for health endpoint performance characteristics.""" + + @pytest.fixture + def client(self): + """Create test client for health routes.""" + from fastapi import FastAPI + from agentic_rag.api.routes.health import router + + app = FastAPI() + app.include_router(router) + return TestClient(app) + + def test_health_check_response_time(self, client): + """Test health check responds quickly.""" + import time + + start = time.time() + response = client.get("/health") + elapsed = time.time() - start + + assert response.status_code == 200 + assert elapsed < 1.0 # Should respond in less than 1 second + + def test_health_check_small_response(self, client): + """Test health check returns small response.""" + response = client.get("/health") + + # Response should be small (less than 1KB) + assert len(response.content) < 1024 diff --git a/tests/unit/test_agentic_rag/test_api/test_providers.py b/tests/unit/test_agentic_rag/test_api/test_providers.py deleted file mode 100644 index b099f3c..0000000 --- a/tests/unit/test_agentic_rag/test_api/test_providers.py +++ /dev/null @@ -1,238 +0,0 @@ -"""Tests for provider management API routes. - -Test cases for provider listing, configuration, and model management. -""" - -import pytest -from unittest.mock import MagicMock, patch - -from fastapi import HTTPException -from fastapi.testclient import TestClient - -from agentic_rag.api.routes.providers import router - - -# Create test client -@pytest.fixture -def client(): - """Create test client for providers API.""" - from fastapi import FastAPI - - app = FastAPI() - app.include_router(router, prefix="/api/v1") - - # Mock authentication - async def mock_get_current_user(): - return {"user_id": "test-user", "auth_method": "api_key"} - - app.dependency_overrides = {} - - return TestClient(app) - - -@pytest.fixture -def mock_settings(): - """Mock settings for testing.""" - with patch("agentic_rag.api.routes.providers.get_settings") as mock: - settings = MagicMock() - settings.default_llm_provider = "openai" - settings.default_llm_model = "gpt-4o-mini" - settings.embedding_provider = "openai" - settings.embedding_model = "text-embedding-3-small" - settings.qdrant_host = "localhost" - settings.qdrant_port = 6333 - settings.is_provider_configured.return_value = True - settings.list_configured_providers.return_value = [ - {"id": "openai", "name": "Openai", "available": True} - ] - mock.return_value = settings - yield settings - - -@pytest.fixture -def mock_llm_factory(): - """Mock LLM factory for testing.""" - with patch("agentic_rag.api.routes.providers.LLMClientFactory") as mock: - mock.list_available_providers.return_value = [ - {"id": "openai", "name": "OpenAI", "available": True, "install_command": None}, - {"id": "zai", "name": "Z.AI", "available": True, "install_command": None}, - ] - mock.get_default_models.return_value = { - "openai": "gpt-4o-mini", - "zai": "zai-large", - } - yield mock - - -class TestListProviders: - """Test GET /api/v1/providers endpoint.""" - - @pytest.mark.skip(reason="Requires FastAPI dependency override") - def test_list_providers_success(self, client, mock_settings, mock_llm_factory): - """Test listing all providers.""" - response = client.get("/api/v1/providers") - - assert response.status_code == 200 - data = response.json() - assert isinstance(data, list) - assert len(data) == 2 - - @pytest.mark.skip(reason="Requires FastAPI dependency override") - def test_list_providers_structure(self, client, mock_settings, mock_llm_factory): - """Test provider list structure.""" - response = client.get("/api/v1/providers") - - data = response.json() - provider = data[0] - assert "id" in provider - assert "name" in provider - assert "available" in provider - assert "configured" in provider - assert "default_model" in provider - - -class TestListConfiguredProviders: - """Test GET /api/v1/providers/configured endpoint.""" - - @pytest.mark.skip(reason="Requires FastAPI dependency override") - def test_list_configured_providers(self, client, mock_settings): - """Test listing configured providers only.""" - response = client.get("/api/v1/providers/configured") - - assert response.status_code == 200 - mock_settings.list_configured_providers.assert_called_once() - - -class TestListProviderModels: - """Test GET /api/v1/providers/{provider_id}/models endpoint.""" - - @pytest.mark.skip(reason="Requires FastAPI dependency override") - def test_list_openai_models(self, client, mock_settings, mock_llm_factory): - """Test listing OpenAI models.""" - response = client.get("/api/v1/providers/openai/models") - - assert response.status_code == 200 - data = response.json() - assert data["provider"] == "openai" - assert isinstance(data["models"], list) - assert len(data["models"]) > 0 - - @pytest.mark.skip(reason="Requires FastAPI dependency override") - def test_list_zai_models(self, client, mock_settings, mock_llm_factory): - """Test listing Z.AI models.""" - response = client.get("/api/v1/providers/zai/models") - - assert response.status_code == 200 - data = response.json() - assert data["provider"] == "zai" - - @pytest.mark.skip(reason="Requires FastAPI dependency override") - def test_list_openrouter_models(self, client, mock_settings, mock_llm_factory): - """Test listing OpenRouter models.""" - response = client.get("/api/v1/providers/openrouter/models") - - assert response.status_code == 200 - data = response.json() - assert data["provider"] == "openrouter" - # OpenRouter should have multiple models - assert len(data["models"]) > 3 - - @pytest.mark.skip(reason="Requires FastAPI dependency override") - def test_list_unknown_provider_models(self, client, mock_settings, mock_llm_factory): - """Test listing models for unknown provider.""" - response = client.get("/api/v1/providers/unknown/models") - - assert response.status_code == 404 - - -class TestGetConfig: - """Test GET /api/v1/config endpoint.""" - - @pytest.mark.skip(reason="Requires FastAPI dependency override") - def test_get_config_success(self, client, mock_settings, mock_llm_factory): - """Test getting system configuration.""" - response = client.get("/api/v1/config") - - assert response.status_code == 200 - data = response.json() - assert data["default_llm_provider"] == "openai" - assert data["default_llm_model"] == "gpt-4o-mini" - assert data["embedding_provider"] == "openai" - assert "configured_providers" in data - assert "qdrant_host" in data - assert "qdrant_port" in data - - -class TestUpdateDefaultProvider: - """Test PUT /api/v1/config/provider endpoint.""" - - @pytest.mark.skip(reason="Requires FastAPI dependency override") - def test_update_provider_success(self, client, mock_settings, mock_llm_factory): - """Test updating default provider successfully.""" - payload = {"provider": "zai", "model": "zai-large"} - response = client.put("/api/v1/config/provider", json=payload) - - assert response.status_code == 200 - data = response.json() - assert data["success"] is True - assert "zai" in data["message"] - assert "zai-large" in data["message"] - - @pytest.mark.skip(reason="Requires FastAPI dependency override") - def test_update_unconfigured_provider(self, client, mock_settings, mock_llm_factory): - """Test updating to unconfigured provider fails.""" - mock_settings.is_provider_configured.return_value = False - - payload = {"provider": "unknown", "model": "unknown-model"} - response = client.put("/api/v1/config/provider", json=payload) - - assert response.status_code == 400 - - -class TestProviderModelsData: - """Test provider models data structure.""" - - def test_openai_models_structure(self): - """Test OpenAI models have correct structure.""" - from agentic_rag.api.routes.providers import list_provider_models - - # We can't call this directly without auth, but we can check the data structure - # This is a unit test of the internal logic - mock_user = {"user_id": "test"} - - # Import the models dict directly - models = { - "openai": [ - {"id": "gpt-4o", "name": "GPT-4o"}, - {"id": "gpt-4o-mini", "name": "GPT-4o Mini"}, - ], - "anthropic": [ - {"id": "claude-3-5-sonnet-20241022", "name": "Claude 3.5 Sonnet"}, - ], - } - - # Verify structure - for provider_id, model_list in models.items(): - for model in model_list: - assert "id" in model - assert "name" in model - assert isinstance(model["id"], str) - assert isinstance(model["name"], str) - - def test_all_providers_have_models(self): - """Test that all 8 providers have model definitions.""" - # This test verifies the models dict in providers.py is complete - expected_providers = [ - "openai", - "zai", - "opencode-zen", - "openrouter", - "anthropic", - "google", - "mistral", - "azure", - ] - - # The actual models dict should be checked in the route file - # This serves as a reminder to keep models updated - assert len(expected_providers) == 8 diff --git a/tests/unit/test_agentic_rag/test_core/__init__.py b/tests/unit/test_agentic_rag/test_core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/test_agentic_rag/test_core/test_config.py b/tests/unit/test_agentic_rag/test_core/test_config.py new file mode 100644 index 0000000..adb1c6b --- /dev/null +++ b/tests/unit/test_agentic_rag/test_core/test_config.py @@ -0,0 +1,323 @@ +"""Tests for configuration management.""" + +import pytest +from unittest.mock import Mock, patch, MagicMock +import sys + + +@pytest.mark.unit +class TestSettings: + """Tests for Settings class.""" + + def test_settings_default_values(self): + """Test Settings default values.""" + from agentic_rag.core.config import Settings + + settings = Settings() + + assert settings.app_name == "AgenticRAG" + assert settings.app_version == "2.0.0" + assert settings.debug is True + assert settings.qdrant_host == "localhost" + assert settings.qdrant_port == 6333 + assert settings.max_file_size == 10 * 1024 * 1024 + assert settings.upload_dir == "./uploads" + assert settings.default_llm_provider == "openai" + assert settings.default_llm_model == "gpt-4o-mini" + + def test_settings_cors_origins_default(self): + """Test default CORS origins.""" + from agentic_rag.core.config import Settings + + settings = Settings() + + expected_origins = [ + "http://localhost:3000", + "http://localhost:5173", + "http://localhost:8000", + ] + assert settings.cors_origins == expected_origins + + def test_settings_jwt_configuration(self): + """Test JWT configuration defaults.""" + from agentic_rag.core.config import Settings + + settings = Settings() + + assert settings.jwt_algorithm == "HS256" + assert settings.access_token_expire_minutes == 30 + assert settings.jwt_secret == "your-secret-key-change-in-production" + + def test_settings_azure_configuration(self): + """Test Azure configuration defaults.""" + from agentic_rag.core.config import Settings + + settings = Settings() + + assert settings.azure_api_version == "2024-02-01" + assert settings.azure_endpoint == "" + + def test_settings_embedding_configuration(self): + """Test embedding configuration defaults.""" + from agentic_rag.core.config import Settings + + settings = Settings() + + assert settings.embedding_provider == "openai" + assert settings.embedding_model == "text-embedding-3-small" + + @pytest.mark.parametrize( + "provider,expected_key_attr", + [ + ("openai", "openai_api_key"), + ("zai", "zai_api_key"), + ("z.ai", "zai_api_key"), + ("opencode-zen", "opencode_zen_api_key"), + ("opencode_zen", "opencode_zen_api_key"), + ("openrouter", "openrouter_api_key"), + ("anthropic", "anthropic_api_key"), + ("google", "google_api_key"), + ("mistral", "mistral_api_key"), + ("azure", "azure_api_key"), + ], + ) + def test_get_api_key_for_provider(self, provider, expected_key_attr): + """Test get_api_key_for_provider with various providers.""" + from agentic_rag.core.config import Settings + + settings = Settings() + # Set a test key + setattr(settings, expected_key_attr, f"test-{provider}-key") + + result = settings.get_api_key_for_provider(provider) + + assert result == f"test-{provider}-key" + + def test_get_api_key_for_provider_case_insensitive(self): + """Test get_api_key_for_provider is case insensitive.""" + from agentic_rag.core.config import Settings + + settings = Settings() + settings.openai_api_key = "test-key" + + assert settings.get_api_key_for_provider("OPENAI") == "test-key" + assert settings.get_api_key_for_provider("OpenAI") == "test-key" + assert settings.get_api_key_for_provider("openai") == "test-key" + + def test_get_api_key_for_provider_unknown(self): + """Test get_api_key_for_provider with unknown provider.""" + from agentic_rag.core.config import Settings + + settings = Settings() + + result = settings.get_api_key_for_provider("unknown-provider") + + assert result == "" + + def test_is_provider_configured_true(self): + """Test is_provider_configured returns True when key exists.""" + from agentic_rag.core.config import Settings + + settings = Settings() + settings.openai_api_key = "test-key" + + assert settings.is_provider_configured("openai") is True + + def test_is_provider_configured_false(self): + """Test is_provider_configured returns False when key is empty.""" + from agentic_rag.core.config import Settings + + settings = Settings() + settings.openai_api_key = "" + + assert settings.is_provider_configured("openai") is False + + def test_is_provider_configured_false_whitespace(self): + """Test is_provider_configured returns False when key is whitespace.""" + from agentic_rag.core.config import Settings + + settings = Settings() + settings.openai_api_key = " " + + # Empty string is falsy, whitespace-only is truthy in Python + # But we check with bool() which considers whitespace as True + assert settings.is_provider_configured("openai") is True + + @patch("agentic_rag.core.config.Settings.get_api_key_for_provider") + @patch("agentic_rag.core.llm_factory.LLMClientFactory.list_available_providers") + def test_list_configured_providers(self, mock_list_providers, mock_get_key): + """Test list_configured_providers.""" + from agentic_rag.core.config import Settings + + mock_list_providers.return_value = [ + {"id": "openai", "name": "OpenAI"}, + {"id": "anthropic", "name": "Anthropic"}, + {"id": "unknown", "name": "Unknown"}, + ] + + def side_effect(provider): + return provider in ["openai", "anthropic"] + + mock_get_key.side_effect = side_effect + + settings = Settings() + result = settings.list_configured_providers() + + assert len(result) == 2 + assert result[0]["id"] == "openai" + assert result[1]["id"] == "anthropic" + + @patch("agentic_rag.core.llm_factory.LLMClientFactory.list_available_providers") + def test_list_configured_providers_no_configured(self, mock_list_providers): + """Test list_configured_providers with no configured providers.""" + from agentic_rag.core.config import Settings + + mock_list_providers.return_value = [ + {"id": "openai", "name": "OpenAI"}, + {"id": "anthropic", "name": "Anthropic"}, + ] + + settings = Settings() + result = settings.list_configured_providers() + + assert result == [] + + +@pytest.mark.unit +class TestGetSettings: + """Tests for get_settings function.""" + + def test_get_settings_singleton(self): + """Test get_settings returns same instance (singleton pattern).""" + from agentic_rag.core.config import get_settings + + settings1 = get_settings() + settings2 = get_settings() + + assert settings1 is settings2 + + def test_get_settings_returns_settings_instance(self): + """Test get_settings returns Settings instance.""" + from agentic_rag.core.config import get_settings, Settings + + settings = get_settings() + + assert isinstance(settings, Settings) + + @patch("agentic_rag.core.config.Settings") + def test_get_settings_creates_new_instance(self, mock_settings_class): + """Test get_settings creates new Settings instance when _settings is None.""" + from agentic_rag.core.config import get_settings, _settings + + mock_instance = Mock() + mock_settings_class.return_value = mock_instance + + # Ensure _settings is None + import agentic_rag.core.config as config_module + + config_module._settings = None + + result = get_settings() + + mock_settings_class.assert_called_once() + assert result is mock_instance + + @patch("agentic_rag.core.config.Settings") + def test_get_settings_uses_existing_instance(self, mock_settings_class): + """Test get_settings uses existing Settings instance when available.""" + from agentic_rag.core.config import get_settings + import agentic_rag.core.config as config_module + + existing_settings = Mock() + config_module._settings = existing_settings + + result = get_settings() + + mock_settings_class.assert_not_called() + assert result is existing_settings + + +@pytest.mark.unit +class TestSettingsEnvironmentVariables: + """Tests for Settings with environment variables.""" + + @patch.dict( + "os.environ", + { + "APP_NAME": "TestApp", + "APP_VERSION": "1.0.0", + "DEBUG": "false", + "QDRANT_HOST": "test-host", + "QDRANT_PORT": "9999", + "OPENAI_API_KEY": "env-openai-key", + }, + ) + def test_settings_from_env_variables(self): + """Test Settings loads from environment variables.""" + from agentic_rag.core.config import Settings + + # Need to reload to pick up env vars + settings = Settings(_env_file=None) + + # These should be overridden by env vars + assert settings.openai_api_key == "env-openai-key" + + def test_settings_env_file_path(self): + """Test Settings env file configuration.""" + from agentic_rag.core.config import Settings + + assert Settings.Config.env_file == ".env" + assert Settings.Config.env_file_encoding == "utf-8" + + +@pytest.mark.unit +class TestSettingsEdgeCases: + """Tests for Settings edge cases.""" + + def test_settings_all_api_keys_empty_by_default(self): + """Test all API keys are empty by default.""" + from agentic_rag.core.config import Settings + + settings = Settings() + + assert settings.openai_api_key == "" + assert settings.zai_api_key == "" + assert settings.opencode_zen_api_key == "" + assert settings.openrouter_api_key == "" + assert settings.anthropic_api_key == "" + assert settings.google_api_key == "" + assert settings.mistral_api_key == "" + assert settings.azure_api_key == "" + assert settings.embedding_api_key == "" + + def test_settings_redis_default_url(self): + """Test Redis default URL.""" + from agentic_rag.core.config import Settings + + settings = Settings() + + assert settings.redis_url == "redis://localhost:6379/0" + + def test_settings_admin_api_key_default(self): + """Test admin API key default value.""" + from agentic_rag.core.config import Settings + + settings = Settings() + + assert settings.admin_api_key == "admin-api-key-change-in-production" + + def test_get_api_key_for_provider_variants(self): + """Test all provider name variants.""" + from agentic_rag.core.config import Settings + + settings = Settings() + settings.zai_api_key = "test-zai" + settings.opencode_zen_api_key = "test-opencode" + + # Test zai variants + assert settings.get_api_key_for_provider("zai") == "test-zai" + assert settings.get_api_key_for_provider("z.ai") == "test-zai" + + # Test opencode variants + assert settings.get_api_key_for_provider("opencode-zen") == "test-opencode" + assert settings.get_api_key_for_provider("opencode_zen") == "test-opencode" diff --git a/tests/unit/test_agentic_rag/test_core/test_logging.py b/tests/unit/test_agentic_rag/test_core/test_logging.py new file mode 100644 index 0000000..32f1520 --- /dev/null +++ b/tests/unit/test_agentic_rag/test_core/test_logging.py @@ -0,0 +1,209 @@ +"""Tests for logging configuration.""" + +import pytest +import logging +import sys +from unittest.mock import Mock, patch, MagicMock + + +@pytest.mark.unit +class TestSetupLogging: + """Tests for setup_logging function.""" + + def test_setup_logging_basic_config_called(self): + """Test that basicConfig is called with correct parameters.""" + from agentic_rag.core.logging import setup_logging + + with patch("logging.basicConfig") as mock_basic_config: + setup_logging() + + mock_basic_config.assert_called_once() + call_kwargs = mock_basic_config.call_args.kwargs + + assert call_kwargs["level"] == logging.INFO + assert call_kwargs["format"] == "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + + def test_setup_logging_stream_handler(self): + """Test that StreamHandler is configured with stdout.""" + from agentic_rag.core.logging import setup_logging + + with patch("logging.basicConfig") as mock_basic_config: + setup_logging() + + call_kwargs = mock_basic_config.call_args.kwargs + handlers = call_kwargs["handlers"] + + assert len(handlers) == 1 + assert isinstance(handlers[0], logging.StreamHandler) + assert handlers[0].stream == sys.stdout + + def test_setup_logging_format_string(self): + """Test the format string includes all required components.""" + from agentic_rag.core.logging import setup_logging + + with patch("logging.basicConfig") as mock_basic_config: + setup_logging() + + call_kwargs = mock_basic_config.call_args.kwargs + format_string = call_kwargs["format"] + + assert "%(asctime)s" in format_string + assert "%(name)s" in format_string + assert "%(levelname)s" in format_string + assert "%(message)s" in format_string + + def test_setup_logging_level_info(self): + """Test that logging level is set to INFO.""" + from agentic_rag.core.logging import setup_logging + + with patch("logging.basicConfig") as mock_basic_config: + setup_logging() + + call_kwargs = mock_basic_config.call_args.kwargs + assert call_kwargs["level"] == logging.INFO + + @patch("logging.StreamHandler") + @patch("logging.basicConfig") + def test_setup_logging_stream_handler_creation(self, mock_basic_config, mock_stream_handler): + """Test StreamHandler is created properly.""" + from agentic_rag.core.logging import setup_logging + + mock_handler_instance = Mock() + mock_stream_handler.return_value = mock_handler_instance + + setup_logging() + + mock_stream_handler.assert_called_once_with(sys.stdout) + + def test_setup_logging_no_return_value(self): + """Test that setup_logging returns None.""" + from agentic_rag.core.logging import setup_logging + + with patch("logging.basicConfig"): + result = setup_logging() + + assert result is None + + def test_setup_logging_called_multiple_times(self): + """Test that setup_logging can be called multiple times.""" + from agentic_rag.core.logging import setup_logging + + with patch("logging.basicConfig") as mock_basic_config: + setup_logging() + setup_logging() + setup_logging() + + assert mock_basic_config.call_count == 3 + + def test_setup_logging_integration(self): + """Integration test for setup_logging.""" + from agentic_rag.core.logging import setup_logging + + # Reset logging config first + logging.root.handlers = [] + logging.root.setLevel(logging.WARNING) + + setup_logging() + + # Check root logger configuration + assert logging.root.level == logging.INFO + assert len(logging.root.handlers) >= 1 + + # Check the handler is a StreamHandler + stream_handlers = [h for h in logging.root.handlers if isinstance(h, logging.StreamHandler)] + assert len(stream_handlers) >= 1 + + def test_setup_logging_log_message(self): + """Test that logging works after setup.""" + from agentic_rag.core.logging import setup_logging + + with patch("logging.basicConfig"): + setup_logging() + + # Create a test logger + test_logger = logging.getLogger("test.logger") + + # Test that we can create log messages (actual logging is mocked) + assert test_logger is not None + assert hasattr(test_logger, "info") + assert hasattr(test_logger, "debug") + assert hasattr(test_logger, "warning") + assert hasattr(test_logger, "error") + + +@pytest.mark.unit +class TestLoggingEdgeCases: + """Tests for logging edge cases.""" + + def test_setup_logging_with_existing_handlers(self): + """Test setup_logging when handlers already exist.""" + from agentic_rag.core.logging import setup_logging + + with patch("logging.basicConfig") as mock_basic_config: + # Setup logging multiple times to simulate existing handlers + setup_logging() + + # basicConfig is called but may not add handlers if already configured + mock_basic_config.assert_called_once() + + def test_setup_logging_sys_stdout(self): + """Test that sys.stdout is used as the stream.""" + from agentic_rag.core.logging import setup_logging + + with patch("logging.basicConfig") as mock_basic_config: + setup_logging() + + call_kwargs = mock_basic_config.call_args.kwargs + handlers = call_kwargs["handlers"] + + assert handlers[0].stream is sys.stdout + + @patch("sys.stdout") + def test_setup_logging_with_mocked_stdout(self, mock_stdout): + """Test setup_logging with mocked stdout.""" + from agentic_rag.core.logging import setup_logging + + with patch("logging.StreamHandler") as mock_handler_class: + mock_handler = Mock() + mock_handler_class.return_value = mock_handler + + with patch("logging.basicConfig"): + setup_logging() + + # Verify StreamHandler was created + mock_handler_class.assert_called_once() + + +@pytest.mark.unit +class TestLoggingFormat: + """Tests for logging format details.""" + + def test_format_components_order(self): + """Test format string components order.""" + from agentic_rag.core.logging import setup_logging + + with patch("logging.basicConfig") as mock_basic_config: + setup_logging() + + call_kwargs = mock_basic_config.call_args.kwargs + format_string = call_kwargs["format"] + + # Check order: time - name - level - message + time_pos = format_string.find("%(asctime)s") + name_pos = format_string.find("%(name)s") + level_pos = format_string.find("%(levelname)s") + message_pos = format_string.find("%(message)s") + + assert time_pos < name_pos < level_pos < message_pos + + def test_format_separator(self): + """Test format string uses ' - ' as separator.""" + from agentic_rag.core.logging import setup_logging + + with patch("logging.basicConfig") as mock_basic_config: + setup_logging() + + call_kwargs = mock_basic_config.call_args.kwargs + format_string = call_kwargs["format"] + + assert " - " in format_string diff --git a/tests/unit/test_agentic_rag/test_services/__init__.py b/tests/unit/test_agentic_rag/test_services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/test_agentic_rag/test_services/test_document_service.py b/tests/unit/test_agentic_rag/test_services/test_document_service.py new file mode 100644 index 0000000..4395d13 --- /dev/null +++ b/tests/unit/test_agentic_rag/test_services/test_document_service.py @@ -0,0 +1,404 @@ +"""Tests for DocumentService.""" + +import pytest +from unittest.mock import Mock, patch, AsyncMock, MagicMock, mock_open +from pathlib import Path +import tempfile +import os + + +@pytest.fixture +def mock_settings(): + """Create mock settings for tests.""" + settings = Mock() + settings.qdrant_host = "localhost" + settings.qdrant_port = 6333 + settings.openai_api_key = "test-key" + settings.embedding_model = "text-embedding-3-small" + return settings + + +@pytest.fixture +def mock_dependencies(mock_settings): + """Create mock dependencies for DocumentService.""" + with ( + patch("agentic_rag.services.document_service.settings", mock_settings), + patch("agentic_rag.services.document_service.QdrantVectorstore") as mock_qdrant, + patch("agentic_rag.services.document_service.ChunkEmbedder") as mock_chunk_embedder, + patch("agentic_rag.services.document_service.OpenAIEmbedder") as mock_openai_embedder, + patch("agentic_rag.services.document_service.IngestionPipeline") as mock_pipeline, + patch("agentic_rag.services.document_service.DoclingParser") as mock_parser, + patch("agentic_rag.services.document_service.NodeSplitter") as mock_splitter, + ): + mock_qdrant_instance = Mock() + mock_qdrant.return_value = mock_qdrant_instance + + mock_openai_embedder_instance = Mock() + mock_openai_embedder.return_value = mock_openai_embedder_instance + + mock_pipeline_instance = Mock() + mock_pipeline_instance.run.return_value = [ + {"id": "1", "text": "Chunk 1"}, + {"id": "2", "text": "Chunk 2"}, + {"id": "3", "text": "Chunk 3"}, + ] + mock_pipeline.return_value = mock_pipeline_instance + + yield { + "qdrant": mock_qdrant, + "chunk_embedder": mock_chunk_embedder, + "openai_embedder": mock_openai_embedder, + "pipeline": mock_pipeline, + "parser": mock_parser, + "splitter": mock_splitter, + "qdrant_instance": mock_qdrant_instance, + "pipeline_instance": mock_pipeline_instance, + } + + +@pytest.mark.unit +class TestDocumentServiceInit: + """Tests for DocumentService initialization.""" + + def test_init_creates_vector_store(self, mock_dependencies, mock_settings): + """Test __init__ creates vector store.""" + from agentic_rag.services.document_service import DocumentService + + DocumentService() + + mock_dependencies["qdrant"].assert_called_with( + host="localhost", + port=6333, + ) + + def test_init_creates_embedder(self, mock_dependencies, mock_settings): + """Test __init__ creates embedder.""" + from agentic_rag.services.document_service import DocumentService + + DocumentService() + + mock_dependencies["openai_embedder"].assert_called_with( + api_key="test-key", + model_name="text-embedding-3-small", + ) + + def test_init_creates_pipeline(self, mock_dependencies, mock_settings): + """Test __init__ creates ingestion pipeline.""" + from agentic_rag.services.document_service import DocumentService + + DocumentService() + + mock_dependencies["pipeline"].assert_called_once() + + def test_init_creates_collection(self, mock_dependencies, mock_settings): + """Test __init__ creates documents collection.""" + from agentic_rag.services.document_service import DocumentService + + DocumentService() + + mock_dependencies["qdrant_instance"].create_collection.assert_called_once_with( + "documents", vector_config=[{"name": "embedding", "dimensions": 1536}] + ) + + def test_init_handles_existing_collection(self, mock_dependencies, mock_settings): + """Test __init__ handles existing collection gracefully.""" + from agentic_rag.services.document_service import DocumentService + + mock_dependencies["qdrant_instance"].create_collection.side_effect = Exception( + "Already exists" + ) + + # Should not raise exception + service = DocumentService() + + assert service is not None + + +@pytest.mark.unit +class TestDocumentServiceIngestDocument: + """Tests for ingest_document method.""" + + @pytest.fixture + def service(self, mock_dependencies, mock_settings): + """Create DocumentService with mocked dependencies.""" + from agentic_rag.services.document_service import DocumentService + + with patch("agentic_rag.services.document_service.settings", mock_settings): + service = DocumentService() + service.pipeline = mock_dependencies["pipeline_instance"] + return service + + @pytest.mark.asyncio + async def test_ingest_document_returns_dict(self, service): + """Test ingest_document returns dictionary.""" + result = await service.ingest_document("/path/to/doc.pdf") + + assert isinstance(result, dict) + + @pytest.mark.asyncio + async def test_ingest_document_contains_id(self, service): + """Test ingest_document result contains id.""" + result = await service.ingest_document("/path/to/doc.pdf") + + assert "id" in result + assert isinstance(result["id"], str) + + @pytest.mark.asyncio + async def test_ingest_document_contains_filename(self, service): + """Test ingest_document result contains filename.""" + result = await service.ingest_document("/path/to/my-document.pdf") + + assert "filename" in result + assert result["filename"] == "my-document.pdf" + + @pytest.mark.asyncio + async def test_ingest_document_contains_chunks_count(self, service): + """Test ingest_document result contains chunks_count.""" + result = await service.ingest_document("/path/to/doc.pdf") + + assert "chunks_count" in result + assert result["chunks_count"] == 3 + + @pytest.mark.asyncio + async def test_ingest_document_contains_metadata(self, service): + """Test ingest_document result contains metadata.""" + result = await service.ingest_document("/path/to/doc.pdf") + + assert "metadata" in result + assert result["metadata"] == {} + + @pytest.mark.asyncio + async def test_ingest_document_with_metadata(self, service): + """Test ingest_document with custom metadata.""" + metadata = {"author": "Test", "category": "Docs"} + + result = await service.ingest_document("/path/to/doc.pdf", metadata=metadata) + + assert result["metadata"] == metadata + + @pytest.mark.asyncio + async def test_ingest_document_calls_pipeline_run(self, service): + """Test ingest_document calls pipeline.run.""" + await service.ingest_document("/path/to/doc.pdf") + + service.pipeline.run.assert_called_once_with("/path/to/doc.pdf") + + @pytest.mark.asyncio + async def test_ingest_document_single_result(self, service): + """Test ingest_document with single chunk result.""" + service.pipeline.run.return_value = {"id": "1", "text": "Single"} + + result = await service.ingest_document("/path/to/doc.pdf") + + # When result is not a list, chunks_count should be 1 + assert result["chunks_count"] == 1 + + @pytest.mark.asyncio + async def test_ingest_document_empty_result(self, service): + """Test ingest_document with empty result.""" + service.pipeline.run.return_value = [] + + result = await service.ingest_document("/path/to/doc.pdf") + + assert result["chunks_count"] == 0 + + @pytest.mark.asyncio + async def test_ingest_document_extracts_filename(self, service): + """Test ingest_document extracts filename from path.""" + test_cases = [ + ("/path/to/file.pdf", "file.pdf"), + ("file.txt", "file.txt"), + ("/deep/nested/path/doc.docx", "doc.docx"), + ] + + for file_path, expected_filename in test_cases: + service.pipeline.run.return_value = [] + result = await service.ingest_document(file_path) + assert result["filename"] == expected_filename + + +@pytest.mark.unit +class TestDocumentServiceListDocuments: + """Tests for list_documents method.""" + + @pytest.fixture + def service(self, mock_dependencies, mock_settings): + """Create DocumentService with mocked dependencies.""" + from agentic_rag.services.document_service import DocumentService + + with patch("agentic_rag.services.document_service.settings", mock_settings): + service = DocumentService() + service.vector_store = mock_dependencies["qdrant_instance"] + return service + + @pytest.mark.asyncio + async def test_list_documents_returns_list(self, service): + """Test list_documents returns a list.""" + result = await service.list_documents() + + assert isinstance(result, list) + + @pytest.mark.asyncio + async def test_list_documents_returns_document_dicts(self, service): + """Test list_documents returns list of document dicts.""" + result = await service.list_documents() + + assert len(result) > 0 + assert isinstance(result[0], dict) + assert "id" in result[0] + assert "name" in result[0] + assert "status" in result[0] + + @pytest.mark.asyncio + async def test_list_documents_calls_get_collection(self, service): + """Test list_documents calls vector_store.get_collection.""" + await service.list_documents() + + service.vector_store.get_collection.assert_called_once_with("documents") + + @pytest.mark.asyncio + async def test_list_documents_handles_exception(self, service): + """Test list_documents handles exceptions gracefully.""" + # The current implementation doesn't handle exceptions - it just has a hardcoded return + # So we should test that it returns the hardcoded list + result = await service.list_documents() + + # The method returns a hardcoded list + assert isinstance(result, list) + + +@pytest.mark.unit +class TestDocumentServiceDeleteDocument: + """Tests for delete_document method.""" + + @pytest.fixture + def service(self, mock_dependencies, mock_settings): + """Create DocumentService with mocked dependencies.""" + from agentic_rag.services.document_service import DocumentService + + with patch("agentic_rag.services.document_service.settings", mock_settings): + service = DocumentService() + return service + + @pytest.mark.asyncio + async def test_delete_document_returns_bool(self, service): + """Test delete_document returns boolean.""" + result = await service.delete_document("doc-123") + + assert isinstance(result, bool) + + @pytest.mark.asyncio + async def test_delete_document_returns_true(self, service): + """Test delete_document returns True (placeholder implementation).""" + result = await service.delete_document("doc-123") + + assert result is True + + @pytest.mark.asyncio + async def test_delete_document_accepts_doc_id(self, service): + """Test delete_document accepts document ID.""" + # Should not raise exception + result = await service.delete_document("any-doc-id") + + assert result is True + + @pytest.mark.asyncio + async def test_delete_document_empty_id(self, service): + """Test delete_document with empty ID.""" + result = await service.delete_document("") + + assert result is True + + +@pytest.mark.unit +class TestGetDocumentService: + """Tests for get_document_service function.""" + + @patch("agentic_rag.services.document_service.DocumentService") + async def test_get_document_service_creates_new_instance(self, mock_service_class): + """Test get_document_service creates new instance when _document_service is None.""" + from agentic_rag.services.document_service import get_document_service + import agentic_rag.services.document_service as ds_module + + mock_instance = Mock() + mock_service_class.return_value = mock_instance + + # Reset singleton + ds_module._document_service = None + + result = await get_document_service() + + mock_service_class.assert_called_once() + assert result is mock_instance + + @patch("agentic_rag.services.document_service.DocumentService") + async def test_get_document_service_returns_existing(self, mock_service_class): + """Test get_document_service returns existing instance.""" + from agentic_rag.services.document_service import get_document_service + import agentic_rag.services.document_service as ds_module + + existing = Mock() + ds_module._document_service = existing + + result = await get_document_service() + + mock_service_class.assert_not_called() + assert result is existing + + @patch("agentic_rag.services.document_service.DocumentService") + async def test_get_document_service_singleton(self, mock_service_class): + """Test get_document_service returns same instance (singleton).""" + from agentic_rag.services.document_service import get_document_service + import agentic_rag.services.document_service as ds_module + + ds_module._document_service = None + mock_instance = Mock() + mock_service_class.return_value = mock_instance + + result1 = await get_document_service() + result2 = await get_document_service() + + assert result1 is result2 + mock_service_class.assert_called_once() + + +@pytest.mark.unit +class TestDocumentServiceEdgeCases: + """Tests for DocumentService edge cases.""" + + @pytest.fixture + def service(self, mock_dependencies, mock_settings): + """Create DocumentService with mocked dependencies.""" + from agentic_rag.services.document_service import DocumentService + + with patch("agentic_rag.services.document_service.settings", mock_settings): + service = DocumentService() + service.pipeline = mock_dependencies["pipeline_instance"] + return service + + @pytest.mark.asyncio + async def test_ingest_document_none_metadata(self, service): + """Test ingest_document with None metadata defaults to empty dict.""" + service.pipeline.run.return_value = [] + + result = await service.ingest_document("/path/to/doc.pdf", metadata=None) + + assert result["metadata"] == {} + + @pytest.mark.asyncio + async def test_ingest_document_special_chars_in_path(self, service): + """Test ingest_document with special characters in path.""" + service.pipeline.run.return_value = [] + + result = await service.ingest_document("/path/with spaces & special-chars/file(1).pdf") + + assert result["filename"] == "file(1).pdf" + + @pytest.mark.asyncio + async def test_ingest_document_unicode_path(self, service): + """Test ingest_document with unicode characters in path.""" + service.pipeline.run.return_value = [] + + result = await service.ingest_document("/path/文档/file.pdf") + + assert result["filename"] == "file.pdf" diff --git a/tests/unit/test_agentic_rag/test_services/test_rag_service.py b/tests/unit/test_agentic_rag/test_services/test_rag_service.py new file mode 100644 index 0000000..9306609 --- /dev/null +++ b/tests/unit/test_agentic_rag/test_services/test_rag_service.py @@ -0,0 +1,646 @@ +"""Tests for RAGService.""" + +import pytest +from unittest.mock import Mock, patch, AsyncMock, MagicMock + + +@pytest.fixture +def mock_settings(): + """Create mock settings for tests.""" + settings = Mock() + settings.embedding_api_key = "embedding-key" + settings.openai_api_key = "openai-key" + settings.embedding_model = "text-embedding-3-small" + settings.default_llm_provider = "openai" + return settings + + +@pytest.mark.unit +class TestRAGServiceInit: + """Tests for RAGService initialization.""" + + @patch("agentic_rag.services.rag_service.OpenAIEmbedder") + @patch("agentic_rag.services.rag_service.settings") + def test_init_creates_embedder_with_embedding_api_key(self, mock_settings, mock_embedder_class): + """Test __init__ creates embedder with embedding_api_key.""" + from agentic_rag.services.rag_service import RAGService + + mock_settings.embedding_api_key = "embedding-key" + mock_settings.openai_api_key = "openai-key" + mock_settings.embedding_model = "text-embedding-3-small" + + mock_embedder_instance = Mock() + mock_embedder_class.return_value = mock_embedder_instance + + service = RAGService() + + mock_embedder_class.assert_called_with( + api_key="embedding-key", + model_name="text-embedding-3-small", + ) + assert service.embedder is mock_embedder_instance + + @patch("agentic_rag.services.rag_service.OpenAIEmbedder") + @patch("agentic_rag.services.rag_service.settings") + def test_init_uses_openai_key_when_no_embedding_key(self, mock_settings, mock_embedder_class): + """Test __init__ uses openai_api_key when embedding_api_key is empty.""" + from agentic_rag.services.rag_service import RAGService + + mock_settings.embedding_api_key = "" + mock_settings.openai_api_key = "openai-key" + mock_settings.embedding_model = "text-embedding-3-small" + + mock_embedder_instance = Mock() + mock_embedder_class.return_value = mock_embedder_instance + + RAGService() + + mock_embedder_class.assert_called_with( + api_key="openai-key", + model_name="text-embedding-3-small", + ) + + @patch("agentic_rag.services.rag_service.OpenAIEmbedder") + @patch("agentic_rag.services.rag_service.settings") + def test_init_uses_embedding_model_from_settings(self, mock_settings, mock_embedder_class): + """Test __init__ uses embedding_model from settings.""" + from agentic_rag.services.rag_service import RAGService + + mock_settings.embedding_api_key = "key" + mock_settings.openai_api_key = "openai-key" + mock_settings.embedding_model = "custom-embedding-model" + + RAGService() + + call_kwargs = mock_embedder_class.call_args.kwargs + assert call_kwargs["model_name"] == "custom-embedding-model" + + +@pytest.mark.unit +class TestRAGServiceQuery: + """Tests for query method.""" + + @pytest.fixture + async def service(self): + """Create RAGService with mocked dependencies.""" + from agentic_rag.services.rag_service import RAGService + + with ( + patch("agentic_rag.services.rag_service.settings") as mock_settings, + patch("agentic_rag.services.rag_service.OpenAIEmbedder") as mock_embedder_class, + patch("agentic_rag.services.rag_service.get_vector_store") as mock_get_vs, + patch("agentic_rag.services.rag_service.get_llm_client") as mock_get_llm, + ): + mock_settings.embedding_api_key = "key" + mock_settings.openai_api_key = "openai-key" + mock_settings.embedding_model = "text-embedding-3-small" + mock_settings.default_llm_provider = "openai" + + mock_embedder = Mock() + mock_embedder.aembed = AsyncMock(return_value=[0.1] * 1536) + mock_embedder_class.return_value = mock_embedder + + mock_vector_store = Mock() + mock_vector_store.search = AsyncMock( + return_value=[ + {"id": "1", "text": "Chunk 1", "score": 0.95}, + {"id": "2", "text": "Chunk 2", "score": 0.85}, + ] + ) + mock_get_vs.return_value = mock_vector_store + + mock_llm_response = Mock() + mock_llm_response.text = "Test answer" + mock_llm_response.model = "gpt-4o-mini" + mock_llm_client = Mock() + mock_llm_client.invoke = AsyncMock(return_value=mock_llm_response) + mock_get_llm.return_value = mock_llm_client + + service = RAGService() + service.embedder = mock_embedder + + yield service, mock_get_vs, mock_get_llm, mock_vector_store, mock_llm_client + + @pytest.mark.asyncio + async def test_query_returns_dict(self, service): + """Test query returns dictionary.""" + service_instance, _, _, _, _ = service + + result = await service_instance.query("What is AI?") + + assert isinstance(result, dict) + + @pytest.mark.asyncio + async def test_query_contains_question(self, service): + """Test query result contains original question.""" + service_instance, _, _, _, _ = service + + result = await service_instance.query("What is AI?") + + assert result["question"] == "What is AI?" + + @pytest.mark.asyncio + async def test_query_contains_answer(self, service): + """Test query result contains answer.""" + service_instance, _, _, _, _ = service + + result = await service_instance.query("What is AI?") + + assert "answer" in result + assert result["answer"] == "Test answer" + + @pytest.mark.asyncio + async def test_query_contains_sources(self, service): + """Test query result contains sources.""" + service_instance, _, _, _, _ = service + + result = await service_instance.query("What is AI?") + + assert "sources" in result + assert len(result["sources"]) == 2 + + @pytest.mark.asyncio + async def test_query_contains_provider(self, service): + """Test query result contains provider.""" + service_instance, _, _, _, _ = service + + result = await service_instance.query("What is AI?") + + assert "provider" in result + + @pytest.mark.asyncio + async def test_query_contains_model(self, service): + """Test query result contains model.""" + service_instance, _, _, _, _ = service + + result = await service_instance.query("What is AI?") + + assert "model" in result + assert result["model"] == "gpt-4o-mini" + + @pytest.mark.asyncio + async def test_query_uses_default_provider(self, service): + """Test query uses default provider when not specified.""" + service_instance, _, mock_get_llm, _, _ = service + + await service_instance.query("What is AI?") + + call_kwargs = mock_get_llm.call_args.kwargs + assert call_kwargs["provider"] is None + + @pytest.mark.asyncio + async def test_query_uses_specified_provider(self, service): + """Test query uses specified provider.""" + service_instance, _, mock_get_llm, _, _ = service + + await service_instance.query("What is AI?", provider="anthropic") + + call_kwargs = mock_get_llm.call_args.kwargs + assert call_kwargs["provider"] == "anthropic" + + @pytest.mark.asyncio + async def test_query_uses_default_k(self, service): + """Test query uses default k=5.""" + service_instance, mock_get_vs, _, mock_vector_store, _ = service + + await service_instance.query("What is AI?") + + call_kwargs = mock_vector_store.search.call_args.kwargs + assert call_kwargs["k"] == 5 + + @pytest.mark.asyncio + async def test_query_uses_custom_k(self, service): + """Test query uses custom k value.""" + service_instance, mock_get_vs, _, mock_vector_store, _ = service + + await service_instance.query("What is AI?", k=10) + + call_kwargs = mock_vector_store.search.call_args.kwargs + assert call_kwargs["k"] == 10 + + @pytest.mark.asyncio + async def test_query_calls_get_vector_store(self, service): + """Test query calls get_vector_store.""" + service_instance, mock_get_vs, _, _, _ = service + + await service_instance.query("What is AI?") + + mock_get_vs.assert_called_once() + + @pytest.mark.asyncio + async def test_query_calls_get_llm_client(self, service): + """Test query calls get_llm_client.""" + service_instance, _, mock_get_llm, _, _ = service + + await service_instance.query("What is AI?") + + mock_get_llm.assert_called_once() + + @pytest.mark.asyncio + async def test_query_calls_vector_store_search(self, service): + """Test query calls vector_store.search.""" + service_instance, _, _, mock_vector_store, _ = service + + await service_instance.query("What is AI?") + + mock_vector_store.search.assert_called_once() + + @pytest.mark.asyncio + async def test_query_calls_llm_invoke(self, service): + """Test query calls llm_client.invoke.""" + service_instance, _, _, _, mock_llm_client = service + + await service_instance.query("What is AI?") + + mock_llm_client.invoke.assert_called_once() + + +@pytest.mark.unit +class TestRAGServiceGetEmbedding: + """Tests for _get_embedding method.""" + + @pytest.fixture + def service(self): + """Create RAGService with mocked embedder.""" + from agentic_rag.services.rag_service import RAGService + + with ( + patch("agentic_rag.services.rag_service.settings") as mock_settings, + patch("agentic_rag.services.rag_service.OpenAIEmbedder") as mock_embedder_class, + ): + mock_settings.embedding_api_key = "key" + mock_settings.openai_api_key = "openai-key" + mock_settings.embedding_model = "text-embedding-3-small" + + mock_embedder = Mock() + mock_embedder.aembed = AsyncMock(return_value=[0.1, 0.2, 0.3]) + mock_embedder_class.return_value = mock_embedder + + service = RAGService() + service.embedder = mock_embedder + yield service + + @pytest.mark.asyncio + async def test_get_embedding_returns_list(self, service): + """Test _get_embedding returns list.""" + result = await service._get_embedding("Test text") + + assert isinstance(result, list) + + @pytest.mark.asyncio + async def test_get_embedding_calls_embedder(self, service): + """Test _get_embedding calls embedder.aembed.""" + await service._get_embedding("Test text") + + service.embedder.aembed.assert_called_once_with("Test text") + + @pytest.mark.asyncio + async def test_get_embedding_returns_embedding_values(self, service): + """Test _get_embedding returns embedding values.""" + result = await service._get_embedding("Test text") + + assert result == [0.1, 0.2, 0.3] + + @pytest.mark.asyncio + async def test_get_embedding_empty_text(self, service): + """Test _get_embedding with empty text.""" + service.embedder.aembed.return_value = [] + + result = await service._get_embedding("") + + assert result == [] + + +@pytest.mark.unit +class TestRAGServiceFormatContext: + """Tests for _format_context method.""" + + @pytest.fixture + def service(self): + """Create RAGService.""" + from agentic_rag.services.rag_service import RAGService + + with ( + patch("agentic_rag.services.rag_service.settings") as mock_settings, + patch("agentic_rag.services.rag_service.OpenAIEmbedder"), + ): + mock_settings.embedding_api_key = "key" + mock_settings.embedding_model = "model" + + service = RAGService() + yield service + + def test_format_context_empty_list(self, service): + """Test _format_context with empty list returns empty string.""" + result = service._format_context([]) + + assert result == "" + + def test_format_context_single_chunk(self, service): + """Test _format_context with single chunk.""" + chunks = [{"text": "This is a test chunk."}] + + result = service._format_context(chunks) + + assert result == "[1] This is a test chunk." + + def test_format_context_multiple_chunks(self, service): + """Test _format_context with multiple chunks.""" + chunks = [ + {"text": "First chunk."}, + {"text": "Second chunk."}, + {"text": "Third chunk."}, + ] + + result = service._format_context(chunks) + + assert "[1] First chunk." in result + assert "[2] Second chunk." in result + assert "[3] Third chunk." in result + assert "\n\n" in result + + def test_format_context_skips_empty_text(self, service): + """Test _format_context skips chunks with empty text.""" + chunks = [ + {"text": "First chunk."}, + {"text": ""}, + {"text": "Third chunk."}, + ] + + result = service._format_context(chunks) + + # The implementation skips empty text chunks but keeps original indices + assert result.count("[") == 2 + # Chunks 1 and 3 are included with their original indices + assert "[1] First chunk." in result + assert "[3] Third chunk." in result + + def test_format_context_missing_text_key(self, service): + """Test _format_context handles chunks without text key.""" + chunks = [ + {"text": "Valid chunk."}, + {"id": "no-text"}, + {"text": "Another valid chunk."}, + ] + + result = service._format_context(chunks) + + # Chunks without 'text' key are skipped (get returns None/empty string, which is falsy) + # But original indices are preserved + assert result.count("[") == 2 + # Chunks 1 and 3 are included with their original indices + assert "[1] Valid chunk." in result + assert "[3] Another valid chunk." in result + + def test_format_context_large_number_of_chunks(self, service): + """Test _format_context with many chunks.""" + chunks = [{"text": f"Chunk {i}"} for i in range(100)] + + result = service._format_context(chunks) + + assert result.count("[") == 100 + assert "[100] Chunk 99" in result + + +@pytest.mark.unit +class TestRAGServiceBuildPrompt: + """Tests for _build_prompt method.""" + + @pytest.fixture + def service(self): + """Create RAGService.""" + from agentic_rag.services.rag_service import RAGService + + with ( + patch("agentic_rag.services.rag_service.settings") as mock_settings, + patch("agentic_rag.services.rag_service.OpenAIEmbedder"), + ): + mock_settings.embedding_api_key = "key" + mock_settings.embedding_model = "model" + + service = RAGService() + yield service + + def test_build_prompt_contains_context(self, service): + """Test _build_prompt includes context.""" + context = "[1] Context line 1." + question = "What is this?" + + result = service._build_prompt(context, question) + + assert context in result + + def test_build_prompt_contains_question(self, service): + """Test _build_prompt includes question.""" + context = "[1] Context line 1." + question = "What is this?" + + result = service._build_prompt(context, question) + + assert f"Question: {question}" in result + + def test_build_prompt_contains_instructions(self, service): + """Test _build_prompt includes instructions.""" + context = "[1] Context line 1." + question = "What is this?" + + result = service._build_prompt(context, question) + + assert "Instructions:" in result + assert "Answer based only on the provided context" in result + assert "Cite sources using [1], [2], etc." in result + + def test_build_prompt_contains_answer_marker(self, service): + """Test _build_prompt ends with Answer marker.""" + context = "[1] Context line 1." + question = "What is this?" + + result = service._build_prompt(context, question) + + assert result.strip().endswith("Answer:") + + def test_build_prompt_empty_context(self, service): + """Test _build_prompt with empty context.""" + context = "" + question = "What is this?" + + result = service._build_prompt(context, question) + + assert "Context:" in result + assert f"Question: {question}" in result + + def test_build_prompt_empty_question(self, service): + """Test _build_prompt with empty question.""" + context = "[1] Context line 1." + question = "" + + result = service._build_prompt(context, question) + + assert "Question:" in result + assert "Answer:" in result + + def test_build_prompt_format(self, service): + """Test _build_prompt overall format.""" + context = "[1] Context line 1.\n\n[2] Context line 2." + question = "What is AI?" + + result = service._build_prompt(context, question) + + # Check structure + assert result.startswith("You are a helpful AI assistant.") + assert "Context:" in result + assert context in result + assert f"Question: {question}" in result + assert "Instructions:" in result + assert "Answer based only on the provided context" in result + assert "If the context doesn't contain the answer" in result + assert "Be concise but complete" in result + assert "Cite sources using [1], [2], etc." in result + assert result.endswith("Answer:") + + +@pytest.mark.unit +class TestGetRAGService: + """Tests for get_rag_service function.""" + + @patch("agentic_rag.services.rag_service.RAGService") + async def test_get_rag_service_creates_new_instance(self, mock_service_class): + """Test get_rag_service creates new instance when _rag_service is None.""" + from agentic_rag.services.rag_service import get_rag_service + import agentic_rag.services.rag_service as rag_module + + mock_instance = Mock() + mock_service_class.return_value = mock_instance + + # Reset singleton + rag_module._rag_service = None + + result = await get_rag_service() + + mock_service_class.assert_called_once() + assert result is mock_instance + + @patch("agentic_rag.services.rag_service.RAGService") + async def test_get_rag_service_returns_existing(self, mock_service_class): + """Test get_rag_service returns existing instance.""" + from agentic_rag.services.rag_service import get_rag_service + import agentic_rag.services.rag_service as rag_module + + existing = Mock() + rag_module._rag_service = existing + + result = await get_rag_service() + + mock_service_class.assert_not_called() + assert result is existing + + @patch("agentic_rag.services.rag_service.RAGService") + async def test_get_rag_service_singleton(self, mock_service_class): + """Test get_rag_service returns same instance (singleton).""" + from agentic_rag.services.rag_service import get_rag_service + import agentic_rag.services.rag_service as rag_module + + rag_module._rag_service = None + mock_instance = Mock() + mock_service_class.return_value = mock_instance + + result1 = await get_rag_service() + result2 = await get_rag_service() + + assert result1 is result2 + mock_service_class.assert_called_once() + + +@pytest.mark.unit +class TestRAGServiceEdgeCases: + """Tests for RAGService edge cases.""" + + @pytest.fixture + async def service(self): + """Create RAGService with mocked dependencies.""" + from agentic_rag.services.rag_service import RAGService + + with ( + patch("agentic_rag.services.rag_service.settings") as mock_settings, + patch("agentic_rag.services.rag_service.OpenAIEmbedder") as mock_embedder_class, + patch("agentic_rag.services.rag_service.get_vector_store") as mock_get_vs, + patch("agentic_rag.services.rag_service.get_llm_client") as mock_get_llm, + ): + mock_settings.embedding_api_key = "key" + mock_settings.openai_api_key = "openai-key" + mock_settings.embedding_model = "text-embedding-3-small" + mock_settings.default_llm_provider = "openai" + + mock_embedder = Mock() + mock_embedder.aembed = AsyncMock(return_value=[0.1] * 1536) + mock_embedder_class.return_value = mock_embedder + + mock_vector_store = Mock() + mock_vector_store.search = AsyncMock(return_value=[]) + mock_get_vs.return_value = mock_vector_store + + mock_llm_response = Mock() + mock_llm_response.text = "No information available." + mock_llm_response.model = "gpt-4o-mini" + mock_llm_client = Mock() + mock_llm_client.invoke = AsyncMock(return_value=mock_llm_response) + mock_get_llm.return_value = mock_llm_client + + service = RAGService() + service.embedder = mock_embedder + + yield service + + @pytest.mark.asyncio + async def test_query_empty_sources(self, service): + """Test query with empty sources.""" + service_instance = service + + result = await service_instance.query("What is AI?") + + assert result["sources"] == [] + assert result["answer"] == "No information available." + + @pytest.mark.asyncio + async def test_query_none_model_in_response(self, service): + """Test query when response has no model attribute.""" + service_instance = service + + # Get the mock LLM client and change its response + with patch("agentic_rag.services.rag_service.get_llm_client") as mock_get_llm: + mock_response = Mock() + mock_response.text = "Answer" + # No model attribute + del mock_response.model + mock_response.model = "unknown" + + mock_llm = Mock() + mock_llm.invoke = AsyncMock(return_value=mock_response) + mock_get_llm.return_value = mock_llm + + result = await service_instance.query("What is AI?") + + # Should handle gracefully + assert "model" in result + + @pytest.mark.asyncio + async def test_query_long_question(self, service): + """Test query with very long question.""" + service_instance = service + + long_question = "What is " + "AI " * 1000 + "?" + + result = await service_instance.query(long_question) + + assert result["question"] == long_question + + @pytest.mark.asyncio + async def test_query_unicode_question(self, service): + """Test query with unicode question.""" + service_instance = service + + unicode_question = "What is AI? 人工智能は何ですか?" + + result = await service_instance.query(unicode_question) + + assert result["question"] == unicode_question diff --git a/tests/unit/test_agentic_rag/test_services/test_vector_store.py b/tests/unit/test_agentic_rag/test_services/test_vector_store.py new file mode 100644 index 0000000..1734529 --- /dev/null +++ b/tests/unit/test_agentic_rag/test_services/test_vector_store.py @@ -0,0 +1,393 @@ +"""Tests for VectorStoreService.""" + +import pytest +from unittest.mock import Mock, patch, AsyncMock, MagicMock + + +@pytest.mark.unit +class TestVectorStoreServiceInit: + """Tests for VectorStoreService initialization.""" + + @patch("agentic_rag.services.vector_store.QdrantVectorstore") + @patch("agentic_rag.services.vector_store.settings") + def test_init_creates_qdrant_client(self, mock_settings, mock_qdrant_class): + """Test __init__ creates QdrantVectorstore client.""" + from agentic_rag.services.vector_store import VectorStoreService + + mock_settings.qdrant_host = "test-host" + mock_settings.qdrant_port = 6333 + + service = VectorStoreService() + + mock_qdrant_class.assert_called_once() + call_kwargs = mock_qdrant_class.call_args.kwargs + assert call_kwargs["host"] == "test-host" + assert call_kwargs["port"] == 6333 + assert service.client is not None + + @patch("agentic_rag.services.vector_store.QdrantVectorstore") + @patch("agentic_rag.services.vector_store.settings") + def test_init_uses_settings_host(self, mock_settings, mock_qdrant_class): + """Test __init__ uses host from settings.""" + from agentic_rag.services.vector_store import VectorStoreService + + mock_settings.qdrant_host = "custom-host" + mock_settings.qdrant_port = 6333 + + VectorStoreService() + + call_kwargs = mock_qdrant_class.call_args.kwargs + assert call_kwargs["host"] == "custom-host" + + @patch("agentic_rag.services.vector_store.QdrantVectorstore") + @patch("agentic_rag.services.vector_store.settings") + def test_init_uses_settings_port(self, mock_settings, mock_qdrant_class): + """Test __init__ uses port from settings.""" + from agentic_rag.services.vector_store import VectorStoreService + + mock_settings.qdrant_host = "localhost" + mock_settings.qdrant_port = 9999 + + VectorStoreService() + + call_kwargs = mock_qdrant_class.call_args.kwargs + assert call_kwargs["port"] == 9999 + + +@pytest.mark.unit +class TestVectorStoreServiceCreateCollection: + """Tests for create_collection method.""" + + @pytest.fixture + def service(self): + """Create VectorStoreService with mocked client.""" + from agentic_rag.services.vector_store import VectorStoreService + + with ( + patch("agentic_rag.services.vector_store.settings") as mock_settings, + patch("agentic_rag.services.vector_store.QdrantVectorstore") as mock_qdrant_class, + ): + mock_settings.qdrant_host = "localhost" + mock_settings.qdrant_port = 6333 + + mock_client = Mock() + mock_qdrant_class.return_value = mock_client + + service = VectorStoreService() + service.client = mock_client + yield service + + @pytest.mark.asyncio + async def test_create_collection_success(self, service): + """Test create_collection returns True on success.""" + service.client.create_collection.return_value = None + + result = await service.create_collection("test-collection") + + assert result is True + + @pytest.mark.asyncio + async def test_create_collection_calls_client(self, service): + """Test create_collection calls client.create_collection.""" + await service.create_collection("test-collection") + + service.client.create_collection.assert_called_once() + + @pytest.mark.asyncio + async def test_create_collection_passes_name(self, service): + """Test create_collection passes collection name to client.""" + await service.create_collection("my-collection") + + call_args = service.client.create_collection.call_args + assert call_args[0][0] == "my-collection" + + @pytest.mark.asyncio + async def test_create_collection_passes_vector_config(self, service): + """Test create_collection passes vector config to client.""" + await service.create_collection("test-collection") + + call_args = service.client.create_collection.call_args + # Check that vector_config is passed (can be positional or keyword) + if len(call_args[0]) >= 2: + vector_config = call_args[0][1] + else: + vector_config = call_args.kwargs.get("vector_config") + + assert vector_config is not None + assert vector_config == [{"name": "embedding", "dimensions": 1536}] + + @pytest.mark.asyncio + async def test_create_collection_failure(self, service): + """Test create_collection returns False on exception.""" + service.client.create_collection.side_effect = Exception("Already exists") + + result = await service.create_collection("existing-collection") + + assert result is False + + @pytest.mark.asyncio + async def test_create_collection_handles_connection_error(self, service): + """Test create_collection handles connection errors.""" + service.client.create_collection.side_effect = ConnectionError("Connection refused") + + result = await service.create_collection("test-collection") + + assert result is False + + @pytest.mark.asyncio + async def test_create_collection_empty_name(self, service): + """Test create_collection with empty name.""" + service.client.create_collection.return_value = None + + result = await service.create_collection("") + + assert result is True + + @pytest.mark.asyncio + async def test_create_collection_special_chars_name(self, service): + """Test create_collection with special characters in name.""" + service.client.create_collection.return_value = None + + result = await service.create_collection("collection-123_test.v1") + + assert result is True + + +@pytest.mark.unit +class TestVectorStoreServiceSearch: + """Tests for search method.""" + + @pytest.fixture + def service(self): + """Create VectorStoreService with mocked client.""" + from agentic_rag.services.vector_store import VectorStoreService + + with ( + patch("agentic_rag.services.vector_store.settings") as mock_settings, + patch("agentic_rag.services.vector_store.QdrantVectorstore") as mock_qdrant_class, + ): + mock_settings.qdrant_host = "localhost" + mock_settings.qdrant_port = 6333 + + mock_client = Mock() + mock_qdrant_class.return_value = mock_client + + service = VectorStoreService() + service.client = mock_client + yield service + + @pytest.mark.asyncio + async def test_search_returns_results(self, service): + """Test search returns results from client.""" + expected_results = [ + {"id": "1", "text": "Chunk 1", "score": 0.95}, + {"id": "2", "text": "Chunk 2", "score": 0.85}, + ] + service.client.search.return_value = expected_results + + query_vector = [0.1] * 1536 + result = await service.search(query_vector) + + assert result == expected_results + + @pytest.mark.asyncio + async def test_search_calls_client_search(self, service): + """Test search calls client.search method.""" + query_vector = [0.1] * 1536 + await service.search(query_vector) + + service.client.search.assert_called_once() + + @pytest.mark.asyncio + async def test_search_passes_query_vector(self, service): + """Test search passes query_vector to client.""" + query_vector = [0.1, 0.2, 0.3] + await service.search(query_vector) + + call_kwargs = service.client.search.call_args.kwargs + assert call_kwargs["query_vector"] == query_vector + + @pytest.mark.asyncio + async def test_search_passes_collection_name(self, service): + """Test search passes collection name 'documents' to client.""" + query_vector = [0.1] * 1536 + await service.search(query_vector) + + call_kwargs = service.client.search.call_args.kwargs + assert call_kwargs["collection_name"] == "documents" + + @pytest.mark.asyncio + async def test_search_default_k_value(self, service): + """Test search uses default k=5.""" + query_vector = [0.1] * 1536 + await service.search(query_vector) + + call_kwargs = service.client.search.call_args.kwargs + assert call_kwargs["k"] == 5 + + @pytest.mark.asyncio + async def test_search_custom_k_value(self, service): + """Test search accepts custom k value.""" + query_vector = [0.1] * 1536 + await service.search(query_vector, k=10) + + call_kwargs = service.client.search.call_args.kwargs + assert call_kwargs["k"] == 10 + + @pytest.mark.asyncio + async def test_search_empty_results(self, service): + """Test search handles empty results.""" + service.client.search.return_value = [] + + query_vector = [0.1] * 1536 + result = await service.search(query_vector) + + assert result == [] + + @pytest.mark.asyncio + async def test_search_k_zero(self, service): + """Test search with k=0.""" + service.client.search.return_value = [] + + query_vector = [0.1] * 1536 + result = await service.search(query_vector, k=0) + + assert result == [] + + @pytest.mark.asyncio + async def test_search_empty_vector(self, service): + """Test search with empty vector.""" + service.client.search.return_value = [] + + result = await service.search([]) + + assert result == [] + + @pytest.mark.asyncio + async def test_search_large_k_value(self, service): + """Test search with large k value.""" + service.client.search.return_value = [{"id": "1", "text": "Result"}] + + query_vector = [0.1] * 1536 + result = await service.search(query_vector, k=1000) + + call_kwargs = service.client.search.call_args.kwargs + assert call_kwargs["k"] == 1000 + + +@pytest.mark.unit +class TestGetVectorStore: + """Tests for get_vector_store function.""" + + @patch("agentic_rag.services.vector_store.VectorStoreService") + async def test_get_vector_store_creates_new_instance(self, mock_service_class): + """Test get_vector_store creates new instance when _vector_store is None.""" + from agentic_rag.services.vector_store import get_vector_store + import agentic_rag.services.vector_store as vs_module + + mock_instance = Mock() + mock_service_class.return_value = mock_instance + + # Reset singleton + vs_module._vector_store = None + + result = await get_vector_store() + + mock_service_class.assert_called_once() + assert result is mock_instance + + @patch("agentic_rag.services.vector_store.VectorStoreService") + async def test_get_vector_store_returns_existing(self, mock_service_class): + """Test get_vector_store returns existing instance.""" + from agentic_rag.services.vector_store import get_vector_store + import agentic_rag.services.vector_store as vs_module + + existing = Mock() + vs_module._vector_store = existing + + result = await get_vector_store() + + mock_service_class.assert_not_called() + assert result is existing + + @patch("agentic_rag.services.vector_store.VectorStoreService") + async def test_get_vector_store_singleton(self, mock_service_class): + """Test get_vector_store returns same instance (singleton).""" + from agentic_rag.services.vector_store import get_vector_store + import agentic_rag.services.vector_store as vs_module + + vs_module._vector_store = None + mock_instance = Mock() + mock_service_class.return_value = mock_instance + + result1 = await get_vector_store() + result2 = await get_vector_store() + + assert result1 is result2 + mock_service_class.assert_called_once() + + @patch("agentic_rag.services.vector_store.VectorStoreService") + async def test_get_vector_store_returns_vector_store_service(self, mock_service_class): + """Test get_vector_store returns VectorStoreService instance.""" + from agentic_rag.services.vector_store import get_vector_store + import agentic_rag.services.vector_store as vs_module + + vs_module._vector_store = None + mock_instance = Mock() + mock_service_class.return_value = mock_instance + + result = await get_vector_store() + + assert result is mock_instance + + +@pytest.mark.unit +class TestVectorStoreServiceEdgeCases: + """Tests for VectorStoreService edge cases.""" + + @pytest.fixture + def service(self): + """Create VectorStoreService with mocked client.""" + from agentic_rag.services.vector_store import VectorStoreService + + with ( + patch("agentic_rag.services.vector_store.settings") as mock_settings, + patch("agentic_rag.services.vector_store.QdrantVectorstore") as mock_qdrant_class, + ): + mock_settings.qdrant_host = "localhost" + mock_settings.qdrant_port = 6333 + + mock_client = Mock() + mock_qdrant_class.return_value = mock_client + + service = VectorStoreService() + service.client = mock_client + yield service + + @pytest.mark.asyncio + async def test_search_negative_k(self, service): + """Test search with negative k value.""" + query_vector = [0.1] * 1536 + + # Should still work, validation is up to the client + await service.search(query_vector, k=-1) + + call_kwargs = service.client.search.call_args.kwargs + assert call_kwargs["k"] == -1 + + @pytest.mark.asyncio + async def test_create_collection_none_name(self, service): + """Test create_collection with None name.""" + service.client.create_collection.side_effect = TypeError() + + result = await service.create_collection(None) + + assert result is False + + @pytest.mark.asyncio + async def test_search_none_vector(self, service): + """Test search with None vector.""" + service.client.search.side_effect = TypeError() + + with pytest.raises(TypeError): + await service.search(None)