docs: add comprehensive README and project scaffolding

- README completo con istruzioni di installazione, configurazione e utilizzo
- API Swagger/OpenAPI documentata
- File env.example con variabili di configurazione
- Dockerfile multi-stage ottimizzato
- Docker Compose con Ollama e LLM Monitor
- Struttura completa dell'app FastAPI (main.py, config, api routes)
- Servizio client Ollama riutilizzabile
- Dashboard web HTML con TailwindCSS
- Test suite con pytest
- Makefile per comandi comuni
- CONTRIBUTING.md per i contributori
- LICENSE MIT
- .editorconfig e .dockerignore
- requirements.txt e requirements-dev.txt
This commit is contained in:
Luca Sacchi Ricciardi
2026-04-24 19:11:58 +02:00
commit 4b782ffdc8
28 changed files with 2087 additions and 0 deletions
+5
View File
@@ -0,0 +1,5 @@
"""
LLM Monitor - main package
"""
__version__ = "1.0.0"
+3
View File
@@ -0,0 +1,3 @@
"""
API routes
"""
+70
View File
@@ -0,0 +1,70 @@
"""
Health check endpoints
"""
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from datetime import datetime
import requests
import logging
from app.config import settings
logger = logging.getLogger(__name__)
router = APIRouter()
class HealthResponse(BaseModel):
    """Response body returned by the ``/health`` endpoint."""

    # Status of the API process itself.
    status: str
    # Reachability of the Ollama backend ("online" / "offline").
    ollama_status: str
    # Moment at which the check was performed.
    timestamp: datetime

    # Pydantic v2 configuration. The nested ``class Config`` form is deprecated
    # in v2 and emits a DeprecationWarning when the class is created;
    # ``model_config`` carries exactly the same setting without the warning.
    model_config = {
        "json_schema_extra": {
            "example": {
                "status": "healthy",
                "ollama_status": "online",
                "timestamp": "2024-01-15T10:30:00Z",
            }
        }
    }
@router.get("/health", response_model=HealthResponse)
async def health_check():
    """Report the health of the API and of the Ollama backend.

    The API is always reported as ``"healthy"``. Ollama is probed with
    ``GET /api/tags`` and reported as ``"online"`` only when it answers with
    HTTP 200; any other status code or any exception yields ``"offline"``.

    Returns:
        HealthResponse: API status, Ollama status and a UTC timestamp.
    """
    # Function-scope imports keep this handler self-contained.
    from datetime import timezone

    from fastapi.concurrency import run_in_threadpool

    try:
        # ``requests`` is a blocking client: run it in the worker thread pool
        # so that a slow or unreachable Ollama cannot stall the event loop
        # (and therefore every other request) until the timeout expires.
        response = await run_in_threadpool(
            requests.get,
            f"{settings.OLLAMA_HOST}/api/tags",
            timeout=settings.OLLAMA_TIMEOUT,
        )
        ollama_status = "online" if response.status_code == 200 else "offline"
    except Exception as e:
        # Best-effort probe: a failure is reported in the payload, never raised.
        logger.warning(f"Ollama health check failed: {e}")
        ollama_status = "offline"

    return HealthResponse(
        status="healthy",
        ollama_status=ollama_status,
        # Timezone-aware "now": ``datetime.utcnow()`` is deprecated and returns
        # a naive value that is serialized without any UTC marker.
        timestamp=datetime.now(timezone.utc),
    )
@router.get("/ready")
async def ready():
    """Readiness probe for Kubernetes/Docker.

    The service is considered ready only when Ollama answers
    ``GET /api/tags`` with HTTP 200 within 5 seconds.

    Returns:
        dict: ``{"status": "ready"}`` when Ollama is reachable.

    Raises:
        HTTPException: 503 when Ollama cannot be reached or answers with a
            status other than 200.
    """
    from fastapi.concurrency import run_in_threadpool

    try:
        # Blocking HTTP call: off-load it so the event loop stays responsive.
        response = await run_in_threadpool(
            requests.get,
            f"{settings.OLLAMA_HOST}/api/tags",
            timeout=5,
        )
    except Exception as e:
        logger.error(f"Readiness check failed: {e}")
        raise HTTPException(status_code=503, detail="Service unavailable")

    # Checked outside the ``try`` so the 503 raised here is not caught and
    # re-wrapped by the generic handler above.
    if response.status_code != 200:
        logger.error(
            f"Readiness check failed: Ollama returned HTTP {response.status_code}"
        )
        raise HTTPException(status_code=503, detail="Service unavailable")

    return {"status": "ready"}
+232
View File
@@ -0,0 +1,232 @@
"""
Models endpoints - management of Ollama models
"""
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from typing import List, Optional
from datetime import datetime
import requests
import logging
from app.config import settings
logger = logging.getLogger(__name__)
router = APIRouter()
class ModelInfo(BaseModel):
    """Information about a single Ollama model."""

    # Model name as reported by Ollama.
    name: str
    # Content digest identifying the model.
    digest: str
    # Size of the model (the dashboard renders it as a byte count).
    size: int
    # Last modification time reported by Ollama.
    modified_at: datetime

    # Pydantic v2 configuration. The nested ``class Config`` form is deprecated
    # in v2 and emits a DeprecationWarning when the class is created;
    # ``model_config`` carries exactly the same setting without the warning.
    model_config = {
        "json_schema_extra": {
            "example": {
                "name": "llama2",
                "digest": "abc123def456...",
                "size": 3825922048,
                "modified_at": "2024-01-15T10:30:00Z",
            }
        }
    }
class ModelsResponse(BaseModel):
    """Response carrying the list of models."""

    # Models currently known to Ollama.
    models: List[ModelInfo]
    # Number of entries in ``models``.
    total: int

    # Pydantic v2 configuration. The nested ``class Config`` form is deprecated
    # in v2 and emits a DeprecationWarning when the class is created;
    # ``model_config`` carries exactly the same setting without the warning.
    model_config = {
        "json_schema_extra": {
            "example": {
                "models": [
                    {
                        "name": "llama2",
                        "digest": "abc123def456...",
                        "size": 3825922048,
                        "modified_at": "2024-01-15T10:30:00Z",
                    }
                ],
                "total": 1,
            }
        }
    }
@router.get("/models", response_model=ModelsResponse)
async def get_models():
    """Return every model currently known to Ollama.

    Returns:
        ModelsResponse: The available models and their count.

    Raises:
        HTTPException: 502 when Ollama answers with a non-200 status or the
            connection fails, 504 when Ollama does not answer in time, 500 on
            any other error (for example an unparsable payload).
    """
    # Function-scope imports keep this handler self-contained.
    from datetime import timezone

    from fastapi.concurrency import run_in_threadpool

    try:
        # Blocking HTTP call: off-load it so the event loop stays responsive.
        response = await run_in_threadpool(
            requests.get,
            f"{settings.OLLAMA_HOST}/api/tags",
            timeout=settings.OLLAMA_TIMEOUT,
        )
        if response.status_code != 200:
            raise HTTPException(
                status_code=502,
                detail="Ollama non disponibile",
            )

        data = response.json()
        models_data = data.get("models", [])
        models = [
            ModelInfo(
                name=model.get("name", "unknown"),
                digest=model.get("digest", ""),
                size=model.get("size", 0),
                modified_at=(
                    # ``fromisoformat`` historically rejects a trailing "Z".
                    datetime.fromisoformat(
                        model.get("modified_at", "").replace("Z", "+00:00")
                    )
                    if model.get("modified_at")
                    # Aware fallback, so naive and aware values are never mixed.
                    else datetime.now(timezone.utc)
                ),
            )
            for model in models_data
        ]
        return ModelsResponse(models=models, total=len(models))
    except HTTPException:
        # Deliberate HTTP errors (the 502 above) must reach the client as-is
        # instead of being turned into a 500 by the generic handler below.
        raise
    except requests.exceptions.Timeout:
        raise HTTPException(
            status_code=504,
            detail="Timeout: Ollama non ha risposto in tempo",
        )
    except requests.exceptions.ConnectionError:
        raise HTTPException(
            status_code=502,
            detail="Impossibile connettersi a Ollama",
        )
    except Exception as e:
        logger.error(f"Error fetching models: {e}")
        raise HTTPException(
            status_code=500,
            detail="Errore nel recupero dei modelli",
        )
@router.get("/models/{model_name}", response_model=ModelInfo)
async def get_model(model_name: str):
    """Return the information of one specific model.

    The model is looked up by exact name in the list returned by Ollama's
    ``/api/tags`` endpoint.

    Args:
        model_name: Name of the model to look for.

    Returns:
        ModelInfo: Information about the matching model.

    Raises:
        HTTPException: 404 when no model has that name, 502 when Ollama
            answers with a non-200 status or the connection fails, 504 when
            Ollama does not answer in time, 500 on any other error.
    """
    # Function-scope imports keep this handler self-contained.
    from datetime import timezone

    from fastapi.concurrency import run_in_threadpool

    try:
        # Blocking HTTP call: off-load it so the event loop stays responsive.
        response = await run_in_threadpool(
            requests.get,
            f"{settings.OLLAMA_HOST}/api/tags",
            timeout=settings.OLLAMA_TIMEOUT,
        )
        if response.status_code != 200:
            raise HTTPException(
                status_code=502,
                detail="Ollama non disponibile",
            )

        data = response.json()
        models_data = data.get("models", [])

        # Look for the requested model.
        for model in models_data:
            if model.get("name") == model_name:
                return ModelInfo(
                    name=model.get("name", "unknown"),
                    digest=model.get("digest", ""),
                    size=model.get("size", 0),
                    modified_at=(
                        # ``fromisoformat`` historically rejects a trailing "Z".
                        datetime.fromisoformat(
                            model.get("modified_at", "").replace("Z", "+00:00")
                        )
                        if model.get("modified_at")
                        # Aware fallback, consistent with parsed timestamps.
                        else datetime.now(timezone.utc)
                    ),
                )

        raise HTTPException(
            status_code=404,
            detail=f"Modello '{model_name}' non trovato",
        )
    except HTTPException:
        raise
    except requests.exceptions.Timeout:
        # Same mapping as the list endpoint, instead of a generic 500.
        raise HTTPException(
            status_code=504,
            detail="Timeout: Ollama non ha risposto in tempo",
        )
    except requests.exceptions.ConnectionError:
        raise HTTPException(
            status_code=502,
            detail="Impossibile connettersi a Ollama",
        )
    except Exception as e:
        logger.error(f"Error fetching model: {e}")
        raise HTTPException(
            status_code=500,
            detail="Errore nel recupero del modello",
        )
@router.post("/models/{model_name}/pull")
async def pull_model(model_name: str):
    """Ask Ollama to download a model.

    The request to Ollama is sent without a timeout because a pull can take
    a long time.

    NOTE(review): Ollama's pull endpoint reportedly streams progress until
    the download ends, so this handler probably answers only once the pull
    is over even though it reports ``"pulling"`` - confirm.

    Args:
        model_name: Name of the model to download.

    Returns:
        dict: ``{"status": "pulling", "model": <model_name>}``.

    Raises:
        HTTPException: 502 when Ollama answers with a status other than
            200/201, 500 on any other error.
    """
    from fastapi.concurrency import run_in_threadpool

    try:
        # A blocking call with no timeout must never run on the event loop:
        # it would freeze the whole API for the duration of the download.
        response = await run_in_threadpool(
            requests.post,
            f"{settings.OLLAMA_HOST}/api/pull",
            json={"name": model_name},
            timeout=None,  # a pull can take a long time
        )
        if response.status_code not in [200, 201]:
            raise HTTPException(
                status_code=502,
                detail="Errore nel pull del modello",
            )
        return {"status": "pulling", "model": model_name}
    except HTTPException:
        # Keep the 502 above instead of converting it into a 500 below.
        raise
    except Exception as e:
        logger.error(f"Error pulling model: {e}")
        raise HTTPException(
            status_code=500,
            detail="Errore nel pull del modello",
        )
@router.delete("/models/{model_name}")
async def delete_model(model_name: str):
    """Delete a model from Ollama.

    Args:
        model_name: Name of the model to delete.

    Returns:
        dict: ``{"status": "deleted", "model": <model_name>}`` as confirmation.

    Raises:
        HTTPException: 404 when Ollama reports that the model does not exist,
            502 when Ollama answers with any other status outside 200/204,
            500 on any other error.
    """
    from fastapi.concurrency import run_in_threadpool

    try:
        # Blocking HTTP call: off-load it so the event loop stays responsive.
        response = await run_in_threadpool(
            requests.delete,
            f"{settings.OLLAMA_HOST}/api/delete",
            json={"name": model_name},
            timeout=settings.OLLAMA_TIMEOUT,
        )
        # Ollama answers 404 for an unknown model: report that as "not found"
        # rather than as a gateway failure.
        if response.status_code == 404:
            raise HTTPException(
                status_code=404,
                detail=f"Modello '{model_name}' non trovato",
            )
        if response.status_code not in [200, 204]:
            raise HTTPException(
                status_code=502,
                detail="Errore nell'eliminazione del modello",
            )
        return {"status": "deleted", "model": model_name}
    except HTTPException:
        # Keep the status chosen above instead of converting it into a 500.
        raise
    except Exception as e:
        logger.error(f"Error deleting model: {e}")
        raise HTTPException(
            status_code=500,
            detail="Errore nell'eliminazione del modello",
        )
+34
View File
@@ -0,0 +1,34 @@
"""
Application configuration through environment variables
"""
from pydantic_settings import BaseSettings
from typing import List
class Settings(BaseSettings):
    """Application configuration.

    Each field can be overridden through an environment variable of the same
    name or through the ``.env`` file configured below.
    """

    # Ollama
    OLLAMA_HOST: str = "http://localhost:11434"
    OLLAMA_TIMEOUT: int = 30

    # API
    API_HOST: str = "0.0.0.0"
    API_PORT: int = 8000
    API_WORKERS: int = 4

    # CORS: allowed origins as a single comma-separated string.
    CORS_ORIGINS: str = "http://localhost:3000,http://localhost:5173,http://localhost:8000"

    # Logging
    LOG_LEVEL: str = "INFO"

    # Environment
    ENVIRONMENT: str = "development"

    # Pydantic v2 style configuration. The nested ``class Config`` form is
    # deprecated and emits a DeprecationWarning when the class is created;
    # ``model_config`` carries the same two options without the warning.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
    }
# Global configuration instance
settings = Settings()
+3
View File
@@ -0,0 +1,3 @@
"""
Services - Business logic
"""
+116
View File
@@ -0,0 +1,116 @@
"""
Ollama client service
"""
import requests
import logging
from typing import List, Dict, Optional
from app.config import settings
logger = logging.getLogger(__name__)
class OllamaClient:
    """Client for the Ollama HTTP API.

    Every method is best-effort: errors are logged and turned into an empty
    or falsy result instead of being raised to the caller.
    """

    def __init__(self, host: Optional[str] = None, timeout: Optional[int] = None):
        """Create a client.

        Args:
            host: Base URL of Ollama; falls back to ``settings.OLLAMA_HOST``.
            timeout: Request timeout in seconds; falls back to
                ``settings.OLLAMA_TIMEOUT``.
        """
        # Strip trailing slashes so that f"{self.host}/api/..." never produces
        # a double slash when the host is configured as "http://host:11434/".
        self.host = (host or settings.OLLAMA_HOST).rstrip("/")
        self.timeout = timeout or settings.OLLAMA_TIMEOUT

    def get_models(self) -> List[Dict]:
        """Fetch the list of models from Ollama.

        Returns:
            List[Dict]: The models, or an empty list when the request fails.
        """
        try:
            response = requests.get(
                f"{self.host}/api/tags",
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json().get("models", [])
        except Exception as e:
            logger.error(f"Error getting models from Ollama: {e}")
            return []

    def get_model(self, model_name: str) -> Optional[Dict]:
        """Fetch the information of one specific model.

        Args:
            model_name: Name of the model.

        Returns:
            Optional[Dict]: The model entry, or None when it is not found.
        """
        try:
            return next(
                (
                    model
                    for model in self.get_models()
                    if model.get("name") == model_name
                ),
                None,
            )
        except Exception as e:
            logger.error(f"Error getting model {model_name}: {e}")
            return None

    def is_available(self) -> bool:
        """Check whether Ollama is reachable.

        Uses a fixed 5-second timeout rather than the configured one.

        Returns:
            bool: True when ``/api/tags`` answers with HTTP 200.
        """
        try:
            response = requests.get(
                f"{self.host}/api/tags",
                timeout=5,
            )
            return response.status_code == 200
        except Exception:
            return False

    def pull_model(self, model_name: str) -> bool:
        """Ask Ollama to download a model.

        The request is sent without a timeout.

        Args:
            model_name: Name of the model.

        Returns:
            bool: True when Ollama answers with HTTP 200 or 201.
        """
        try:
            response = requests.post(
                f"{self.host}/api/pull",
                json={"name": model_name},
                timeout=None,
            )
            return response.status_code in [200, 201]
        except Exception as e:
            logger.error(f"Error pulling model {model_name}: {e}")
            return False

    def delete_model(self, model_name: str) -> bool:
        """Delete a model.

        Args:
            model_name: Name of the model.

        Returns:
            bool: True when Ollama answers with HTTP 200 or 204.
        """
        try:
            response = requests.delete(
                f"{self.host}/api/delete",
                json={"name": model_name},
                timeout=self.timeout,
            )
            return response.status_code in [200, 204]
        except Exception as e:
            logger.error(f"Error deleting model {model_name}: {e}")
            return False
# Global Ollama client instance
ollama_client = OllamaClient()
+3
View File
@@ -0,0 +1,3 @@
"""
Web templates and static files
"""
+224
View File
@@ -0,0 +1,224 @@
<!DOCTYPE html>
<html lang="it">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>LLM Monitor - Dashboard Ollama</title>
<script src="https://cdn.tailwindcss.com"></script>
<style>
@keyframes spin {
to { transform: rotate(360deg); }
}
.animate-spin {
animation: spin 1s linear infinite;
}
</style>
</head>
<body class="bg-gray-900 text-white">
<div class="min-h-screen flex flex-col">
<!-- Header -->
<header class="bg-gray-800 border-b border-gray-700 sticky top-0 z-50">
<div class="max-w-7xl mx-auto px-4 py-6">
<div class="flex items-center justify-between">
<div class="flex items-center gap-3">
<div class="w-10 h-10 bg-gradient-to-br from-purple-500 to-pink-500 rounded-lg flex items-center justify-center font-bold text-lg">
🦙
</div>
<h1 class="text-2xl font-bold">LLM Monitor</h1>
</div>
<div class="flex items-center gap-4">
<div id="health-status" class="flex items-center gap-2">
<div id="status-indicator" class="w-3 h-3 bg-gray-500 rounded-full"></div>
<span id="status-text" class="text-sm text-gray-400">Controllo...</span>
</div>
</div>
</div>
</div>
</header>
<!-- Main Content -->
<main class="flex-1">
<div class="max-w-7xl mx-auto px-4 py-8">
<!-- Stats Cards -->
<div class="grid grid-cols-1 md:grid-cols-3 gap-6 mb-8">
<div class="bg-gray-800 rounded-lg p-6 border border-gray-700">
<div class="text-gray-400 text-sm font-medium">Modelli Caricati</div>
<div id="models-count" class="text-4xl font-bold mt-2">-</div>
</div>
<div class="bg-gray-800 rounded-lg p-6 border border-gray-700">
<div class="text-gray-400 text-sm font-medium">Spazio Totale</div>
<div id="total-size" class="text-4xl font-bold mt-2">-</div>
</div>
<div class="bg-gray-800 rounded-lg p-6 border border-gray-700">
<div class="text-gray-400 text-sm font-medium">Status Ollama</div>
<div id="ollama-status" class="text-4xl font-bold mt-2">-</div>
</div>
</div>
<!-- Models Section -->
<div class="bg-gray-800 rounded-lg border border-gray-700 p-6">
<div class="flex items-center justify-between mb-6">
<h2 class="text-xl font-bold">Modelli Disponibili</h2>
<button onclick="loadModels()" class="bg-purple-600 hover:bg-purple-700 px-4 py-2 rounded-lg text-sm font-medium transition">
🔄 Aggiorna
</button>
</div>
<!-- Models List -->
<div id="models-container" class="space-y-4">
<div class="text-center py-8">
<div class="animate-spin inline-block w-8 h-8 border-4 border-gray-600 border-t-purple-500 rounded-full"></div>
<p class="text-gray-400 mt-4">Caricamento modelli...</p>
</div>
</div>
</div>
<!-- API Documentation Section -->
<div class="mt-8 bg-blue-900 bg-opacity-20 border border-blue-700 rounded-lg p-6">
<h3 class="text-lg font-bold mb-4">📚 Documentazione API</h3>
<p class="text-gray-300 mb-4">La API è documentata e testabile direttamente da:</p>
<div class="flex gap-3 flex-wrap">
<a href="/docs" target="_blank" class="inline-block bg-blue-600 hover:bg-blue-700 px-4 py-2 rounded-lg text-sm font-medium transition">
Swagger UI
</a>
<a href="/redoc" target="_blank" class="inline-block bg-blue-600 hover:bg-blue-700 px-4 py-2 rounded-lg text-sm font-medium transition">
ReDoc
</a>
</div>
</div>
</div>
</main>
<!-- Footer -->
<footer class="bg-gray-800 border-t border-gray-700 mt-12">
<div class="max-w-7xl mx-auto px-4 py-6 text-center text-gray-400 text-sm">
<p>LLM Monitor v1.0.0 • Fatto con ❤️ da <a href="https://lucasacchi.net" target="_blank" class="text-purple-400 hover:text-purple-300">LucaSacchi.Net</a></p>
</div>
</footer>
</div>
<script>
// Base path of the backend API; the fetch calls in this script are built on it.
const API_BASE = "/api/v1";
// Format a byte count as a human-readable string, e.g. 2048 -> "2.00 KB".
// Values that are not finite positive numbers are rendered as "0 B".
function formatBytes(bytes) {
    const value = Number(bytes);
    if (!Number.isFinite(value) || value <= 0) return "0 B";
    const k = 1024;
    const sizes = ["B", "KB", "MB", "GB", "TB", "PB"];
    // Clamp the unit index: values below 1 byte would give a negative index
    // and values beyond the last unit would run past the table, both of
    // which used to render "undefined" as the unit.
    const exponent = Math.floor(Math.log(value) / Math.log(k));
    const i = Math.min(Math.max(exponent, 0), sizes.length - 1);
    return (value / Math.pow(k, i)).toFixed(2) + " " + sizes[i];
}
// Render a date string with the Italian locale (day, short month, year,
// two-digit hour and minute).
function formatDate(dateString) {
    const displayOptions = {
        year: "numeric",
        month: "short",
        day: "numeric",
        hour: "2-digit",
        minute: "2-digit"
    };
    const parsed = new Date(dateString);
    return parsed.toLocaleDateString("it-IT", displayOptions);
}
// Query the health endpoint and mirror Ollama's state in the header badge
// and in the "Status Ollama" card.
async function checkHealth() {
    const indicator = document.getElementById("status-indicator");
    const label = document.getElementById("status-text");
    const card = document.getElementById("ollama-status");
    try {
        const response = await fetch(`${API_BASE}/health`);
        // fetch() rejects only on network failures. An HTTP error must be
        // surfaced explicitly, otherwise the header stays on "Controllo...".
        if (!response.ok) throw new Error(`HTTP ${response.status}`);
        const data = await response.json();
        const online = data.ollama_status === "online";
        indicator.className = online
            ? "w-3 h-3 bg-green-500 rounded-full"
            : "w-3 h-3 bg-red-500 rounded-full";
        label.className = online ? "text-sm text-green-400" : "text-sm text-red-400";
        label.textContent = online ? "Ollama Online" : "Ollama Offline";
        card.textContent = online ? "🟢 Online" : "🔴 Offline";
    } catch (error) {
        console.error("Health check error:", error);
        indicator.className = "w-3 h-3 bg-red-500 rounded-full";
        // Also reset the label colour and the card, so a previous "online"
        // state is not left on screen after the API becomes unreachable.
        label.className = "text-sm text-red-400";
        label.textContent = "Errore connessione";
        card.textContent = "-";
    }
}
// Fetch the model list and render the counters and one card per model.
async function loadModels() {
    // Escape a value before interpolating it into an HTML template: model
    // names, digests and error messages are data, never markup.
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
        "'": "&#39;"
    })[ch]);
    const container = document.getElementById("models-container");
    try {
        const response = await fetch(`${API_BASE}/models`);
        if (!response.ok) throw new Error("Errore nel caricamento");
        const data = await response.json();
        const models = data.models || [];

        // Update the model counter.
        document.getElementById("models-count").textContent = models.length;

        // Compute the total size; a missing size counts as 0 instead of
        // turning the whole sum into NaN.
        const totalSize = models.reduce((sum, m) => sum + (Number(m.size) || 0), 0);
        document.getElementById("total-size").textContent = formatBytes(totalSize);

        // Render the models.
        if (models.length === 0) {
            container.innerHTML = `
                <div class="text-center py-8 text-gray-400">
                    <p>Nessun modello caricato</p>
                </div>
            `;
            return;
        }
        container.innerHTML = models.map(model => {
            const digest = String(model.digest || "");
            // Append the ellipsis only when the digest is actually cut.
            const shortDigest = digest.length > 64 ? digest.substring(0, 64) + "..." : digest;
            return `
                <div class="bg-gray-700 rounded-lg p-4 border border-gray-600 hover:border-purple-500 transition">
                    <div class="flex items-start justify-between mb-3">
                        <h3 class="text-lg font-semibold">${escapeHtml(model.name)}</h3>
                        <span class="bg-purple-600 px-3 py-1 rounded text-xs font-medium">Caricato</span>
                    </div>
                    <div class="grid grid-cols-2 gap-4 text-sm">
                        <div>
                            <p class="text-gray-400">Dimensione</p>
                            <p class="font-semibold">${escapeHtml(formatBytes(model.size))}</p>
                        </div>
                        <div>
                            <p class="text-gray-400">Ultimo aggiornamento</p>
                            <p class="font-semibold">${escapeHtml(formatDate(model.modified_at))}</p>
                        </div>
                    </div>
                    <div class="mt-3">
                        <p class="text-gray-400 text-xs">Digest</p>
                        <p class="font-mono text-xs bg-gray-800 p-2 rounded mt-1 break-all">${escapeHtml(shortDigest)}</p>
                    </div>
                </div>
            `;
        }).join("");
    } catch (error) {
        console.error("Error loading models:", error);
        container.innerHTML = `
            <div class="text-center py-8 text-red-400">
                <p>❌ Errore nel caricamento dei modelli</p>
                <p class="text-sm mt-2">${escapeHtml(error.message)}</p>
            </div>
        `;
    }
}
// Initialization: load the dashboard once the DOM is ready, then refresh it
// every 30 seconds.
document.addEventListener("DOMContentLoaded", () => {
    const refreshDashboard = () => {
        checkHealth();
        loadModels();
    };
    refreshDashboard();
    setInterval(refreshDashboard, 30000);
});
</script>
</body>
</html>