diff --git a/app/api/models.py b/app/api/models.py index 5991017..3830831 100644 --- a/app/api/models.py +++ b/app/api/models.py @@ -119,6 +119,54 @@ async def get_models(): detail="Errore nel recupero dei modelli" ) + +@router.get("/models/running") +async def get_running_models() -> Dict[str, Any]: + """ + Recupera i modelli attualmente residenti in memoria, equivalenti a `ollama ps`. + + Returns: + Dict[str, Any]: Payload con modelli running e conteggio + """ + try: + response = requests.get( + f"{settings.OLLAMA_HOST}/api/ps", + timeout=settings.OLLAMA_TIMEOUT + ) + + if response.status_code != 200: + raise HTTPException( + status_code=502, + detail="Ollama non disponibile" + ) + + data = response.json() + models_data = data.get("models", []) + + return { + "models": models_data, + "total": len(models_data) + } + + except requests.exceptions.Timeout: + raise HTTPException( + status_code=504, + detail="Timeout: Ollama non ha risposto in tempo" + ) + except requests.exceptions.ConnectionError: + raise HTTPException( + status_code=502, + detail="Impossibile connettersi a Ollama" + ) + except HTTPException: + raise + except Exception as e: + logger.error(f"Error fetching running models: {e}") + raise HTTPException( + status_code=500, + detail="Errore nel recupero dei modelli residenti" + ) + @router.get("/models/{model_name}", response_model=ModelInfo) async def get_model(model_name: str): """ diff --git a/app/web/static/js/models-running.js b/app/web/static/js/models-running.js new file mode 100644 index 0000000..f5cf950 --- /dev/null +++ b/app/web/static/js/models-running.js @@ -0,0 +1,160 @@ +class RunningModelsPage { + constructor() { + this.init(); + } + + init() { + document.getElementById("refresh-btn")?.addEventListener("click", () => { + this.loadRunningModels(); + }); + + this.loadRunningModels(); + } + + async loadRunningModels() { + const container = document.getElementById("running-models"); + if (!container) { + return; + } + + container.innerHTML = ` +
+
+

Aggiornamento in corso...

+
+ `; + + try { + const response = await fetch("/api/v1/models/running"); + if (!response.ok) { + throw new Error("Errore nel caricamento dei modelli residenti"); + } + + const data = await response.json(); + const models = data.models || []; + + this.renderStats(models); + this.renderRunningModels(models); + } catch (error) { + container.innerHTML = ` +
+

Errore nel caricamento di ollama ps

+
+ `; + this.renderStats([]); + console.error(error); + } + } + + renderStats(models) { + const runningCountEl = document.getElementById("running-count"); + const vramTotalEl = document.getElementById("vram-total"); + const lastRefreshEl = document.getElementById("last-refresh"); + + const totalVram = models.reduce((sum, model) => sum + (model.size_vram || 0), 0); + + if (runningCountEl) { + runningCountEl.textContent = String(models.length); + } + if (vramTotalEl) { + vramTotalEl.textContent = this.formatBytes(totalVram); + } + if (lastRefreshEl) { + lastRefreshEl.textContent = new Date().toLocaleString("it-IT"); + } + } + + renderRunningModels(models) { + const container = document.getElementById("running-models"); + if (!container) { + return; + } + + if (models.length === 0) { + container.innerHTML = ` +
+

Nessun modello residente in memoria al momento.

+
+ `; + return; + } + + container.innerHTML = models + .map((model) => this.renderModelCard(model)) + .join(""); + } + + renderModelCard(model) { + const name = this.escapeHtml(model.name || "unknown"); + const modelId = this.escapeHtml(model.model || "-"); + const size = this.formatBytes(model.size || 0); + const sizeVram = this.formatBytes(model.size_vram || 0); + const processor = this.escapeHtml(model.details?.processor || "-"); + const expiresAt = model.expires_at ? this.formatDateTime(model.expires_at) : "-"; + + return ` +
+
+
+

${name}

+

${modelId}

+
+ Pronto +
+
+
+

Dimensione modello

+

${size}

+
+
+

VRAM usata

+

${sizeVram}

+
+
+

Processor

+

${processor}

+
+
+

Scarico previsto

+

${expiresAt}

+
+
+
+ `; + } + + formatBytes(bytes) { + if (!bytes || bytes <= 0) { + return "0 B"; + } + const units = ["B", "KB", "MB", "GB", "TB"]; + const index = Math.min(Math.floor(Math.log(bytes) / Math.log(1024)), units.length - 1); + const value = bytes / Math.pow(1024, index); + return `${value.toFixed(2)} ${units[index]}`; + } + + formatDateTime(isoDate) { + const date = new Date(isoDate); + if (Number.isNaN(date.getTime())) { + return "-"; + } + + return date.toLocaleString("it-IT", { + year: "numeric", + month: "short", + day: "2-digit", + hour: "2-digit", + minute: "2-digit" + }); + } + + escapeHtml(text) { + const div = document.createElement("div"); + div.textContent = String(text); + return div.innerHTML; + } +} + +document.addEventListener("DOMContentLoaded", () => { + window.runningModelsPage = new RunningModelsPage(); +}); diff --git a/app/web/templates/index.html b/app/web/templates/index.html index ffa2c98..48f3cac 100644 --- a/app/web/templates/index.html +++ b/app/web/templates/index.html @@ -29,6 +29,7 @@

LLM Monitor

+ Modelli in Memoria
Controllo... diff --git a/app/web/templates/models_running.html b/app/web/templates/models_running.html new file mode 100644 index 0000000..6455904 --- /dev/null +++ b/app/web/templates/models_running.html @@ -0,0 +1,78 @@ + + + + + + LLM Monitor - Modelli in Memoria + + + + + +
+
+
+
+
+
+ 🧠 +
+
+

Modelli in Memoria

+

Vista dedicata all'output di ollama ps

+
+
+
+ Dashboard + +
+
+
+
+ +
+
+
+
+
Modelli Residenti
+
-
+
+
+
VRAM Totale Stimata
+
-
+
+
+
Ultimo Refresh
+
-
+
+
+ +
+

Output Ollama PS

+
+
+
+

Caricamento modelli residenti...

+
+
+
+
+
+ +
+
+

LLM Monitor v1.0.0 • Modelli residenti in memoria (ollama ps)

+
+
+
+ + + + diff --git a/main.py b/main.py index 900f098..ca12fa4 100644 --- a/main.py +++ b/main.py @@ -62,6 +62,12 @@ async def dashboard(): return FileResponse(templates_path / "index.html") +@app.get("/models-running") +async def models_running_page(): + """Pagina dedicata ai modelli residenti in memoria (ollama ps).""" + return FileResponse(templates_path / "models_running.html") + + @app.get("/favicon.ico", include_in_schema=False) async def favicon(): """Favicon dell'applicazione.""" diff --git a/tests/test_api.py b/tests/test_api.py index 8e1e413..16b52f0 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -46,6 +46,39 @@ def test_get_models(client, mock_models_response): assert len(data["models"]) == 2 assert data["models"][0]["name"] == "llama2" + +def test_get_running_models(client): + """Test getting running models (ollama ps).""" + with patch("requests.get") as mock_get: + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "models": [ + { + "name": "llama3.2:3b", + "size_vram": 2147483648, + "expires_at": "2026-04-24T10:30:00Z" + } + ] + } + mock_get.return_value = mock_response + + response = client.get("/api/v1/models/running") + assert response.status_code == 200 + data = response.json() + assert "models" in data + assert data["total"] == 1 + assert data["models"][0]["name"] == "llama3.2:3b" + + +def test_get_running_models_ollama_offline(client): + """Test running models when Ollama is offline.""" + with patch("requests.get") as mock_get: + mock_get.side_effect = Exception("Connection refused") + + response = client.get("/api/v1/models/running") + assert response.status_code == 500 + def test_get_models_ollama_offline(client): """Test getting models when Ollama is offline""" with patch("requests.get") as mock_get: @@ -126,6 +159,7 @@ def test_openapi_schema(client): assert "paths" in schema assert "/api/v1/health" in schema["paths"] assert "/api/v1/models" in schema["paths"] + assert 
"/api/v1/models/running" in schema["paths"] assert "/api/v1/models/{model_name}/show" in schema["paths"] assert "/api/v1/models/{model_name}/pull" not in schema["paths"]