# Source listing metadata (as captured from the file viewer): Python, 26 lines, 886 B
"""Smoke test for a local chat-completions HTTP endpoint.

Posts a single chat request to ``url`` and prints the model's reply, or an
error line if the request fails or the response is not in the expected shape.
"""

import json
import urllib.request

url = "http://localhost:8000/v1/chat/completions"
headers = {"Content-Type": "application/json"}
data = {
    "model": "/app/models/Qwen3.5-0.8B-UD-Q8_K_XL.gguf",
    "messages": [
        {"role": "system", "content": "Sei un assistente utile e conciso."},
        {"role": "user", "content": "Quali sono i vantaggi principali del sistema operativo Linux?"},
    ],
    "max_tokens": 100,
    "temperature": 0.2,
}

# Without a timeout urlopen() blocks indefinitely when the server accepts the
# connection but never answers, which would hang the whole smoke test.
REQUEST_TIMEOUT_SECONDS = 120

# Passing a body via ``data`` makes urllib send this request as a POST.
req = urllib.request.Request(
    url,
    data=json.dumps(data).encode("utf-8"),
    headers=headers,
)


def extract_reply(result):
    """Return the text of the first choice in a chat-completions response.

    Args:
        result: The decoded JSON response body.

    Returns:
        The value stored at ``result["choices"][0]["message"]["content"]``.

    Raises:
        KeyError, IndexError, TypeError: If the response does not have the
            expected ``choices -> [0] -> message -> content`` structure.
    """
    return result["choices"][0]["message"]["content"]


def main():
    """Send the request and print either the model reply or the error."""
    print("Inviando richiesta a vLLM...")
    try:
        with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT_SECONDS) as response:
            result = json.loads(response.read().decode("utf-8"))
        # Extract the reply *before* announcing success, so a malformed
        # response is never reported as a passed test.
        reply = extract_reply(result)
    except (OSError, ValueError, LookupError, TypeError) as e:
        # OSError covers URLError/HTTPError, refused connections and timeouts;
        # ValueError covers invalid JSON and undecodable bytes;
        # LookupError/TypeError cover an unexpected response structure.
        print(f"\n❌ ERRORE DURANTE IL TEST: {e}")
    else:
        print("\n✅ TEST SUPERATO. Risposta dal modello:\n")
        print(reply)


if __name__ == "__main__":
    main()