# Ollama Modelfile: builds a long-context, unlimited-output variant of the base model below.
# NOTE(review): `latest` is a floating tag — consider pinning a specific tag so rebuilds are reproducible.
FROM glm-4.7-flash:latest

# --- PARAMETERS (Spec-Driven Context Management) ---
# Set the context window to 198,000 tokens.
PARAMETER num_ctx 198000
# Balanced temperature: 0.7 is chosen to help precision on long contexts.
# NOTE(review): the original note cites a default of 1 — presumably the base model's own default; confirm.
PARAMETER temperature 0.7
# Repetition handling for long texts.
PARAMETER repeat_penalty 1.1
# Do not cap the number of generated tokens (-1 = unlimited).
# NOTE(review): the original comment described this as a long timeout for loading the
# KV cache on an APU, but num_predict limits output length, not time. If a load timeout
# is what was intended, it presumably has to be configured on the Ollama server or
# request side rather than via PARAMETER — confirm the intent.
PARAMETER num_predict -1