<!DOCTYPE html>
<html lang="it" data-theme="dark">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>TurboQuant + llama.cpp su ROCm — Tutorial</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;600&amp;family=Inter:wght@300..700&amp;display=swap" rel="stylesheet">
<style>
:root,[data-theme="dark"]{
--color-bg:#0d1117;--color-surface:#161b22;--color-surface-2:#21262d;
--color-surface-offset:#30363d;--color-border:#30363d;--color-divider:#21262d;
--color-text:#e6edf3;--color-text-muted:#8b949e;--color-text-faint:#484f58;
--color-primary:#39d353;--color-primary-hover:#2ea043;--color-primary-active:#1a7f37;
--color-primary-highlight:rgba(57,211,83,0.12);
--color-warning:#d29922;--color-error:#f85149;--color-blue:#58a6ff;
--color-purple:#bc8cff;--color-orange:#ffa657;
--font-body:'Inter',system-ui,sans-serif;--font-mono:'JetBrains Mono',monospace;
--text-xs:clamp(0.75rem,0.7rem + 0.25vw,0.875rem);
--text-sm:clamp(0.875rem,0.8rem + 0.35vw,1rem);
--text-base:clamp(1rem,0.95rem + 0.25vw,1.125rem);
--text-lg:clamp(1.125rem,1rem + 0.75vw,1.5rem);
--text-xl:clamp(1.5rem,1.2rem + 1.25vw,2.25rem);
--text-2xl:clamp(2rem,1.2rem + 2.5vw,3.5rem);
--space-1:.25rem;--space-2:.5rem;--space-3:.75rem;--space-4:1rem;
--space-5:1.25rem;--space-6:1.5rem;--space-8:2rem;--space-10:2.5rem;
--space-12:3rem;--space-16:4rem;
--radius-sm:.375rem;--radius-md:.5rem;--radius-lg:.75rem;--radius-xl:1rem;--radius-full:9999px;
--shadow-sm:0 1px 2px rgba(0,0,0,.4);--shadow-md:0 4px 12px rgba(0,0,0,.5);
--transition:180ms cubic-bezier(0.16,1,0.3,1);
--content-default:900px;--content-narrow:640px;
}
[data-theme="light"]{
--color-bg:#f6f8fa;--color-surface:#ffffff;--color-surface-2:#f6f8fa;
--color-surface-offset:#eaeef2;--color-border:#d0d7de;--color-divider:#eaeef2;
--color-text:#1f2328;--color-text-muted:#636c76;--color-text-faint:#aeb6c0;
--color-primary:#1a7f37;--color-primary-hover:#0d5e25;--color-primary-active:#08401a;
--color-primary-highlight:rgba(26,127,55,0.10);
--color-warning:#9a6700;--color-error:#d1242f;--color-blue:#0969da;
--color-purple:#8250df;--color-orange:#bc4c00;
--shadow-sm:0 1px 2px rgba(0,0,0,.07);--shadow-md:0 4px 12px rgba(0,0,0,.1);
}
*,*::before,*::after{box-sizing:border-box;margin:0;padding:0}
html{scroll-behavior:smooth;scroll-padding-top:4rem}
body{font-family:var(--font-body);font-size:var(--text-base);color:var(--color-text);background:var(--color-bg);min-height:100dvh;-webkit-font-smoothing:antialiased}
img,svg{display:block;max-width:100%}
a,button,[role="button"]{transition:color var(--transition),background var(--transition),border-color var(--transition),box-shadow var(--transition)}

/* NAV */
nav{position:sticky;top:0;z-index:100;background:oklch(from var(--color-bg) l c h / 0.92);backdrop-filter:blur(12px);border-bottom:1px solid var(--color-border);padding:var(--space-3) var(--space-6)}
.nav-inner{max-width:var(--content-default);margin:auto;display:flex;align-items:center;justify-content:space-between;gap:var(--space-4)}
.nav-logo{display:flex;align-items:center;gap:var(--space-2);font-weight:700;font-size:var(--text-sm);color:var(--color-text);text-decoration:none}
.nav-logo svg{color:var(--color-primary)}
.nav-links{display:flex;gap:var(--space-1);align-items:center}
.nav-links a{font-size:var(--text-xs);color:var(--color-text-muted);text-decoration:none;padding:var(--space-2) var(--space-3);border-radius:var(--radius-md)}
.nav-links a:hover{color:var(--color-text);background:var(--color-surface-offset)}
.theme-toggle{background:none;border:1px solid var(--color-border);border-radius:var(--radius-md);padding:var(--space-2);color:var(--color-text-muted);cursor:pointer;display:flex;align-items:center}
.theme-toggle:hover{color:var(--color-text);background:var(--color-surface-offset)}

/* HERO */
.hero{padding:var(--space-16) var(--space-6) var(--space-12);text-align:center;background:radial-gradient(ellipse 80% 50% at 50% 0%,var(--color-primary-highlight),transparent)}
.hero-badge{display:inline-flex;align-items:center;gap:var(--space-2);font-size:var(--text-xs);font-family:var(--font-mono);color:var(--color-primary);background:var(--color-primary-highlight);border:1px solid oklch(from var(--color-primary) l c h / 0.3);border-radius:var(--radius-full);padding:var(--space-1) var(--space-3);margin-bottom:var(--space-6)}
.hero h1{font-size:var(--text-2xl);font-weight:800;line-height:1.1;margin-bottom:var(--space-4);letter-spacing:-.02em}
.hero h1 span{color:var(--color-primary)}
.hero p{max-width:55ch;margin:0 auto var(--space-8);color:var(--color-text-muted);font-size:var(--text-base);line-height:1.7}
.hero-meta{display:flex;gap:var(--space-4);justify-content:center;flex-wrap:wrap}
.badge{display:inline-flex;align-items:center;gap:var(--space-1);font-size:var(--text-xs);font-family:var(--font-mono);padding:var(--space-1) var(--space-2);border-radius:var(--radius-full)}
.badge-green{background:rgba(57,211,83,.1);color:#39d353;border:1px solid rgba(57,211,83,.2)}
.badge-blue{background:rgba(88,166,255,.1);color:var(--color-blue);border:1px solid rgba(88,166,255,.2)}
.badge-orange{background:rgba(255,166,87,.1);color:var(--color-orange);border:1px solid rgba(255,166,87,.2)}
.badge-purple{background:rgba(188,140,255,.1);color:var(--color-purple);border:1px solid rgba(188,140,255,.2)}

/* LAYOUT */
.container{max-width:var(--content-default);margin:0 auto;padding:0 var(--space-6)}

/* ALERT BOXES */
.alert{border-radius:var(--radius-lg);padding:var(--space-4) var(--space-5);margin:var(--space-6) 0;display:flex;gap:var(--space-3);align-items:flex-start;font-size:var(--text-sm)}
.alert-warning{background:rgba(210,153,34,.08);border:1px solid rgba(210,153,34,.25);color:var(--color-text)}
.alert-info{background:rgba(88,166,255,.08);border:1px solid rgba(88,166,255,.2);color:var(--color-text)}
.alert-success{background:var(--color-primary-highlight);border:1px solid oklch(from var(--color-primary) l c h / 0.3);color:var(--color-text)}
.alert-error{background:rgba(248,81,73,.08);border:1px solid rgba(248,81,73,.2);color:var(--color-text)}
.alert-icon{flex-shrink:0;font-size:1.1em;margin-top:.1em}

/* STEPS */
.steps{counter-reset:step;display:flex;flex-direction:column;gap:0}
.step{display:grid;grid-template-columns:40px 1fr;gap:var(--space-4);padding:var(--space-8) 0;border-bottom:1px solid var(--color-divider)}
.step:last-child{border-bottom:none}
.step-num{counter-increment:step;width:40px;height:40px;border-radius:var(--radius-full);background:var(--color-surface-2);border:2px solid var(--color-border);display:flex;align-items:center;justify-content:center;font-family:var(--font-mono);font-weight:700;font-size:var(--text-sm);color:var(--color-primary);flex-shrink:0;position:relative;top:2px}
.step-content h2{font-size:var(--text-lg);font-weight:700;margin-bottom:var(--space-2);line-height:1.25}
.step-content p{color:var(--color-text-muted);font-size:var(--text-sm);margin-bottom:var(--space-4);max-width:70ch;line-height:1.7}
.step-content p+p{margin-top:var(--space-3)}

/* CODE BLOCKS */
.code-block{position:relative;margin:var(--space-4) 0;border-radius:var(--radius-lg);overflow:hidden;border:1px solid var(--color-border)}
.code-header{background:var(--color-surface);padding:var(--space-2) var(--space-4);display:flex;align-items:center;justify-content:space-between;border-bottom:1px solid var(--color-border)}
.code-lang{font-family:var(--font-mono);font-size:var(--text-xs);color:var(--color-text-muted);font-weight:600;letter-spacing:.05em;text-transform:uppercase}
.copy-btn{background:none;border:1px solid var(--color-border);border-radius:var(--radius-sm);padding:var(--space-1) var(--space-2);font-size:var(--text-xs);color:var(--color-text-muted);cursor:pointer;font-family:var(--font-body);display:flex;align-items:center;gap:var(--space-1)}
.copy-btn:hover{background:var(--color-surface-offset);color:var(--color-text)}
.copy-btn.copied{color:var(--color-primary);border-color:oklch(from var(--color-primary) l c h / 0.3)}
pre{background:var(--color-surface);padding:var(--space-5);overflow-x:auto;line-height:1.65;margin:0}
code{font-family:var(--font-mono);font-size:.875em;color:var(--color-text)}
pre code{color:var(--color-text);display:block}
/* Syntax highlight pseudo-classes via JS coloring */
.kw{color:var(--color-blue)}
.cm{color:var(--color-text-faint);font-style:italic}
.str{color:var(--color-orange)}
.num{color:var(--color-purple)}
.flag{color:var(--color-primary)}
.cmd{color:var(--color-warning)}

/* INLINE CODE */
:not(pre)>code{font-family:var(--font-mono);font-size:.85em;background:var(--color-surface-offset);padding:.1em .35em;border-radius:var(--radius-sm);border:1px solid var(--color-border);color:var(--color-orange)}

/* TABS */
.tabs{margin:var(--space-4) 0}
.tab-list{display:flex;gap:var(--space-1);border-bottom:1px solid var(--color-border);margin-bottom:var(--space-4)}
.tab-btn{background:none;border:none;border-bottom:2px solid transparent;padding:var(--space-2) var(--space-4);font-size:var(--text-sm);color:var(--color-text-muted);cursor:pointer;font-family:var(--font-body);margin-bottom:-1px;border-radius:var(--radius-sm) var(--radius-sm) 0 0;transition:color var(--transition),border-color var(--transition)}
.tab-btn:hover{color:var(--color-text);background:var(--color-surface-2)}
.tab-btn.active{color:var(--color-primary);border-bottom-color:var(--color-primary);font-weight:600}
.tab-panel{display:none}.tab-panel.active{display:block}

/* TABLE */
.table-wrap{overflow-x:auto;border-radius:var(--radius-lg);border:1px solid var(--color-border);margin:var(--space-4) 0}
table{border-collapse:collapse;width:100%;font-size:var(--text-sm)}
th{background:var(--color-surface);padding:var(--space-3) var(--space-4);text-align:left;font-weight:600;color:var(--color-text-muted);font-size:var(--text-xs);text-transform:uppercase;letter-spacing:.05em;border-bottom:1px solid var(--color-border)}
td{padding:var(--space-3) var(--space-4);border-bottom:1px solid var(--color-divider);vertical-align:top}
tr:last-child td{border-bottom:none}
tr:hover td{background:var(--color-surface-2)}

/* SECTION HEADERS */
.section{padding:var(--space-12) 0}
.section-label{font-family:var(--font-mono);font-size:var(--text-xs);color:var(--color-primary);text-transform:uppercase;letter-spacing:.1em;margin-bottom:var(--space-3);display:block}
.section h2{font-size:var(--text-xl);font-weight:800;margin-bottom:var(--space-3);line-height:1.2}
.section-desc{color:var(--color-text-muted);max-width:65ch;line-height:1.7;margin-bottom:var(--space-8)}

/* PROGRESS BAR */
.progress-steps{display:flex;gap:0;margin-bottom:var(--space-8);border:1px solid var(--color-border);border-radius:var(--radius-lg);overflow:hidden}
.progress-item{flex:1;padding:var(--space-3) var(--space-2);text-align:center;font-size:var(--text-xs);font-family:var(--font-mono);color:var(--color-text-muted);background:var(--color-surface);border-right:1px solid var(--color-border);cursor:pointer;transition:background var(--transition)}
.progress-item:last-child{border-right:none}
.progress-item.done{background:var(--color-primary-highlight);color:var(--color-primary)}
.progress-item.active{background:var(--color-surface-2);color:var(--color-text);font-weight:600}
.progress-item:hover{background:var(--color-surface-offset)}

/* PILL LABELS */
.pill{display:inline-block;font-size:var(--text-xs);font-family:var(--font-mono);padding:.15em .5em;border-radius:var(--radius-full);font-weight:600}
.pill-ok{background:rgba(57,211,83,.12);color:#39d353}
.pill-warn{background:rgba(210,153,34,.12);color:var(--color-warning)}
.pill-err{background:rgba(248,81,73,.12);color:var(--color-error)}
.pill-info{background:rgba(88,166,255,.1);color:var(--color-blue)}

/* PREREQ GRID */
.prereq-grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(200px,1fr));gap:var(--space-3);margin:var(--space-4) 0}
.prereq-card{background:var(--color-surface);border:1px solid var(--color-border);border-radius:var(--radius-lg);padding:var(--space-4);display:flex;flex-direction:column;gap:var(--space-2)}
.prereq-card .title{font-size:var(--text-sm);font-weight:600;display:flex;align-items:center;gap:var(--space-2)}
.prereq-card .desc{font-size:var(--text-xs);color:var(--color-text-muted);line-height:1.5}
.prereq-card .version{font-family:var(--font-mono);font-size:var(--text-xs);color:var(--color-primary)}

/* COLLAPSIBLE */
details{border:1px solid var(--color-border);border-radius:var(--radius-lg);margin:var(--space-4) 0;overflow:hidden}
summary{padding:var(--space-3) var(--space-4);cursor:pointer;font-weight:600;font-size:var(--text-sm);background:var(--color-surface);list-style:none;display:flex;justify-content:space-between;align-items:center;user-select:none}
summary::-webkit-details-marker{display:none}
summary::after{content:"+";color:var(--color-text-muted);font-size:var(--text-sm)}
details[open]>summary::after{content:"-"}
details[open]>summary{border-bottom:1px solid var(--color-border)}
.details-body{padding:var(--space-4);background:var(--color-surface)}

/* FOOTER */
footer{border-top:1px solid var(--color-divider);padding:var(--space-8) var(--space-6);text-align:center;color:var(--color-text-faint);font-size:var(--text-xs);font-family:var(--font-mono)}

@media(max-width:640px){
.step{grid-template-columns:32px 1fr;gap:var(--space-3)}
.progress-steps{flex-direction:column}
.progress-item{border-right:none;border-bottom:1px solid var(--color-border)}
.progress-item:last-child{border-bottom:none}
.hero h1{font-size:clamp(1.8rem,8vw,3rem)}
/* NOTE(review): the last child of .nav-links is the theme-toggle <button>, so no <a> is
   ever :last-child and this hides ALL nav links on mobile. If the intent was to keep the
   last link visible, this should be a:not(:last-of-type) — confirm intended behavior. */
.nav-links a:not(:last-child){display:none}
}
</style>
</head>
<body>
<nav>
<div class="nav-inner">
<a href="#" class="nav-logo">
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true"><polygon points="13 2 3 14 12 14 11 22 21 10 12 10 13 2"/></svg>
TurboQuant × ROCm
</a>
<div class="nav-links">
<a href="#prereq">Prerequisiti</a>
<a href="#build">Build</a>
<a href="#test">Test</a>
<a href="#troubleshoot">Troubleshooting</a>
<!-- type="button" prevents accidental form submission; the icon is decorative, the
     accessible name comes from aria-label -->
<button class="theme-toggle" data-theme-toggle type="button" aria-label="Switch theme">
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true"><path d="M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z"/></svg>
</button>
</div>
</div>
</nav>
<section class="hero">
<div class="hero-badge">
<svg width="12" height="12" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true"><circle cx="12" cy="12" r="10"/></svg>
Aprile 2026 — Sperimentale
</div>
<h1>llama.cpp + <span>TurboQuant</span><br>su AMD ROCm</h1>
<p>Guida passo-passo per compilare llama.cpp con la patch TurboQuant (jagsan-cyber fork) su hardware AMD con ROCm/HIP. Comprende prerequisiti, build, test KV cache e troubleshooting.</p>
<div class="hero-meta">
<span class="badge badge-green">ROCm 6.x</span>
<span class="badge badge-blue">HIP / hipBLAS</span>
<span class="badge badge-orange">TurboQuant KV</span>
<span class="badge badge-purple">RDNA2/3/4</span>
</div>
</section>
<div class="container">

<div class="alert alert-warning">
<span class="alert-icon" aria-hidden="true">⚠️</span>
<div><strong>Stato:</strong> Il supporto TurboQuant su ROCm è <strong>sperimentale</strong> (fork <code>jagsan-cyber/turboquant-rocm-llamacpp</code>, aprile 2026). Non è ancora nel main di llama.cpp. Aspettati possibili crash e performance variabile. Testa sempre su un sistema non di produzione.</div>
</div>
<!-- PREREQUISITI -->
<section class="section" id="prereq">
<span class="section-label">Step 0</span>
<h2>Prerequisiti hardware & software</h2>
<p class="section-desc">Prima di iniziare, verifica che il tuo sistema soddisfi i requisiti minimi. Le GPU AMD supportate da ROCm partono da RDNA2 (gfx1030+).</p>

<div class="prereq-grid">
<div class="prereq-card">
<div class="title">🖥️ GPU AMD</div>
<div class="desc">RDNA2, RDNA3 o RDNA4. Architetture gfx1030, gfx110x, gfx120x, Strix Halo gfx1151.</div>
<div class="version">≥ RX 6000 series</div>
</div>
<div class="prereq-card">
<div class="title">🐧 Linux Distro</div>
<div class="desc">Ubuntu 22.04 o 24.04 LTS, Fedora 39+. Kernel 6.x raccomandato.</div>
<div class="version">Ubuntu 22.04 / 24.04</div>
</div>
<div class="prereq-card">
<div class="title">⚙️ ROCm</div>
<div class="desc">AMD ROCm runtime e HIP SDK installati. Versione 6.0 o superiore.</div>
<div class="version">ROCm ≥ 6.0</div>
</div>
<div class="prereq-card">
<div class="title">📦 CMake</div>
<div class="desc">CMake 3.21 o superiore, con supporto HIP targets.</div>
<div class="version">cmake ≥ 3.21</div>
</div>
<div class="prereq-card">
<div class="title">🔧 Build tools</div>
<div class="desc">gcc/g++ 12+, clang (dal pacchetto ROCm), make, git, python3.</div>
<div class="version">gcc ≥ 12</div>
</div>
<div class="prereq-card">
<div class="title">💾 VRAM</div>
<div class="desc">Minimo 8 GB VRAM per modelli 7B. 16 GB+ per modelli 13–34B.</div>
<div class="version">≥ 8 GB VRAM</div>
</div>
</div>

<div class="alert alert-info">
<span class="alert-icon" aria-hidden="true">ℹ️</span>
<div>Verifica la tua architettura AMD con: <code>rocminfo | grep gfx</code>. Nota il valore (es. <code>gfx1100</code> per RX 7900) — ti servirà nella fase di build come <code>AMDGPU_TARGETS</code>.</div>
</div>
</section>
<!-- STEPS -->
<section class="section" id="build">
<span class="section-label">Build Guide</span>
<h2>Compilazione passo per passo</h2>
<p class="section-desc">Segui gli step nell'ordine. Ogni blocco di codice è copiabile con un click.</p>

<div class="progress-steps">
<div class="progress-item done">1 · Dipendenze</div>
<div class="progress-item done">2 · ROCm Check</div>
<div class="progress-item active">3 · Clone Fork</div>
<div class="progress-item">4 · CMake Build</div>
<div class="progress-item">5 · Quantize</div>
<div class="progress-item">6 · Run Test</div>
</div>

<div class="steps">
<!-- STEP 1 -->
<div class="step">
<div class="step-num">1</div>
<div class="step-content">
<h2>Installa le dipendenze di sistema</h2>
<p>Installa i pacchetti necessari. Su Ubuntu 22.04/24.04:</p>
<div class="code-block">
<div class="code-header"><span class="code-lang">bash</span><button class="copy-btn" type="button" onclick="copyCode(this)">📋 Copia</button></div>
<pre><code><span class="cm"># Aggiorna e installa build tools</span>
<span class="cmd">sudo apt update &amp;&amp; sudo apt upgrade -y</span>
<span class="cmd">sudo apt install -y</span> build-essential gcc g++ clang cmake cmake-extras git wget curl python3 python3-pip libopenblas-dev pkg-config</code></pre>
</div>
<p>Su <strong>Fedora/RHEL</strong>:</p>
<div class="code-block">
<div class="code-header"><span class="code-lang">bash</span><button class="copy-btn" type="button" onclick="copyCode(this)">📋 Copia</button></div>
<pre><code><span class="cmd">sudo dnf install -y</span> gcc gcc-c++ clang cmake git wget curl python3 python3-pip openblas-devel</code></pre>
</div>
</div>
</div>
<!-- STEP 2 -->
<div class="step">
<div class="step-num">2</div>
<div class="step-content">
<h2>Installa e verifica ROCm</h2>
<p>Se ROCm non è ancora installato, usa lo script ufficiale AMD. Se già installato, salta alla verifica.</p>

<div class="tabs">
<div class="tab-list">
<button class="tab-btn active" type="button" onclick="switchTab(this,'tab-rocm-install')">Installazione ROCm</button>
<button class="tab-btn" type="button" onclick="switchTab(this,'tab-rocm-verify')">Verifica installazione</button>
</div>
<div id="tab-rocm-install" class="tab-panel active">
<div class="code-block">
<div class="code-header"><span class="code-lang">bash</span><button class="copy-btn" type="button" onclick="copyCode(this)">📋 Copia</button></div>
<pre><code><span class="cm"># Scarica e installa ROCm 6.x (Ubuntu 22.04)</span>
<span class="cmd">wget</span> <span class="str">https://repo.radeon.com/amdgpu-install/6.3/ubuntu/jammy/amdgpu-install_6.3.60300-1_all.deb</span>
<span class="cmd">sudo dpkg -i</span> amdgpu-install_6.3.60300-1_all.deb
<span class="cmd">sudo amdgpu-install</span> <span class="flag">--usecase=rocm,hip</span> <span class="flag">--no-dkms</span>

<span class="cm"># Aggiungi il tuo utente al gruppo render e video</span>
<span class="cmd">sudo usermod -aG render,video</span> $USER
<span class="cmd">newgrp render</span>

<span class="cm"># Riavvia per applicare i cambiamenti</span>
<span class="cmd">sudo reboot</span></code></pre>
</div>
</div>
<div id="tab-rocm-verify" class="tab-panel">
<div class="code-block">
<div class="code-header"><span class="code-lang">bash</span><button class="copy-btn" type="button" onclick="copyCode(this)">📋 Copia</button></div>
<pre><code><span class="cm"># Verifica che ROCm rilevi la GPU</span>
<span class="cmd">rocminfo</span>

<span class="cm"># Trova la tua architettura target (es. gfx1100)</span>
<span class="cmd">rocminfo</span> | <span class="cmd">grep</span> <span class="flag">-E</span> <span class="str">"gfx|Name"</span>

<span class="cm"># Verifica HIP</span>
<span class="cmd">hipconfig</span> <span class="flag">-v</span>
<span class="cmd">hipconfig</span> <span class="flag">-l</span> <span class="cm"># mostra path del compilatore</span>

<span class="cm"># Test rapido: elenca device</span>
<span class="cmd">rocm-smi</span></code></pre>
</div>
<div class="alert alert-success" style="margin-top:var(--space-3)">
<span class="alert-icon" aria-hidden="true">✅</span>
<div>Se <code>rocminfo</code> mostra la tua GPU con architettura <code>gfx****</code>, ROCm è correttamente installato. Annota il valore — es. <code>gfx1100</code> per RX 7900 XTX.</div>
</div>
</div>
</div>
</div>
</div>
<!-- STEP 3 -->
<div class="step">
<div class="step-num">3</div>
<div class="step-content">
<h2>Clona il fork TurboQuant ROCm</h2>
<p>Usa il fork <code>jagsan-cyber/turboquant-rocm-llamacpp</code> che include la patch TurboQuant con backend ROCm/HIP. Contiene anche la Heavy-Hitter Oracle (H2O) per KV cache eviction.</p>
<div class="code-block">
<div class="code-header"><span class="code-lang">bash</span><button class="copy-btn" type="button" onclick="copyCode(this)">📋 Copia</button></div>
<pre><code><span class="cm"># Crea una directory di lavoro</span>
<span class="cmd">mkdir</span> <span class="flag">-p</span> ~/llm-inference &amp;&amp; <span class="cmd">cd</span> ~/llm-inference

<span class="cm"># Clona il fork TurboQuant con supporto ROCm</span>
<span class="cmd">git clone</span> https://github.com/jagsan-cyber/turboquant-rocm-llamacpp.git
<span class="cmd">cd</span> turboquant-rocm-llamacpp

<span class="cm"># Mostra il branch corrente e i log recenti</span>
<span class="cmd">git log</span> <span class="flag">--oneline -10</span>
<span class="cmd">git branch -a</span></code></pre>
</div>
<div class="alert alert-info">
<span class="alert-icon" aria-hidden="true">💡</span>
<div>In alternativa puoi usare il fork <code>TheTom/llama-cpp-turboquant</code> (CUDA + CPU) e applicare manualmente i patch ROCm dal repo jagsan-cyber. Vedi la sezione Troubleshooting.</div>
</div>
</div>
</div>
<!-- STEP 4 -->
<div class="step">
<div class="step-num">4</div>
<div class="step-content">
<h2>Configura e compila con CMake + HIP</h2>
<p>Questa è la fase critica. Sostituisci <code>gfx1100</code> con l'architettura della tua GPU rilevata al passo 2. Se hai più GPU, separa con punto e virgola: <code>gfx1100;gfx1030</code>.</p>

<div class="code-block">
<div class="code-header"><span class="code-lang">bash</span><button class="copy-btn" type="button" onclick="copyCode(this)">📋 Copia</button></div>
<pre><code><span class="cm"># Imposta la tua architettura GPU (modifica questo valore!)</span>
<span class="cm"># Esempi: gfx1030 (RX 6800), gfx1100 (RX 7900 XTX), gfx1101 (RX 7600)</span>
<span class="cm"># gfx1151 (Strix Halo / Ryzen AI MAX)</span>
<span class="cmd">export</span> <span class="kw">AMDGPU_TARGETS</span>=<span class="str">"gfx1100"</span> <span class="cm"># ← modifica qui</span>

<span class="cm"># Imposta variabili HIP dal runtime ROCm</span>
<span class="cmd">export</span> <span class="kw">HIPCXX</span>=<span class="str">"$(hipconfig -l)/clang"</span>
<span class="cmd">export</span> <span class="kw">HIP_PATH</span>=<span class="str">"$(hipconfig -R)"</span>

<span class="cm"># Crea la directory di build e configura</span>
<span class="cmd">cmake</span> -S . -B build <span class="flag">-DGGML_HIP=ON</span> <span class="flag">-DGGML_HIP_TURBOQUANT=ON</span> <span class="flag">-DAMDGPU_TARGETS=$AMDGPU_TARGETS</span> <span class="flag">-DCMAKE_BUILD_TYPE=Release</span> <span class="flag">-DGGML_BLAS=ON</span> <span class="flag">-DGGML_BLAS_VENDOR=hipBLAS</span>

<span class="cm"># Compila (usa tutti i core disponibili)</span>
<span class="cmd">cmake</span> <span class="flag">--build</span> build <span class="flag">-j$(nproc)</span>

<span class="cm"># Verifica i binari prodotti</span>
<span class="cmd">ls</span> <span class="flag">-la</span> build/bin/</code></pre>
</div>

<div class="alert alert-warning">
<span class="alert-icon" aria-hidden="true">⚠️</span>
<div>Se il flag <code>-DGGML_HIP_TURBOQUANT=ON</code> non viene riconosciuto (CMake error), consulta il README del fork: alcune versioni usano <code>-DLLAMA_TURBOQUANT=ON</code> o richiede un branch specifico. Controlla con <code>git branch -a</code>.</div>
</div>

<details>
<summary>Flag CMake opzionali avanzati</summary>
<div class="details-body">
<div class="code-block">
<div class="code-header"><span class="code-lang">bash (opzionale)</span><button class="copy-btn" type="button" onclick="copyCode(this)">📋 Copia</button></div>
<pre><code><span class="cm"># Flag aggiuntivi per ottimizzazioni specifiche</span>
<span class="cmd">cmake</span> -S . -B build <span class="flag">-DGGML_HIP=ON</span> <span class="flag">-DGGML_HIP_TURBOQUANT=ON</span> <span class="flag">-DAMDGPU_TARGETS=$AMDGPU_TARGETS</span> <span class="flag">-DCMAKE_BUILD_TYPE=Release</span> <span class="flag">-DGGML_BLAS=ON</span> <span class="flag">-DGGML_BLAS_VENDOR=hipBLAS</span> <span class="flag">-DGGML_NATIVE=ON</span> \ <span class="cm"># ottimizza per la CPU locale</span>
<span class="flag">-DGGML_F16C=ON</span> \ <span class="cm"># abilita FP16 se supportato</span>
<span class="flag">-DGGML_AVX2=ON</span> \ <span class="cm"># AVX2 per CPU Ryzen</span>
<span class="flag">-DLLAMA_CURL=ON</span> <span class="cm"># abilita download diretto modelli</span></code></pre>
</div>
</div>
</details>
</div>
</div>
<!-- STEP 5 -->
<div class="step">
<div class="step-num">5</div>
<div class="step-content">
<h2>Scarica e quantizza un modello</h2>
<p>Per testare TurboQuant hai bisogno di un modello in formato GGUF. Puoi scaricarne uno pre-quantizzato da HuggingFace o usarne uno esistente.</p>

<div class="code-block">
<div class="code-header"><span class="code-lang">bash</span><button class="copy-btn" type="button" onclick="copyCode(this)">📋 Copia</button></div>
<pre><code><span class="cm"># Crea directory per i modelli</span>
<span class="cmd">mkdir</span> <span class="flag">-p</span> ~/llm-inference/models

<span class="cm"># Opzione A: scarica un GGUF già pronto (es. Qwen2.5-7B Q4_K_M)</span>
<span class="cmd">cd</span> ~/llm-inference/models
<span class="cmd">wget</span> <span class="flag">-c</span> <span class="str">"https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-GGUF/resolve/main/qwen2.5-7b-instruct-q4_k_m.gguf"</span>

<span class="cm"># Opzione B: converti da safetensors (se hai il modello HF locale)</span>
<span class="cmd">cd</span> ~/llm-inference/turboquant-rocm-llamacpp
<span class="cmd">python3</span> convert_hf_to_gguf.py <span class="str">/path/to/hf/model</span> <span class="flag">--outfile</span> ~/llm-inference/models/mio-modello-f16.gguf <span class="flag">--outtype</span> f16

<span class="cm"># Quantizzazione classica Q4_K_M (opzionale, se hai il modello F16)</span>
./build/bin/llama-quantize ~/llm-inference/models/mio-modello-f16.gguf ~/llm-inference/models/mio-modello-q4km.gguf Q4_K_M</code></pre>
</div>
</div>
</div>
<!-- STEP 6 -->
<div class="step">
<div class="step-num">6</div>
<div class="step-content">
<h2>Esegui il test TurboQuant KV cache</h2>
<p>Avvia <code>llama-cli</code> o <code>llama-bench</code> con i flag TurboQuant attivi. Il parametro chiave è <code>--cache-type-k</code> e <code>--cache-type-v</code> impostati a <code>tq</code> (TurboQuant).</p>

<div class="tabs">
<div class="tab-list">
<button class="tab-btn active" type="button" onclick="switchTab(this,'tab-basic')">Run base</button>
<button class="tab-btn" type="button" onclick="switchTab(this,'tab-bench')">Benchmark</button>
<button class="tab-btn" type="button" onclick="switchTab(this,'tab-server')">Server mode</button>
</div>
<div id="tab-basic" class="tab-panel active">
<div class="code-block">
<div class="code-header"><span class="code-lang">bash</span><button class="copy-btn" type="button" onclick="copyCode(this)">📋 Copia</button></div>
<pre><code><span class="cm"># Inferenza con TurboQuant KV cache attivo su GPU AMD</span>
./build/bin/llama-cli <span class="flag">-m</span> ~/llm-inference/models/qwen2.5-7b-instruct-q4_k_m.gguf <span class="flag">-ngl</span> 99 \ <span class="cm"># offload tutti i layer su GPU</span>
<span class="flag">--cache-type-k</span> tq1_0 \ <span class="cm"># TurboQuant ~1-bit keys</span>
<span class="flag">--cache-type-v</span> tq4_0 \ <span class="cm"># TurboQuant 4-bit values</span>
<span class="flag">-c</span> 16384 \ <span class="cm"># context window 16K token</span>
<span class="flag">-n</span> 256 \ <span class="cm"># genera 256 token</span>
<span class="flag">-p</span> <span class="str">"Ciao! Spiegami cos'è il KV cache in un LLM."</span></code></pre>
</div>
<div class="alert alert-info" style="margin-top:var(--space-3)">
<span class="alert-icon" aria-hidden="true">💡</span>
<div><strong>Tipi KV cache disponibili:</strong> <code>f16</code> (standard), <code>q8_0</code>, <code>q4_0</code>, <code>tq1_0</code> (TurboQuant ~1-bit), <code>tq4_0</code> (TurboQuant 4-bit). Combinare <code>tq1_0</code> per K e <code>tq4_0</code> per V dà il miglior rapporto qualità/memoria.</div>
</div>
</div>
<div id="tab-bench" class="tab-panel">
<div class="code-block">
<div class="code-header"><span class="code-lang">bash</span><button class="copy-btn" type="button" onclick="copyCode(this)">📋 Copia</button></div>
<pre><code><span class="cm"># Benchmark comparativo: standard f16 vs TurboQuant</span>

<span class="cm"># Test 1: KV cache standard F16 (baseline)</span>
./build/bin/llama-bench <span class="flag">-m</span> ~/llm-inference/models/qwen2.5-7b-instruct-q4_k_m.gguf <span class="flag">-ngl</span> 99 <span class="flag">--cache-type-k</span> f16 <span class="flag">--cache-type-v</span> f16 <span class="flag">-c</span> 8192 <span class="flag">-n</span> 128 <span class="flag">-r</span> 3 <span class="cm"># ripeti 3 volte per media</span>

<span class="cm"># Test 2: TurboQuant attivo</span>
./build/bin/llama-bench <span class="flag">-m</span> ~/llm-inference/models/qwen2.5-7b-instruct-q4_k_m.gguf <span class="flag">-ngl</span> 99 <span class="flag">--cache-type-k</span> tq1_0 <span class="flag">--cache-type-v</span> tq4_0 <span class="flag">-c</span> 8192 <span class="flag">-n</span> 128 <span class="flag">-r</span> 3

<span class="cm"># Monitor VRAM in tempo reale (apri in un secondo terminale)</span>
<span class="cmd">watch</span> <span class="flag">-n 0.5</span> rocm-smi <span class="flag">--showmeminfo</span> vram</code></pre>
</div>
</div>
<div id="tab-server" class="tab-panel">
|
||
<div class="code-block">
|
||
<div class="code-header"><span class="code-lang">bash</span><button class="copy-btn" onclick="copyCode(this)">📋 Copia</button></div>
|
||
<pre><code><span class="cm"># Server OpenAI-compatibile con TurboQuant abilitato</span>
|
||
./build/bin/llama-server <span class="flag">-m</span> ~/llm-inference/models/qwen2.5-7b-instruct-q4_k_m.gguf <span class="flag">-ngl</span> 99 <span class="flag">--cache-type-k</span> tq1_0 <span class="flag">--cache-type-v</span> tq4_0 <span class="flag">-c</span> 32768 \ <span class="cm"># context 32K — qui TurboQuant fa la differenza!</span>
|
||
<span class="flag">--host</span> 0.0.0.0 <span class="flag">--port</span> 8080
|
||
|
||
<span class="cm"># Test rapido con curl (in un altro terminale)</span>
|
||
<span class="cmd">curl</span> <span class="flag">-s</span> http://localhost:8080/v1/chat/completions <span class="flag">-H</span> <span class="str">"Content-Type: application/json"</span> <span class="flag">-d</span> <span class="str">'{"model":"local","messages":[{"role":"user","content":"Ciao!"}],"max_tokens":100}'</span> | python3 <span class="flag">-m json.tool</span></code></pre>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
</div>
|
||
</section>
|
||
|
||
<!-- RISULTATI ATTESI -->
|
||
<section class="section" id="test">
|
||
<span class="section-label">Risultati attesi</span>
|
||
<h2>Cosa aspettarsi su ROCm</h2>
|
||
<p class="section-desc">Benchmark indicativi basati su test della community (aprile 2026). I valori ROCm sono stimati a partire dai dati CUDA applicando il gap storico di ~15-25%.</p>
|
||
|
||
<div class="table-wrap">
|
||
<table>
|
||
<thead>
|
||
<tr>
|
||
<th>Config KV cache</th>
|
||
<th>VRAM (7B, 8K ctx)</th>
|
||
<th>TPS decode</th>
|
||
<th>Qualità output</th>
|
||
<th>Stato ROCm</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td><code>f16 / f16</code> (baseline)</td>
|
||
<td>~6.5 GB</td>
|
||
<td>100% (ref)</td>
|
||
<td><span class="pill pill-ok">Perfetta</span></td>
|
||
<td><span class="pill pill-ok">Stabile</span></td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>q8_0 / q8_0</code></td>
|
||
<td>~5.2 GB (−20%)</td>
|
||
<td>~105%</td>
|
||
<td><span class="pill pill-ok">Ottima</span></td>
|
||
<td><span class="pill pill-ok">Stabile</span></td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>q4_0 / q4_0</code></td>
|
||
<td>~3.8 GB (−42%)</td>
|
||
<td>~108%</td>
|
||
<td><span class="pill pill-warn">Buona</span></td>
|
||
<td><span class="pill pill-ok">Stabile</span></td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>tq4_0 / tq4_0</code></td>
|
||
<td>~2.4 GB (−63%)</td>
|
||
<td>~96%</td>
|
||
<td><span class="pill pill-warn">Buona</span></td>
|
||
<td><span class="pill pill-warn">Sperimentale</span></td>
|
||
</tr>
|
||
<tr>
|
||
<td><code>tq1_0 / tq4_0</code> (max compression)</td>
|
||
<td>~1.1 GB (−83%)</td>
|
||
<td>~85-92%</td>
|
||
<td><span class="pill pill-warn">Discreta</span></td>
|
||
<td><span class="pill pill-warn">Sperimentale</span></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
</div>
|
||
<p style="color:var(--color-text-muted);font-size:var(--text-xs);margin-top:var(--space-2)">* I dati ROCm sono stime. Su Metal (Apple Silicon) si registra un calo TPS maggiore (~50%) per un bug noto. CUDA mostra +22.8% decode a 32K context.</p>
|
||
</section>
|
||
|
||
<!-- TROUBLESHOOTING -->
|
||
<section class="section" id="troubleshoot">
|
||
<span class="section-label">Problemi comuni</span>
|
||
<h2>Troubleshooting ROCm + TurboQuant</h2>
|
||
<p class="section-desc">Errori frequenti e come risolverli.</p>
|
||
|
||
<details>
|
||
<summary>❌ Errore: <code>HIPCC not found</code> o <code>hipconfig: command not found</code></summary>
|
||
<div class="details-body">
|
||
<p style="color:var(--color-text-muted);font-size:var(--text-sm);margin-bottom:var(--space-3)">ROCm non è nel PATH o non è installato correttamente.</p>
|
||
<div class="code-block">
|
||
<div class="code-header"><span class="code-lang">bash</span><button class="copy-btn" onclick="copyCode(this)">📋 Copia</button></div>
|
||
<pre><code><span class="cm"># Aggiungi ROCm al PATH</span>
|
||
<span class="cmd">echo</span> <span class="str">'export PATH=$PATH:/opt/rocm/bin:/opt/rocm/hip/bin'</span> >> ~/.bashrc
|
||
<span class="cmd">echo</span> <span class="str">'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib'</span> >> ~/.bashrc
|
||
<span class="cmd">source</span> ~/.bashrc
|
||
|
||
<span class="cm"># Verifica</span>
|
||
<span class="cmd">which</span> hipconfig && hipconfig <span class="flag">-v</span></code></pre>
|
||
</div>
|
||
</div>
|
||
</details>
|
||
|
||
<details>
|
||
<summary>❌ Errore: <code>GPU not found</code> o <code>Device 0: gfx000</code></summary>
|
||
<div class="details-body">
|
||
<p style="color:var(--color-text-muted);font-size:var(--text-sm);margin-bottom:var(--space-3)">L'utente non ha i permessi sul device o il driver non carica correttamente.</p>
|
||
<div class="code-block">
|
||
<div class="code-header"><span class="code-lang">bash</span><button class="copy-btn" onclick="copyCode(this)">📋 Copia</button></div>
|
||
<pre><code><span class="cm"># Controlla che l'utente sia nel gruppo render/video</span>
|
||
<span class="cmd">groups</span> $USER
|
||
|
||
<span class="cm"># Se mancano, aggiungili e rilogga</span>
|
||
<span class="cmd">sudo usermod -aG render,video</span> $USER
|
||
<span class="cmd">logout</span> <span class="cm"># poi rilogga</span>
|
||
|
||
<span class="cm"># Verifica device files</span>
|
||
<span class="cmd">ls -la</span> /dev/kfd /dev/dri/renderD*</code></pre>
|
||
</div>
|
||
</div>
|
||
</details>
|
||
|
||
<details>
|
||
<summary>❌ Errore CMake: <code>DGGML_HIP_TURBOQUANT not recognized</code></summary>
|
||
<div class="details-body">
|
||
<p style="color:var(--color-text-muted);font-size:var(--text-sm);margin-bottom:var(--space-3)">Alcune versioni del fork usano nomi di flag diversi. Prova queste alternative:</p>
|
||
<div class="code-block">
|
||
<div class="code-header"><span class="code-lang">bash</span><button class="copy-btn" onclick="copyCode(this)">📋 Copia</button></div>
|
||
<pre><code><span class="cm"># Alternativa 1</span>
|
||
<span class="cmd">cmake</span> -S . -B build <span class="flag">-DGGML_HIP=ON -DLLAMA_TURBOQUANT=ON</span> ...
|
||
|
||
<span class="cm"># Alternativa 2: cerca il flag corretto nel CMakeLists</span>
|
||
<span class="cmd">grep</span> <span class="flag">-i</span> <span class="str">"turboquant"</span> CMakeLists.txt
|
||
<span class="cmd">grep</span> <span class="flag">-i</span> <span class="str">"turboquant"</span> ggml/CMakeLists.txt</code></pre>
|
||
</div>
|
||
</div>
|
||
</details>
|
||
|
||
<details>
|
||
<summary>⚠️ Performance peggiori del previsto (TPS molto bassi)</summary>
|
||
<div class="details-body">
|
||
<p style="color:var(--color-text-muted);font-size:var(--text-sm);margin-bottom:var(--space-3)">Su ROCm il backend TurboQuant non è ancora ottimizzato quanto CUDA. Prova queste mitigazioni:</p>
|
||
<div class="code-block">
|
||
<div class="code-header"><span class="code-lang">bash</span><button class="copy-btn" onclick="copyCode(this)">📋 Copia</button></div>
|
||
<pre><code><span class="cm"># Usa tq4_0 invece di tq1_0 per k — meno compressione, più veloce</span>
|
||
<span class="flag">--cache-type-k</span> tq4_0 <span class="flag">--cache-type-v</span> tq4_0
|
||
|
||
<span class="cm"># Riduci il context se la VRAM non è un problema</span>
|
||
<span class="flag">-c</span> 4096
|
||
|
||
<span class="cm"># Forza un numero specifico di thread CPU per il dequantize</span>
|
||
<span class="flag">-t</span> 8
|
||
|
||
<span class="cm"># Monitora l'utilizzo GPU in tempo reale</span>
|
||
<span class="cmd">rocm-smi</span> <span class="flag">--showuse --showmeminfo vram -d 0</span></code></pre>
|
||
</div>
|
||
</div>
|
||
</details>
|
||
|
||
<details>
|
||
<summary>🔄 Alternativa: applicare patch manualmente su llama.cpp ufficiale</summary>
|
||
<div class="details-body">
|
||
<p style="color:var(--color-text-muted);font-size:var(--text-sm);margin-bottom:var(--space-3)">Se preferisci partire dal repo ufficiale llama.cpp e applicare la patch TurboQuant manualmente:</p>
|
||
<div class="code-block">
|
||
<div class="code-header"><span class="code-lang">bash</span><button class="copy-btn" onclick="copyCode(this)">📋 Copia</button></div>
|
||
<pre><code><span class="cm"># Clona llama.cpp ufficiale</span>
|
||
<span class="cmd">git clone</span> https://github.com/ggml-org/llama.cpp.git
|
||
<span class="cmd">cd</span> llama.cpp
|
||
|
||
<span class="cm"># Aggiungi il fork come remote</span>
|
||
<span class="cmd">git remote add</span> thetom https://github.com/TheTom/llama-cpp-turboquant.git
|
||
<span class="cmd">git fetch</span> thetom
|
||
|
||
<span class="cm"># Cherry-pick solo i commit TurboQuant (vedi git log thetom/main)</span>
|
||
<span class="cmd">git log</span> thetom/main <span class="flag">--oneline</span> | <span class="cmd">grep</span> <span class="flag">-i</span> turboquant
|
||
<span class="cm"># poi: git cherry-pick &lt;commit-hash&gt;</span>
|
||
|
||
<span class="cm"># Build con ROCm standard</span>
|
||
<span class="cmd">cmake</span> -S . -B build <span class="flag">-DGGML_HIP=ON</span> <span class="flag">-DAMDGPU_TARGETS=$AMDGPU_TARGETS</span> <span class="flag">-DCMAKE_BUILD_TYPE=Release</span>
|
||
<span class="cmd">cmake</span> <span class="flag">--build</span> build <span class="flag">-j$(nproc)</span></code></pre>
|
||
</div>
|
||
</div>
|
||
</details>
|
||
|
||
</section>
|
||
|
||
</div>
|
||
|
||
<footer>
|
||
Tutorial generato ad aprile 2026 · Basato su fork jagsan-cyber/turboquant-rocm-llamacpp · Stato: sperimentale
|
||
</footer>
|
||
|
||
<script>
|
||
// Theme toggle
|
||
// Theme toggle.
// Forces the initial theme to dark, then — if a [data-theme-toggle]
// button exists in the page — flips data-theme between dark/light on
// click and swaps the button icon (moon = dark, sun = light).
// NOTE(review): no toggle element is visible in this chunk of the file;
// the guard below makes the script a no-op in that case.
(function(){
  const root = document.documentElement;
  const toggle = document.querySelector('[data-theme-toggle]');

  // Inline SVG icons shown on the toggle button for each theme.
  const ICON_MOON = '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z"/></svg>';
  const ICON_SUN  = '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><circle cx="12" cy="12" r="5"/><path d="M12 1v2M12 21v2M4.22 4.22l1.42 1.42M18.36 18.36l1.42 1.42M1 12h2M21 12h2M4.22 19.78l1.42-1.42M18.36 5.64l1.42-1.42"/></svg>';

  let mode = 'dark';
  root.setAttribute('data-theme', mode);

  if (!toggle) return;

  toggle.addEventListener('click', () => {
    mode = (mode === 'dark') ? 'light' : 'dark';
    root.setAttribute('data-theme', mode);
    toggle.innerHTML = (mode === 'dark') ? ICON_MOON : ICON_SUN;
  });
})();
|
||
|
||
// Tab switcher
|
||
// Tab switcher (bound via inline onclick on each .tab-btn).
// Deactivates every tab button and panel inside the nearest .tabs
// container, then activates the clicked button and the panel whose
// id was passed in.
function switchTab(btn, panelId) {
  const tabs = btn.closest('.tabs');
  for (const button of tabs.querySelectorAll('.tab-btn')) {
    button.classList.remove('active');
  }
  for (const panel of tabs.querySelectorAll('.tab-panel')) {
    panel.classList.remove('active');
  }
  btn.classList.add('active');
  document.getElementById(panelId).classList.add('active');
}
|
||
|
||
// Copy code
|
||
// Copy-to-clipboard handler (bound via inline onclick on each .copy-btn).
// Copies the text of the <pre> inside the button's .code-block and shows
// transient feedback on the button itself.
//
// Fix vs. original: navigator.clipboard is undefined on insecure origins
// (plain HTTP, file://) and writeText() can reject (e.g. document not
// focused). The original had no .catch(), so those cases produced a
// TypeError / unhandled rejection with no user feedback. Both now show
// a visible error state instead.
function copyCode(btn) {
  const pre = btn.closest('.code-block').querySelector('pre');
  const text = pre.innerText;

  // Show a temporary label on the button, then restore it after 2s.
  const feedback = (label, ok) => {
    btn.textContent = label;
    if (ok) btn.classList.add('copied');
    setTimeout(() => {
      btn.textContent = '📋 Copia';
      btn.classList.remove('copied');
    }, 2000);
  };

  if (!navigator.clipboard || typeof navigator.clipboard.writeText !== 'function') {
    feedback('❌ Errore', false);
    return;
  }

  navigator.clipboard.writeText(text)
    .then(() => feedback('✅ Copiato', true))
    .catch(() => feedback('❌ Errore', false));
}
|
||
</script>
|
||
</body>
|
||
</html> |