feat(ingestion): implement log monitoring script with webhook integration

- Add logwhisperer.sh script for tailing and monitoring system logs
- Implement pattern matching for critical errors (FATAL, ERROR, OOM, segfault)
- Add JSON payload generation with severity levels
- Implement rate limiting and offset tracking per log source
- Add install.sh with interactive configuration and systemd support
- Create comprehensive test suite with pytest
- Add technical specification documentation
- Update CHANGELOG.md following Common Changelog standard

All 12 tests passing. Follows Metodo Sacchi (Safety first, little often, double check).
This commit is contained in:
Luca Sacchi Ricciardi
2026-04-02 16:09:00 +02:00
parent 34dbba1201
commit 69f475ec78
6 changed files with 1148 additions and 0 deletions

333
scripts/install.sh Executable file
View File

@@ -0,0 +1,333 @@
#!/bin/bash
#
# LogWhisperer Agent - Installation script
# Installs the log monitoring script and sets up its environment
#
# Strict mode: abort on a failing command, an unset variable, or a failing
# pipeline stage.
set -euo pipefail
# Directory that contains this installer; install_script copies
# logwhisperer.sh from here.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# System-wide locations. check_prerequisites replaces all four with paths
# under $HOME when the installer is not run as root.
INSTALL_DIR="/usr/local/bin"
CONFIG_DIR="/etc/logwhisperer"
DATA_DIR="/var/lib/logwhisperer"
LOG_DIR="/var/log/logwhisperer"
# ANSI colour escape sequences used by the log_* helpers (expanded by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Print an informational message on stdout, tagged with a green [INFO].
#   $1 - message text (backslash escapes are interpreted)
log_info() {
    printf '%b\n' "${GREEN}[INFO]${NC} $1"
}
# Print a warning message on stdout, tagged with a yellow [WARN].
#   $1 - message text (backslash escapes are interpreted)
log_warn() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}
# Print an error message on stdout, tagged with a red [ERROR].
#   $1 - message text (backslash escapes are interpreted)
log_error() {
    printf '%b\n' "${RED}[ERROR]${NC} $1"
}
# Print a version-4 UUID on stdout.
# Sources, in order of preference:
#   1. the kernel generator at /proc/sys/kernel/random/uuid (Linux)
#   2. the uuidgen utility
#   3. a manual fallback built from $RANDOM
generate_uuid() {
    if [[ -r /proc/sys/kernel/random/uuid ]]; then
        cat /proc/sys/kernel/random/uuid
        return
    fi
    if command -v uuidgen &> /dev/null; then
        uuidgen
        return
    fi

    # Manual fallback: 36 characters, hyphens at 8/13/18/23, version nibble
    # "4" at index 14 and variant nibble 8..b at index 19.
    local hex="0123456789abcdef"
    local uuid=""
    local i # keep the loop counter out of the caller's scope
    for (( i = 0; i < 36; i++ )); do
        case "$i" in
            8 | 13 | 18 | 23) uuid+="-" ;;
            14) uuid+="4" ;;
            19) uuid+="${hex:$((RANDOM % 4 + 8)):1}" ;;
            *) uuid+="${hex:$((RANDOM % 16)):1}" ;;
        esac
    done
    echo "$uuid"
}
# Verify that the installer can run: Bash >= 4 and curl available.
# When not running as root, switches INSTALL_DIR, CONFIG_DIR, DATA_DIR and
# LOG_DIR to per-user locations under $HOME.
# Exits with status 1 if a prerequisite is missing.
check_prerequisites() {
    log_info "Verifica prerequisiti..."

    # Compare the major version numerically: a string comparison against
    # "4.0" is lexicographic and would reject e.g. "10.0".
    if (( ${BASH_VERSINFO[0]:-0} < 4 )); then
        log_error "Richiesto Bash 4.0 o superiore"
        exit 1
    fi

    if ! command -v curl &> /dev/null; then
        log_error "curl non trovato. Installa curl: apt-get install curl / yum install curl"
        exit 1
    fi

    # Without root privileges fall back to a user-level installation.
    if [[ $EUID -ne 0 ]]; then
        log_warn "Esecuzione senza root. Installazione in modalità utente..."
        INSTALL_DIR="$HOME/.local/bin"
        CONFIG_DIR="$HOME/.config/logwhisperer"
        DATA_DIR="$HOME/.local/share/logwhisperer"
        LOG_DIR="$HOME/.local/log/logwhisperer"
    fi

    log_info "Prerequisiti OK"
}
# Create the configuration, data, log and install directories
# (including any missing parents).
create_directories() {
    local dir
    log_info "Creazione directory..."
    for dir in "$CONFIG_DIR" "$DATA_DIR" "$LOG_DIR" "$INSTALL_DIR"; do
        mkdir -p "$dir"
    done
    log_info "Directory create"
}
# Quote a value so it can be written as KEY="value" into a file that bash
# will later `source`: backslash, double quote, dollar sign and backtick are
# escaped so the value is read back literally and cannot run commands.
_cfg_quote() {
    local v="$1"
    v=${v//\\/\\\\}
    v=${v//\"/\\\"}
    v=${v//\$/\\\$}
    v=${v//\`/\\\`}
    printf '"%s"' "$v"
}

# Interactively build $CONFIG_DIR/config.env.
# Generates a client id, then prompts for the webhook URL, the comma-separated
# log sources and the polling interval. Empty answers (or end of input) fall
# back to the defaults shown in the prompts. The file is written with mode 600
# and a summary is printed on stdout.
configure_client() {
    log_info "Configurazione LogWhisperer..."
    local config_file="$CONFIG_DIR/config.env"
    local client_id
    local webhook_url=""
    local log_sources=""
    local poll_interval=""

    client_id=$(generate_uuid)
    log_info "Client ID generato: $client_id"

    echo ""
    echo "=========================================="
    echo "Configurazione Webhook"
    echo "=========================================="
    # -r keeps backslashes literal; `|| true` lets a closed stdin fall
    # through to the defaults instead of aborting under `set -e`.
    read -r -p "Inserisci l'URL del webhook n8n: " webhook_url || true
    if [[ -z "$webhook_url" ]]; then
        log_warn "Nessun webhook URL fornito. Lo script funzionerà in modalità offline."
        webhook_url=""
    elif [[ ! "$webhook_url" =~ ^https:// ]]; then
        log_warn "L'URL non usa HTTPS. Si consiglia di usare HTTPS per la sicurezza."
    fi

    echo ""
    echo "=========================================="
    echo "Configurazione Sorgenti Log"
    echo "=========================================="
    # Propose defaults based on what exists on this host.
    local default_sources="/var/log/syslog"
    if [[ -f /etc/redhat-release ]]; then
        default_sources="/var/log/messages"
    fi
    if [[ -f /var/log/nginx/error.log ]]; then
        default_sources="$default_sources,/var/log/nginx/error.log"
    fi
    if [[ -d /var/log/postgresql ]]; then
        default_sources="$default_sources,/var/log/postgresql/*.log"
    fi
    read -r -p "Sorgenti log da monitorare [$default_sources]: " log_sources || true
    log_sources="${log_sources:-$default_sources}"

    echo ""
    read -r -p "Intervallo di polling in secondi [5]: " poll_interval || true
    poll_interval="${poll_interval:-5}"
    # The value is written unquoted and later passed to `sleep`; accept only
    # a positive integer.
    if [[ ! "$poll_interval" =~ ^[1-9][0-9]*$ ]]; then
        log_warn "Intervallo di polling non valido: '$poll_interval'. Uso il valore di default (5)."
        poll_interval=5
    fi

    # Write the file under a restrictive umask so it is never readable by
    # other users, not even before the chmod below.
    (
        umask 077
        {
            printf '%s\n' "# LogWhisperer Configuration"
            printf '%s\n' "# Generato automaticamente il $(date)"
            printf 'WEBHOOK_URL=%s\n' "$(_cfg_quote "$webhook_url")"
            printf 'CLIENT_ID=%s\n' "$(_cfg_quote "$client_id")"
            printf 'LOG_SOURCES=%s\n' "$(_cfg_quote "$log_sources")"
            printf 'POLL_INTERVAL=%s\n' "$poll_interval"
            printf '%s\n' "MAX_LINE_LENGTH=2000"
        } > "$config_file"
    )
    chmod 600 "$config_file"
    log_info "Configurazione salvata in: $config_file"

    echo ""
    echo "=========================================="
    echo "Riepilogo Configurazione"
    echo "=========================================="
    echo "Client ID: $client_id"
    echo "Webhook URL: ${webhook_url:-<non configurato>}"
    echo "Log Sources: $log_sources"
    echo "Poll Interval: ${poll_interval}s"
    echo "Config File: $config_file"
    echo "=========================================="
}
# Copy logwhisperer.sh from the installer's directory to
# $INSTALL_DIR/logwhisperer and mark it executable.
# Exits with status 1 when the source script is missing.
install_script() {
    log_info "Installazione script..."
    local src="$SCRIPT_DIR/logwhisperer.sh"
    local dest="$INSTALL_DIR/logwhisperer"

    [[ -f "$src" ]] || {
        log_error "Script sorgente non trovato: $src"
        exit 1
    }

    cp "$src" "$dest"
    chmod +x "$dest"
    log_info "Script installato in: $dest"
}
# Write /etc/systemd/system/logwhisperer.service and reload systemd.
# Does nothing (returns 0) when the installer is not running as root.
create_systemd_service() {
    if (( EUID != 0 )); then
        log_info "Installazione utente: salto creazione servizio systemd"
        return 0
    fi

    log_info "Creazione servizio systemd..."
    local unit_path="/etc/systemd/system/logwhisperer.service"

    # One unit-file line per printf argument; paths come from the installer's
    # current INSTALL_DIR / CONFIG_DIR / LOG_DIR.
    printf '%s\n' \
        "[Unit]" \
        "Description=LogWhisperer Agent - Log Monitoring Service" \
        "After=network.target" \
        "[Service]" \
        "Type=simple" \
        "ExecStart=$INSTALL_DIR/logwhisperer --config $CONFIG_DIR/config.env" \
        "Restart=always" \
        "RestartSec=10" \
        "User=root" \
        "Group=adm" \
        "StandardOutput=append:$LOG_DIR/agent.log" \
        "StandardError=append:$LOG_DIR/agent.log" \
        "[Install]" \
        "WantedBy=multi-user.target" \
        > "$unit_path"

    systemctl daemon-reload
    log_info "Servizio systemd creato"
    log_info "Per avviare: systemctl start logwhisperer"
    log_info "Per abilitare all'avvio: systemctl enable logwhisperer"
}
# Post-install smoke test.
# - Prints a PATH hint when `logwhisperer` is not found on PATH and the
#   installation is user-level.
# - Sources $CONFIG_DIR/config.env (if present) and, when WEBHOOK_URL is set,
#   probes it with curl. HTTP 200 or 404 counts as reachable.
# Never fails the installation: an unreachable webhook only logs a warning.
test_installation() {
    log_info "Test installazione..."

    if ! command -v logwhisperer &> /dev/null; then
        if [[ $EUID -ne 0 ]]; then
            log_warn "Aggiungi $INSTALL_DIR al tuo PATH:"
            echo " export PATH=\"\$PATH:$INSTALL_DIR\""
            echo " oppure riavvia la shell"
        fi
    fi

    local config_file="$CONFIG_DIR/config.env"
    local http_code=""
    if [[ -f "$config_file" ]]; then
        # shellcheck source=/dev/null
        source "$config_file"
        if [[ -n "${WEBHOOK_URL:-}" ]]; then
            log_info "Test di connettività al webhook..."
            # Bound the probe: without timeouts an unresponsive endpoint
            # would hang the installer indefinitely.
            http_code=$(curl -s -o /dev/null \
                --connect-timeout 5 --max-time 10 \
                -w "%{http_code}" "$WEBHOOK_URL" 2>/dev/null) || true
            case "$http_code" in
                200 | 404)
                    log_info "Webhook raggiungibile"
                    ;;
                *)
                    log_warn "Webhook non raggiungibile. Verifica l'URL."
                    ;;
            esac
        fi
    fi

    log_info "Test completato"
}
# Print the post-install summary on stdout: available commands, service
# management hints (root only), important paths and suggested next steps.
show_instructions() {
    local rule="=========================================="

    printf '%s\n' \
        "" \
        "$rule" \
        "Installazione Completata!" \
        "$rule" \
        "" \
        "Comandi disponibili:" \
        " logwhisperer --help Mostra help" \
        " logwhisperer --validate Verifica configurazione" \
        " logwhisperer --dry-run --test-line 'FATAL error' Test pattern" \
        ""

    # Service commands only make sense for a system-wide (root) install.
    if [[ $EUID -eq 0 ]]; then
        printf '%s\n' \
            "Gestione servizio:" \
            " systemctl start logwhisperer Avvia servizio" \
            " systemctl stop logwhisperer Ferma servizio" \
            " systemctl status logwhisperer Stato servizio" \
            " journalctl -u logwhisperer -f Log servizio" \
            ""
    fi

    printf '%s\n' \
        "File importanti:" \
        " Config: $CONFIG_DIR/config.env" \
        " Logs: $LOG_DIR/" \
        " Data: $DATA_DIR/" \
        "" \
        "Prossimi passi:" \
        " 1. Verifica la configurazione: logwhisperer --validate" \
        " 2. Testa il pattern matching: logwhisperer --dry-run --test-line 'ERROR test'" \
        " 3. Avvia il monitoraggio"

    if [[ $EUID -eq 0 ]]; then
        printf '%s\n' " systemctl start logwhisperer"
    else
        printf '%s\n' " logwhisperer --config $CONFIG_DIR/config.env"
    fi

    printf '%s\n' "" "$rule"
}
# Installer entry point: print the banner, then run every installation step
# in order.
main() {
    local step

    printf '%s\n' \
        "" \
        "╔══════════════════════════════════════════╗" \
        "║ LogWhisperer Agent Installer ║" \
        "║ Versione 1.0.0 ║" \
        "╚══════════════════════════════════════════╝" \
        ""

    for step in \
        check_prerequisites \
        create_directories \
        configure_client \
        install_script \
        create_systemd_service \
        test_installation \
        show_instructions; do
        "$step"
    done
}
# Run main only when this file is executed directly; when it is sourced
# (BASH_SOURCE[0] differs from $0) only the functions above are defined.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
main "$@"
fi

423
scripts/logwhisperer.sh Executable file
View File

@@ -0,0 +1,423 @@
#!/bin/bash
#
# LogWhisperer Agent - Log monitoring script
# Reads system logs, detects critical errors and sends alerts via webhook
#
# Strict mode: abort on a failing command, an unset variable, or a failing
# pipeline stage.
set -euo pipefail
# ============================================================================
# DEFAULT CONFIGURATION
# ============================================================================
# Name shown in usage/version output.
SCRIPT_NAME="$(basename "$0")"
# Directory containing this script (not referenced elsewhere in the visible
# part of this file).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VERSION="1.0.0"
# Default config paths
# CONFIG_FILE can be overridden with -c/--config; OFFSET_DIR holds one
# ".offset" file per monitored log.
CONFIG_FILE="/etc/logwhisperer/config.env"
OFFSET_DIR="/var/lib/logwhisperer"
DEBUG_LOG="/var/log/logwhisperer/debug.log"
# Default values
# These are overwritten by whatever the sourced config file assigns.
WEBHOOK_URL=""
CLIENT_ID=""
LOG_SOURCES=""
POLL_INTERVAL=5
MAX_LINE_LENGTH=2000
# Error patterns (case-insensitive)
# match_pattern tries them in this order and reports the first one that hits.
PATTERNS=(
"FATAL"
"ERROR"
"OOM"
"Out of memory"
"segfault"
"disk full"
"No space left on device"
"Connection refused"
"Permission denied"
)
# Rate limiting (seconds)
# Minimum interval between two alerts for the same source/pattern pair.
RATE_LIMIT=30
# ============================================================================
# FUNZIONI DI UTILITÀ
# ============================================================================
# Write a timestamped DEBUG line to stderr, but only when DEBUG is "1".
#   $* - message text
log_debug() {
    [[ "${DEBUG:-0}" == "1" ]] || return 0
    printf '[%s] DEBUG: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >&2
}
# Write a timestamped ERROR line to stderr.
#   $* - message text
log_error() {
    printf '[%s] ERROR: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >&2
}
# Write a timestamped INFO line to stdout.
#   $* - message text
log_info() {
    printf '[%s] INFO: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
# ============================================================================
# USAGE E HELP
# ============================================================================
# Print the command-line help on stdout. The text embeds the current
# SCRIPT_NAME and CONFIG_FILE values.
usage() {
    printf '%s\n' \
        "Usage: $SCRIPT_NAME [OPTIONS]" \
        "LogWhisperer Agent - Monitora log di sistema e invia alert critici via webhook" \
        "OPTIONS:" \
        "-c, --config FILE Percorso file di configurazione (default: $CONFIG_FILE)" \
        "-d, --dry-run Modalità test: non invia webhook, stampa output" \
        "-t, --test-line LINE Testa il matching su una singola riga" \
        "-s, --test-source SRC Specifica la sorgente per il test (default: /var/log/syslog)" \
        "-v, --validate Valida la configurazione e esce" \
        "--debug Abilita log di debug" \
        "-h, --help Mostra questo help" \
        "--version Mostra versione" \
        "ESEMPI:" \
        "$SCRIPT_NAME --help" \
        "$SCRIPT_NAME --validate --config /etc/logwhisperer/config.env" \
        "$SCRIPT_NAME --dry-run --test-line \"FATAL: database error\"" \
        "$SCRIPT_NAME --config /etc/logwhisperer/config.env"
}
# ============================================================================
# GESTIONE CONFIGURAZIONE
# ============================================================================
# Source CONFIG_FILE into the current shell when it exists; a missing file
# is not an error and leaves the built-in defaults in place.
load_config() {
    if [[ ! -f "$CONFIG_FILE" ]]; then
        return 0
    fi
    # shellcheck source=/dev/null
    . "$CONFIG_FILE"
}
# Check that the mandatory settings are non-empty.
# Logs one error per missing setting.
# Returns 0 when WEBHOOK_URL, CLIENT_ID and LOG_SOURCES are all set, 1 otherwise.
validate_config() {
    local missing=0
    local name

    for name in WEBHOOK_URL CLIENT_ID LOG_SOURCES; do
        # ${!name} reads the variable whose name is stored in $name.
        if [[ -z "${!name:-}" ]]; then
            log_error "$name non configurato"
            missing=$((missing + 1))
        fi
    done

    if [[ $missing -gt 0 ]]; then
        return 1
    fi
    return 0
}
# ============================================================================
# PATTERN MATCHING
# ============================================================================
# Find the first entry of PATTERNS that occurs in a log line
# (case-insensitive, grep basic-regex semantics).
#   $1 - the log line
# Prints the matching pattern on stdout and returns 0; returns 1 with no
# output when nothing matches.
match_pattern() {
    local line="$1"
    local pattern

    for pattern in "${PATTERNS[@]}"; do
        # `--` keeps a pattern starting with "-" from being parsed as a grep
        # option; the here-string avoids an `echo |` stage that can turn an
        # early grep exit into a false "no match" under pipefail.
        if grep -qi -- "$pattern" <<< "$line"; then
            echo "$pattern"
            return 0
        fi
    done
    return 1
}
# Map a matched pattern to a severity label printed on stdout:
# "critical", "medium", or "low" for anything not listed.
#   $1 - pattern as returned by match_pattern
get_severity() {
    local matched="$1"
    local level="low"

    case "$matched" in
        "FATAL" | "OOM" | "Out of memory" | "segfault")
            level="critical"
            ;;
        "ERROR" | "disk full" | "No space left on device")
            level="medium"
            ;;
    esac

    echo "$level"
}
# ============================================================================
# PAYLOAD JSON
# ============================================================================
# Escape a string for use inside a JSON string literal.
# Backslash, double quote and the short-form control characters get their
# two-character escapes; every other control character becomes \u00XX.
_json_escape() {
    local s="$1"
    local ch code prev

    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\t'/\\t}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\b'/\\b}
    s=${s//$'\f'/\\f}

    # Remaining control characters, one distinct character per iteration.
    while [[ "$s" =~ [[:cntrl:]] ]]; do
        ch=${BASH_REMATCH[0]}
        printf -v code '\\u%04x' "'$ch"
        prev=$s
        s=${s//"$ch"/"$code"}
        # Safety net: stop if the replacement made no progress.
        [[ "$s" != "$prev" ]] || break
    done

    printf '%s' "$s"
}

# Build the alert JSON document and print it on stdout.
#   $1 - log source (path of the monitored file)
#   $2 - raw log line
#   $3 - pattern that matched (also decides the severity)
# Reads the global CLIENT_ID. Every string field is JSON-escaped, so quotes
# or control characters in any value cannot produce invalid JSON.
build_payload() {
    local source="$1"
    local raw_log="$2"
    local matched_pattern="$3"
    local severity host

    severity=$(get_severity "$matched_pattern")
    host=$(hostname 2>/dev/null) || host=""

    printf '{\n'
    printf '"client_id": "%s",\n' "$(_json_escape "$CLIENT_ID")"
    printf '"hostname": "%s",\n' "$(_json_escape "$host")"
    printf '"source": "%s",\n' "$(_json_escape "$source")"
    printf '"severity": "%s",\n' "$(_json_escape "$severity")"
    printf '"timestamp": "%s",\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
    printf '"raw_log": "%s",\n' "$(_json_escape "$raw_log")"
    printf '"matched_pattern": "%s"\n' "$(_json_escape "$matched_pattern")"
    printf '}\n'
}
# ============================================================================
# DISPATCH WEBHOOK
# ============================================================================
# Deliver one alert payload to WEBHOOK_URL with an HTTP POST.
#   $1 - JSON payload
# In dry-run mode (DRY_RUN=1) nothing is sent: the payload is printed on
# stdout instead, as the --dry-run help text promises.
# Optional global: WEBHOOK_TIMEOUT - total seconds allowed per request
#                  (default 15).
# Returns 0 on an HTTP 2xx response (or in dry-run mode), 1 otherwise.
dispatch_webhook() {
    local payload="$1"

    if [[ "${DRY_RUN:-0}" == "1" ]]; then
        printf '%s\n' "$payload"
        return 0
    fi

    if [[ -z "${WEBHOOK_URL:-}" ]]; then
        log_error "WEBHOOK_URL non configurato"
        return 1
    fi

    local response
    local http_code
    # Timeouts keep an unresponsive endpoint from blocking the monitoring
    # loop forever. The status code is appended on its own last line.
    response=$(curl -s -w "\n%{http_code}" \
        --connect-timeout 5 \
        --max-time "${WEBHOOK_TIMEOUT:-15}" \
        -X POST \
        -H "Content-Type: application/json" \
        -d "$payload" \
        "$WEBHOOK_URL" 2>/dev/null) || {
        log_error "Impossibile connettersi al webhook"
        return 1
    }

    # Last line of the captured output is the HTTP status code.
    http_code=${response##*$'\n'}
    if [[ "$http_code" =~ ^2[0-9]{2}$ ]]; then
        return 0
    fi
    log_error "Webhook ha restituito HTTP $http_code"
    return 1
}
# ============================================================================
# TEST MODE
# ============================================================================
# Test mode: run pattern matching on a single line.
#   $1 - line to test
#   $2 - source to report in the payload (default: /var/log/syslog)
# Prints the JSON payload when a pattern matches, "No match" otherwise.
# Always returns 0 unless building the payload fails.
test_line() {
    local candidate="$1"
    local origin="${2:-/var/log/syslog}"
    local hit

    if hit=$(match_pattern "$candidate"); then
        # Only the JSON goes to stdout so callers can parse it.
        build_payload "$origin" "$candidate" "$hit"
    else
        echo "No match"
    fi
    return 0
}
# ============================================================================
# MAIN LOOP
# ============================================================================
# Scan one log file for lines appended since the stored offset and send an
# alert for each matching line, subject to rate limiting.
#   $1 - path of the log file
# Uses the caller's associative array `last_alert` (dynamic scoping) to
# remember when each "file:pattern" pair last produced an alert.
_lw_scan_file() {
    local file="$1"
    local offset_file
    local last_pos=0
    local current_size
    local line matched_pattern now source_key last_time payload

    if [[ ! -r "$file" ]]; then
        log_debug "File non leggibile: $file"
        return 0
    fi

    # One offset file per source; "/" in the path is replaced by "_".
    offset_file="$OFFSET_DIR/${file//\//_}.offset"
    if [[ -f "$offset_file" ]]; then
        last_pos=$(< "$offset_file") || last_pos=0
        # A corrupt offset file must not abort the agent: fall back to 0.
        if [[ "$last_pos" =~ ^[0-9]+$ ]]; then
            last_pos=$((10#$last_pos))
        else
            last_pos=0
        fi
    fi

    current_size=$(stat -c%s "$file" 2>/dev/null || echo 0)

    # File truncated or rotated: start again from the beginning.
    if (( current_size < last_pos )); then
        last_pos=0
    fi

    if (( current_size > last_pos )); then
        # The loop reads from a process substitution rather than a pipe so
        # it runs in this shell and updates to last_alert persist.
        # `head -c` limits the read to the bytes counted in current_size,
        # so data appended meanwhile is handled once, on the next poll.
        # `|| [[ -n "$line" ]]` also processes a final unterminated line.
        while IFS= read -r line || [[ -n "$line" ]]; do
            # Truncate overlong lines.
            if [[ ${#line} -gt $MAX_LINE_LENGTH ]]; then
                line="${line:0:$MAX_LINE_LENGTH}..."
            fi

            matched_pattern=$(match_pattern "$line") || continue

            now=$(date +%s)
            source_key="$file:$matched_pattern"

            # Rate limiting per file/pattern pair.
            if [[ -n "${last_alert["$source_key"]:-}" ]]; then
                last_time=${last_alert["$source_key"]}
                if (( now - last_time < RATE_LIMIT )); then
                    log_debug "Rate limited: $source_key"
                    continue
                fi
            fi

            log_info "Rilevato: $matched_pattern in $file"
            payload=$(build_payload "$file" "$line" "$matched_pattern")
            if dispatch_webhook "$payload"; then
                last_alert["$source_key"]=$now
                log_debug "Alert inviato con successo"
            else
                log_error "Fallimento invio alert"
            fi
        done < <(tail -c "+$((last_pos + 1))" -- "$file" 2>/dev/null \
            | head -c "$((current_size - last_pos))")
    fi

    # Save the new position.
    echo "$current_size" > "$offset_file"
}

# Main monitoring loop: forever, every POLL_INTERVAL seconds, scan each entry
# of the comma-separated LOG_SOURCES list. Entries may be plain paths or glob
# patterns (e.g. /var/log/postgresql/*.log); globs are re-expanded on every
# poll so newly created files are picked up.
main_loop() {
    log_info "Avvio LogWhisperer Agent v$VERSION"
    log_info "Client ID: $CLIENT_ID"
    log_info "Log sources: $LOG_SOURCES"
    log_info "Webhook URL: $WEBHOOK_URL"

    # Timestamp of the last alert per "file:pattern" (rate limiting).
    declare -A last_alert=()
    local -a SOURCES=()
    local -a files=()
    local entry file

    IFS=',' read -ra SOURCES <<< "$LOG_SOURCES"

    mkdir -p "$OFFSET_DIR"

    while true; do
        for entry in "${SOURCES[@]}"; do
            # Trim surrounding whitespace without spawning xargs, which
            # would also mangle quotes and backslashes.
            entry="${entry#"${entry%%[![:space:]]*}"}"
            entry="${entry%"${entry##*[![:space:]]}"}"
            [[ -n "$entry" ]] || continue

            # Expand the entry as a glob; a plain existing path yields itself.
            mapfile -t files < <(compgen -G "$entry")
            if (( ${#files[@]} == 0 )); then
                log_debug "File non leggibile: $entry"
                continue
            fi

            for file in "${files[@]}"; do
                _lw_scan_file "$file"
            done
        done
        sleep "$POLL_INTERVAL"
    done
}
# ============================================================================
# PARSING ARGOMENTI
# ============================================================================
# Command-line state, filled in by parse_args.
DRY_RUN=0
VALIDATE=0
TEST_LINE=""
TEST_SOURCE="/var/log/syslog"

# Abort with a readable message when an option that needs a value is the
# last argument (otherwise "$2" trips `set -u` with a cryptic error).
#   $@ - the remaining arguments, option first
_require_optarg() {
    if [[ $# -lt 2 ]]; then
        log_error "L'opzione $1 richiede un argomento"
        usage
        exit 1
    fi
}

# Parse the command-line options into the globals above (plus CONFIG_FILE
# and DEBUG). Prints help/version and exits 0 for -h/--help and --version;
# exits 1 on an unknown option or a missing option value.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -c | --config)
                _require_optarg "$@"
                CONFIG_FILE="$2"
                shift 2
                ;;
            -d | --dry-run)
                DRY_RUN=1
                shift
                ;;
            -t | --test-line)
                _require_optarg "$@"
                TEST_LINE="$2"
                shift 2
                ;;
            -s | --test-source)
                _require_optarg "$@"
                TEST_SOURCE="$2"
                shift 2
                ;;
            -v | --validate)
                VALIDATE=1
                shift
                ;;
            --debug)
                DEBUG=1
                shift
                ;;
            -h | --help)
                usage
                exit 0
                ;;
            --version)
                echo "$SCRIPT_NAME v$VERSION"
                exit 0
                ;;
            *)
                log_error "Opzione sconosciuta: $1"
                usage
                exit 1
                ;;
        esac
    done
}

parse_args "$@"
# ============================================================================
# MAIN
# ============================================================================
# Agent entry point. Loads the configuration, then picks a mode from the
# flags set during argument parsing:
#   VALIDATE=1                 validate the configuration and exit (0 ok, 1 invalid)
#   TEST_LINE set + DRY_RUN=1  run the single-line test and exit 0
#   otherwise                  validate, then start the monitoring loop
main() {
    load_config

    # Validation-only mode.
    if [[ $VALIDATE -eq 1 ]]; then
        validate_config || exit 1
        log_info "Configurazione valida"
        exit 0
    fi

    # Single-line test mode (needs both --test-line and --dry-run).
    if [[ -n "$TEST_LINE" && $DRY_RUN -eq 1 ]]; then
        test_line "$TEST_LINE" "$TEST_SOURCE"
        exit 0
    fi

    # Refuse to start monitoring with an incomplete configuration.
    validate_config || {
        log_error "Configurazione invalida. Usa --validate per dettagli."
        exit 1
    }

    # Dry-run without a test line.
    if [[ $DRY_RUN -eq 1 ]]; then
        log_info "Modalità dry-run: nessun webhook verrà inviato"
    fi

    main_loop
}
# Start the agent. The options were already consumed by the argument-parsing
# loop above, so main receives whatever is left of "$@" and does not read it.
main "$@"