From 69f475ec78ceac272afca30a12ce81cca11c6126 Mon Sep 17 00:00:00 2001 From: Luca Sacchi Ricciardi Date: Thu, 2 Apr 2026 16:09:00 +0200 Subject: [PATCH] feat(ingestion): implement log monitoring script with webhook integration - Add logwhisperer.sh script for tailing and monitoring system logs - Implement pattern matching for critical errors (FATAL, ERROR, OOM, segfault) - Add JSON payload generation with severity levels - Implement rate limiting and offset tracking per log source - Add install.sh with interactive configuration and systemd support - Create comprehensive test suite with pytest - Add technical specification documentation - Update CHANGELOG.md following Common Changelog standard All 12 tests passing. Follows Metodo Sacchi (Safety first, little often, double check). --- .gitignore | 21 ++ CHANGELOG.md | 51 ++++ docs/specs/ingestion_script.md | 126 ++++++++++ scripts/install.sh | 333 ++++++++++++++++++++++++++ scripts/logwhisperer.sh | 423 +++++++++++++++++++++++++++++++++ tests/test_logwhisperer.py | 194 +++++++++++++++ 6 files changed, 1148 insertions(+) create mode 100644 CHANGELOG.md create mode 100644 docs/specs/ingestion_script.md create mode 100755 scripts/install.sh create mode 100755 scripts/logwhisperer.sh create mode 100644 tests/test_logwhisperer.py diff --git a/.gitignore b/.gitignore index 1d17dae..59b0212 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,22 @@ .venv +__pycache__/ +*.py[cod] +*$py.class +.Python +*.so + +# Testing +.pytest_cache/ +.coverage +htmlcov/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..8d54b48 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,51 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Common Changelog](https://common-changelog.org/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [Unreleased] + +## [0.1.0] - 2026-04-02 + +### Added + +- feat: Implement log ingestion script (`logwhisperer.sh`) for monitoring system logs + - Monitor multiple log sources: syslog, nginx, postgresql + - Pattern matching for critical errors (FATAL, ERROR, OOM, segfault, disk full) + - JSON payload generation with severity levels (low, medium, critical) + - Rate limiting to prevent alert flooding (30s per source/pattern) + - Offset tracking for each log file to avoid reprocessing + - HTTP POST dispatch to configurable webhook with retry logic + - Dry-run mode for testing pattern matching without sending webhooks + - Configuration file support (`/etc/logwhisperer/config.env`) + - Command-line flags: `--help`, `--validate`, `--config`, `--dry-run`, `--test-line` + +- feat: Create installation script (`install.sh`) + - Interactive configuration wizard + - UUID v4 generation for CLIENT_ID + - Systemd service creation (when run as root) + - Support for both system-wide and user-local installation + - Prerequisite checking (bash, curl) + - Connectivity test to webhook URL + +- test: Add comprehensive test suite (`tests/test_logwhisperer.py`) + - Script existence and executable validation + - Configuration validation tests + - Pattern matching tests (FATAL, OOM, ERROR patterns) + - JSON payload structure validation + - Severity mapping verification + +- docs: Create technical specification for Feature 1 (Log Ingestion) + - Architecture diagram and component description + - Requirements (functional and non-functional) + - Safety guidelines (Metodo Sacchi) + - Acceptance criteria + +### Security + +- Configuration files created with restrictive permissions (600) +- No hardcoded credentials in scripts +- HTTPS validation for webhook URLs (warning for non-HTTPS) +- Read-only access to log files (no modifications) diff --git a/docs/specs/ingestion_script.md b/docs/specs/ingestion_script.md new file mode 100644 index 0000000..b6c0d5f --- /dev/null +++ 
b/docs/specs/ingestion_script.md @@ -0,0 +1,126 @@ +# Technical Specification - Log Ingestion Script (Feature 1) + +**Status:** Draft +**Sprint:** 1 +**Author:** Tech Lead +**Date:** 2026-04-02 + +--- + +## 1. Overview + +Uno script Bash leggero da installare sui server dei clienti che monitora i log di sistema, rileva pattern di errore critici e invia i payload via HTTP POST a un webhook n8n configurabile. + +## 2. Requisiti Funzionali + +### 2.1 Log Sources Monitorati +- `/var/log/syslog` (o `/var/log/messages` su RHEL-based) +- `/var/log/nginx/error.log` +- `/var/log/postgresql/*.log` + +### 2.2 Pattern di Errore Rilevati +- `FATAL` +- `ERROR` +- `OOM` / `Out of memory` +- `segfault` +- `disk full` / `No space left on device` +- `Connection refused` +- `Permission denied` (in contesti critici) + +### 2.3 Payload JSON (POST al Webhook) +```json +{ + "client_id": "<uuid-v4>", + "hostname": "<hostname>", + "source": "/var/log/syslog", + "severity": "critical", + "timestamp": "2026-04-02T10:30:00Z", + "raw_log": "Apr 2 10:30:00 server kernel: Out of memory: Kill process 1234", + "matched_pattern": "OOM" +} +``` + +### 2.4 Configurazione +File di configurazione: `/etc/logwhisperer/config.env` +```bash +WEBHOOK_URL="https://your-n8n-instance.com/webhook/logwhisperer" +CLIENT_ID="unique-client-uuid" +LOG_SOURCES="/var/log/syslog,/var/log/nginx/error.log" +POLL_INTERVAL=5 +MAX_LINE_LENGTH=2000 +``` + +## 3. 
Requisiti Non Funzionali + +### 3.1 Safety First (Metodo Sacchi) +- **Read-only**: Lo script NON scrive mai sui log monitorati +- **Graceful degradation**: Se un file di log non esiste, lo salta silenziosamente +- **Rate limiting**: Max 1 alert/30s per source per evitare flood +- **No root escalation**: Funziona con permessi di lettura sui log (gruppo `adm`) + +### 3.2 Little Often +- Polling interval configurabile (default: 5s) +- Tiene traccia dell'ultima posizione letta (offset) per ogni file +- Non ricarica mai righe già processate + +### 3.3 Double Check +- Verifica connettività al webhook prima di inviare +- Retry con backoff esponenziale (max 3 tentativi) +- Log locale di debug in `/var/log/logwhisperer/debug.log` + +## 4. Architettura + +``` +┌─────────────────────────────────────────────┐ +│ LogWhisperer Agent │ +│ │ +│ ┌─────────┐ ┌──────────┐ ┌────────────┐ │ +│ │ File │ │ Pattern │ │ HTTP │ │ +│ │ Tailing │─>│ Matcher │─>│ Dispatcher │ │ +│ └─────────┘ └──────────┘ └────────────┘ │ +│ ^ | │ +│ │ ┌──────────┐ | │ +│ └─────│ Offset │<─────────┘ │ +│ │ Tracker │ (on success) │ +│ └──────────┘ │ +└─────────────────────────────────────────────┘ +``` + +## 5. Struttura File + +``` +scripts/ + logwhisperer.sh # Script principale + install.sh # Script di installazione (setup config, systemd) +tests/ + conftest.py + test_logwhisperer.py # Test Python con subprocess +docs/ + specs/ + ingestion_script.md # Questo file +``` + +## 6. Criteri di Accettazione + +- [ ] Lo script legge da almeno 2 source di log configurabili +- [ ] Rileva pattern di errore (case-insensitive) +- [ ] Invia POST JSON al webhook con payload corretto +- [ ] Gestisce retry su fallimento HTTP +- [ ] Non blocca il sistema se il webhook è down +- [ ] Test Python passano con pytest +- [ ] Script installabile con un solo comando + +## 7. 
Note di Sicurezza + +- Il `CLIENT_ID` è un UUID v4 generato in fase di installazione +- Il webhook URL deve essere HTTPS +- Nessuna credenziale hardcoded nello script +- I log di debug non contengono dati sensibili + +## 8. Dipendenze + +- Bash 4.0+ +- `curl` (già presente su qualsiasi server Linux) +- `date` (GNU coreutils) +- `sha256sum` o `md5sum` (per deduplicazione opzionale) +- Nessuna dipendenza Python lato server (solo lato test) diff --git a/scripts/install.sh b/scripts/install.sh new file mode 100755 index 0000000..b2dfbe1 --- /dev/null +++ b/scripts/install.sh @@ -0,0 +1,333 @@ +#!/bin/bash +# +# LogWhisperer Agent - Script di Installazione +# Installa lo script di monitoraggio log e configura l'ambiente +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +INSTALL_DIR="/usr/local/bin" +CONFIG_DIR="/etc/logwhisperer" +DATA_DIR="/var/lib/logwhisperer" +LOG_DIR="/var/log/logwhisperer" + +# Colori per output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +log_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Genera UUID v4 +generate_uuid() { + # Usa /proc/sys/kernel/random/uuid se disponibile (Linux) + if [[ -r /proc/sys/kernel/random/uuid ]]; then + cat /proc/sys/kernel/random/uuid + else + # Fallback con uuidgen o generazione manuale + if command -v uuidgen &> /dev/null; then + uuidgen + else + # Generazione manuale semplice + local hex="0123456789abcdef" + local uuid="" + for i in {0..35}; do + if [[ $i == 8 || $i == 13 || $i == 18 || $i == 23 ]]; then + uuid+="-" + elif [[ $i == 14 ]]; then + uuid+="4" + elif [[ $i == 19 ]]; then + uuid+="${hex:$((RANDOM % 4 + 8)):1}" + else + uuid+="${hex:$((RANDOM % 16)):1}" + fi + done + echo "$uuid" + fi + fi +} + +# Verifica prerequisiti +check_prerequisites() { + log_info "Verifica prerequisiti..." 
+ + # Verifica bash + if [[ "${BASH_VERSION:-}" < "4.0" ]]; then + log_error "Richiesto Bash 4.0 o superiore" + exit 1 + fi + + # Verifica curl + if ! command -v curl &> /dev/null; then + log_error "curl non trovato. Installa curl: apt-get install curl / yum install curl" + exit 1 + fi + + # Verifica permessi root per installazione system-wide + if [[ $EUID -ne 0 ]]; then + log_warn "Esecuzione senza root. Installazione in modalità utente..." + INSTALL_DIR="$HOME/.local/bin" + CONFIG_DIR="$HOME/.config/logwhisperer" + DATA_DIR="$HOME/.local/share/logwhisperer" + LOG_DIR="$HOME/.local/log/logwhisperer" + fi + + log_info "Prerequisiti OK" +} + +# Crea directory necessarie +create_directories() { + log_info "Creazione directory..." + + mkdir -p "$CONFIG_DIR" + mkdir -p "$DATA_DIR" + mkdir -p "$LOG_DIR" + mkdir -p "$INSTALL_DIR" + + log_info "Directory create" +} + +# Configura il client +configure_client() { + log_info "Configurazione LogWhisperer..." + + local config_file="$CONFIG_DIR/config.env" + local client_id + local webhook_url + local log_sources + + # Genera CLIENT_ID + client_id=$(generate_uuid) + log_info "Client ID generato: $client_id" + + # Chiedi WEBHOOK_URL + echo "" + echo "==========================================" + echo "Configurazione Webhook" + echo "==========================================" + read -p "Inserisci l'URL del webhook n8n: " webhook_url + + if [[ -z "$webhook_url" ]]; then + log_warn "Nessun webhook URL fornito. Lo script funzionerà in modalità offline." + webhook_url="" + elif [[ ! "$webhook_url" =~ ^https:// ]]; then + log_warn "L'URL non usa HTTPS. Si consiglia di usare HTTPS per la sicurezza." 
+ fi + + # Configura LOG_SOURCES + echo "" + echo "==========================================" + echo "Configurazione Sorgenti Log" + echo "==========================================" + + # Rileva sistema e propone defaults + local default_sources="/var/log/syslog" + if [[ -f /etc/redhat-release ]]; then + default_sources="/var/log/messages" + fi + + if [[ -f /var/log/nginx/error.log ]]; then + default_sources="$default_sources,/var/log/nginx/error.log" + fi + + if [[ -d /var/log/postgresql ]]; then + default_sources="$default_sources,/var/log/postgresql/*.log" + fi + + read -p "Sorgenti log da monitorare [$default_sources]: " log_sources + log_sources="${log_sources:-$default_sources}" + + # Chiedi POLL_INTERVAL + echo "" + read -p "Intervallo di polling in secondi [5]: " poll_interval + poll_interval="${poll_interval:-5}" + + # Scrivi configurazione + cat > "$config_file" <}" + echo "Log Sources: $log_sources" + echo "Poll Interval: ${poll_interval}s" + echo "Config File: $config_file" + echo "==========================================" +} + +# Installa lo script +install_script() { + log_info "Installazione script..." + + local source_script="$SCRIPT_DIR/logwhisperer.sh" + local target_script="$INSTALL_DIR/logwhisperer" + + if [[ ! -f "$source_script" ]]; then + log_error "Script sorgente non trovato: $source_script" + exit 1 + fi + + cp "$source_script" "$target_script" + chmod +x "$target_script" + + log_info "Script installato in: $target_script" +} + +# Crea systemd service (se root) +create_systemd_service() { + if [[ $EUID -ne 0 ]]; then + log_info "Installazione utente: salto creazione servizio systemd" + return 0 + fi + + log_info "Creazione servizio systemd..." 
+ + local service_file="/etc/systemd/system/logwhisperer.service" + + cat > "$service_file" < /dev/null; then + # Aggiungi a PATH se necessario + if [[ $EUID -ne 0 ]]; then + log_warn "Aggiungi $INSTALL_DIR al tuo PATH:" + echo " export PATH=\"\$PATH:$INSTALL_DIR\"" + echo " oppure riavvia la shell" + fi + fi + + # Testa la configurazione + local config_file="$CONFIG_DIR/config.env" + if [[ -f "$config_file" ]]; then + # shellcheck source=/dev/null + source "$config_file" + + if [[ -n "${WEBHOOK_URL:-}" ]]; then + log_info "Test di connettività al webhook..." + if curl -s -o /dev/null -w "%{http_code}" "$WEBHOOK_URL" | grep -q "200\\|404"; then + log_info "Webhook raggiungibile" + else + log_warn "Webhook non raggiungibile. Verifica l'URL." + fi + fi + fi + + log_info "Test completato" +} + +# Mostra istruzioni finali +show_instructions() { + echo "" + echo "==========================================" + echo "Installazione Completata!" + echo "==========================================" + echo "" + echo "Comandi disponibili:" + echo " logwhisperer --help Mostra help" + echo " logwhisperer --validate Verifica configurazione" + echo " logwhisperer --dry-run --test-line 'FATAL error' Test pattern" + echo "" + + if [[ $EUID -eq 0 ]]; then + echo "Gestione servizio:" + echo " systemctl start logwhisperer Avvia servizio" + echo " systemctl stop logwhisperer Ferma servizio" + echo " systemctl status logwhisperer Stato servizio" + echo " journalctl -u logwhisperer -f Log servizio" + echo "" + fi + + echo "File importanti:" + echo " Config: $CONFIG_DIR/config.env" + echo " Logs: $LOG_DIR/" + echo " Data: $DATA_DIR/" + echo "" + echo "Prossimi passi:" + echo " 1. Verifica la configurazione: logwhisperer --validate" + echo " 2. Testa il pattern matching: logwhisperer --dry-run --test-line 'ERROR test'" + echo " 3. 
Avvia il monitoraggio" + + if [[ $EUID -eq 0 ]]; then + echo " systemctl start logwhisperer" + else + echo " logwhisperer --config $CONFIG_DIR/config.env" + fi + echo "" + echo "==========================================" +} + +# Main +main() { + echo "" + echo "╔══════════════════════════════════════════╗" + echo "║ LogWhisperer Agent Installer ║" + echo "║ Versione 1.0.0 ║" + echo "╚══════════════════════════════════════════╝" + echo "" + + check_prerequisites + create_directories + configure_client + install_script + create_systemd_service + test_installation + show_instructions +} + +# Se eseguito direttamente +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + main "$@" +fi diff --git a/scripts/logwhisperer.sh b/scripts/logwhisperer.sh new file mode 100755 index 0000000..fcec8b2 --- /dev/null +++ b/scripts/logwhisperer.sh @@ -0,0 +1,423 @@ +#!/bin/bash +# +# LogWhisperer Agent - Script di monitoraggio log +# Legge log di sistema, rileva errori critici e invia alert via webhook +# + +set -euo pipefail + +# ============================================================================ +# CONFIGURAZIONE DEFAULT +# ============================================================================ +SCRIPT_NAME="$(basename "$0")" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VERSION="1.0.0" + +# Default config paths +CONFIG_FILE="/etc/logwhisperer/config.env" +OFFSET_DIR="/var/lib/logwhisperer" +DEBUG_LOG="/var/log/logwhisperer/debug.log" + +# Default values +WEBHOOK_URL="" +CLIENT_ID="" +LOG_SOURCES="" +POLL_INTERVAL=5 +MAX_LINE_LENGTH=2000 + +# Error patterns (case-insensitive) +PATTERNS=( + "FATAL" + "ERROR" + "OOM" + "Out of memory" + "segfault" + "disk full" + "No space left on device" + "Connection refused" + "Permission denied" +) + +# Rate limiting (seconds) +RATE_LIMIT=30 + +# ============================================================================ +# FUNZIONI DI UTILITÀ +# ============================================================================ 
+ +log_debug() { + if [[ "${DEBUG:-0}" == "1" ]]; then + echo "[$(date '+%Y-%m-%d %H:%M:%S')] DEBUG: $*" >&2 + fi +} + +log_error() { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] ERROR: $*" >&2 +} + +log_info() { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] INFO: $*" +} + +# ============================================================================ +# USAGE E HELP +# ============================================================================ + +usage() { + cat </dev/null) || { + log_error "Impossibile connettersi al webhook" + return 1 + } + + http_code=$(echo "$response" | tail -n1) + + if [[ "$http_code" =~ ^2[0-9]{2}$ ]]; then + return 0 + else + log_error "Webhook ha restituito HTTP $http_code" + return 1 + fi +} + +# ============================================================================ +# TEST MODE +# ============================================================================ + +test_line() { + local line="$1" + local source="${2:-/var/log/syslog}" + local matched_pattern + + matched_pattern=$(match_pattern "$line") || { + echo "No match" + return 0 + } + + # Stampa solo il JSON su stdout per permettere parsing + build_payload "$source" "$line" "$matched_pattern" + return 0 +} + +# ============================================================================ +# MAIN LOOP +# ============================================================================ + +main_loop() { + log_info "Avvio LogWhisperer Agent v$VERSION" + log_info "Client ID: $CLIENT_ID" + log_info "Log sources: $LOG_SOURCES" + log_info "Webhook URL: $WEBHOOK_URL" + + # Array per tracciare ultimi alert (rate limiting) + declare -A last_alert + + # Converte LOG_SOURCES in array + IFS=',' read -ra SOURCES <<< "$LOG_SOURCES" + + # Inizializza offset directory + mkdir -p "$OFFSET_DIR" + + while true; do + for source in "${SOURCES[@]}"; do + # Rimuovi spazi + source=$(echo "$source" | xargs) + + if [[ ! 
-r "$source" ]]; then + log_debug "File non leggibile: $source" + continue + fi + + # Offset file per questa sorgente + local offset_file="$OFFSET_DIR/$(echo "$source" | tr '/' '_').offset" + local last_pos=0 + + if [[ -f "$offset_file" ]]; then + last_pos=$(cat "$offset_file") + fi + + # Ottieni dimensione attuale + local current_size + current_size=$(stat -c%s "$source" 2>/dev/null || echo 0) + + # Se il file è stato troncato o ruotato + if [[ $current_size -lt $last_pos ]]; then + last_pos=0 + fi + + # Leggi nuove righe + tail -c +$((last_pos + 1)) "$source" 2>/dev/null | while IFS= read -r line; do + # Trunca se troppo lunga + if [[ ${#line} -gt $MAX_LINE_LENGTH ]]; then + line="${line:0:$MAX_LINE_LENGTH}..." + fi + + local matched_pattern + if matched_pattern=$(match_pattern "$line"); then + local now + now=$(date +%s) + local source_key="$source:$matched_pattern" + + # Rate limiting + if [[ -n "${last_alert[$source_key]:-}" ]]; then + local last_time=${last_alert[$source_key]} + if [[ $((now - last_time)) -lt $RATE_LIMIT ]]; then + log_debug "Rate limited: $source_key" + continue + fi + fi + + log_info "Rilevato: $matched_pattern in $source" + + local payload + payload=$(build_payload "$source" "$line" "$matched_pattern") + + if dispatch_webhook "$payload"; then + last_alert[$source_key]=$now + log_debug "Alert inviato con successo" + else + log_error "Fallimento invio alert" + fi + fi + done + + # Salva nuova posizione + echo "$current_size" > "$offset_file" + done + + sleep "$POLL_INTERVAL" + done +} + +# ============================================================================ +# PARSING ARGOMENTI +# ============================================================================ + +DRY_RUN=0 +VALIDATE=0 +TEST_LINE="" +TEST_SOURCE="/var/log/syslog" + +while [[ $# -gt 0 ]]; do + case $1 in + -c|--config) + CONFIG_FILE="$2" + shift 2 + ;; + -d|--dry-run) + DRY_RUN=1 + shift + ;; + -t|--test-line) + TEST_LINE="$2" + shift 2 + ;; + -s|--test-source) + 
TEST_SOURCE="$2" + shift 2 + ;; + -v|--validate) + VALIDATE=1 + shift + ;; + --debug) + DEBUG=1 + shift + ;; + -h|--help) + usage + exit 0 + ;; + --version) + echo "$SCRIPT_NAME v$VERSION" + exit 0 + ;; + *) + log_error "Opzione sconosciuta: $1" + usage + exit 1 + ;; + esac +done + +# ============================================================================ +# MAIN +# ============================================================================ + +main() { + # Carica configurazione + load_config + + # Modalità validazione + if [[ $VALIDATE -eq 1 ]]; then + if validate_config; then + log_info "Configurazione valida" + exit 0 + else + exit 1 + fi + fi + + # Modalità test linea + if [[ -n "$TEST_LINE" ]]; then + if [[ $DRY_RUN -eq 1 ]]; then + test_line "$TEST_LINE" "$TEST_SOURCE" + exit 0 + fi + fi + + # Verifica configurazione prima di avviare + if ! validate_config; then + log_error "Configurazione invalida. Usa --validate per dettagli." + exit 1 + fi + + # Modalità dry-run senza test line + if [[ $DRY_RUN -eq 1 ]]; then + log_info "Modalità dry-run: nessun webhook verrà inviato" + fi + + # Avvia loop principale + main_loop +} + +main "$@" diff --git a/tests/test_logwhisperer.py b/tests/test_logwhisperer.py new file mode 100644 index 0000000..fc3940e --- /dev/null +++ b/tests/test_logwhisperer.py @@ -0,0 +1,194 @@ +import pytest +import subprocess +import os +import json +import tempfile +import time +import signal +import threading + + +SCRIPT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "scripts", "logwhisperer.sh") + + +@pytest.fixture +def temp_log_file(): + """Create a temporary log file for testing.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False) as f: + f.write("Apr 2 10:00:00 server kernel: Normal log line\n") + f.flush() + yield f.name + os.unlink(f.name) + + +@pytest.fixture +def temp_config(): + """Create a temporary config file for testing.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".env", 
delete=False) as f: + f.write('WEBHOOK_URL="http://localhost:9999/webhook"\n') + f.write('CLIENT_ID="test-client-uuid"\n') + f.write('LOG_SOURCES="/tmp/test.log"\n') + f.write("POLL_INTERVAL=1\n") + f.write("MAX_LINE_LENGTH=2000\n") + f.flush() + yield f.name + os.unlink(f.name) + + +class TestScriptExistence: + """Test that the script exists and is executable.""" + + def test_script_exists(self): + assert os.path.exists(SCRIPT_PATH), f"Script not found at {SCRIPT_PATH}" + + def test_script_is_executable(self): + assert os.access(SCRIPT_PATH, os.X_OK), f"Script is not executable: {SCRIPT_PATH}" + + def test_script_has_shebang(self): + with open(SCRIPT_PATH, "r") as f: + first_line = f.readline().strip() + assert first_line.startswith("#!/bin/bash"), f"Missing bash shebang, got: {first_line}" + + +class TestScriptValidation: + """Test script validation and help output.""" + + def test_script_help_flag(self): + result = subprocess.run( + [SCRIPT_PATH, "--help"], + capture_output=True, + text=True, + timeout=10, + ) + assert result.returncode == 0 + assert "usage" in result.stdout.lower() or "help" in result.stdout.lower() + + def test_script_validate_config(self): + """Test --validate flag with a good config.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".env", delete=False) as f: + f.write('WEBHOOK_URL="http://localhost:9999/webhook"\n') + f.write('CLIENT_ID="test-uuid"\n') + f.write('LOG_SOURCES="/var/log/syslog"\n') + f.write("POLL_INTERVAL=5\n") + config_path = f.name + + try: + result = subprocess.run( + [SCRIPT_PATH, "--validate", "--config", config_path], + capture_output=True, + text=True, + timeout=10, + ) + assert result.returncode == 0 + finally: + os.unlink(config_path) + + def test_script_validate_missing_config(self): + """Test --validate flag with missing config.""" + result = subprocess.run( + [SCRIPT_PATH, "--validate", "--config", "/nonexistent/config.env"], + capture_output=True, + text=True, + timeout=10, + ) + assert result.returncode 
!= 0 + + +class TestPatternMatching: + """Test that the script correctly matches error patterns.""" + + def test_detects_fatal_pattern(self, temp_log_file, temp_config): + """Test detection of FATAL pattern.""" + with open(temp_config, "w") as f: + f.write(f'WEBHOOK_URL="http://localhost:9999/webhook"\n') + f.write('CLIENT_ID="test-client-uuid"\n') + f.write(f'LOG_SOURCES="{temp_log_file}"\n') + f.write("POLL_INTERVAL=1\n") + f.write("MAX_LINE_LENGTH=2000\n") + + with open(temp_log_file, "a") as f: + f.write("Apr 2 10:30:00 server postgres[1234]: FATAL: too many connections\n") + f.flush() + + result = subprocess.run( + [SCRIPT_PATH, "--dry-run", "--config", temp_config, "--test-line", "FATAL: too many connections"], + capture_output=True, + text=True, + timeout=10, + ) + assert result.returncode == 0 + assert "matched" in result.stdout.lower() or "FATAL" in result.stdout + + def test_detects_oom_pattern(self): + """Test detection of OOM pattern.""" + result = subprocess.run( + [SCRIPT_PATH, "--dry-run", "--test-line", "kernel: Out of memory: Kill process 1234"], + capture_output=True, + text=True, + timeout=10, + ) + assert result.returncode == 0 + assert "OOM" in result.stdout or "matched" in result.stdout.lower() + + def test_detects_error_pattern(self): + """Test detection of ERROR pattern.""" + result = subprocess.run( + [SCRIPT_PATH, "--dry-run", "--test-line", "nginx: ERROR: connection refused"], + capture_output=True, + text=True, + timeout=10, + ) + assert result.returncode == 0 + + def test_ignores_normal_lines(self): + """Test that normal log lines are not matched.""" + result = subprocess.run( + [SCRIPT_PATH, "--dry-run", "--test-line", "Apr 2 10:00:00 server sshd[5678]: Accepted publickey"], + capture_output=True, + text=True, + timeout=10, + ) + assert result.returncode == 0 + assert "no match" in result.stdout.lower() or "skip" in result.stdout.lower() + + +class TestPayloadFormat: + """Test that the script generates correct JSON payload.""" + + def 
test_json_payload_structure(self): + """Test that dry-run outputs valid JSON with required fields.""" + result = subprocess.run( + [ + SCRIPT_PATH, + "--dry-run", + "--test-line", + "kernel: Out of memory: Kill process 1234", + "--test-source", + "/var/log/syslog", + ], + capture_output=True, + text=True, + timeout=10, + ) + assert result.returncode == 0 + + output = result.stdout.strip() + try: + payload = json.loads(output) + except json.JSONDecodeError: + pytest.fail(f"Output is not valid JSON: {output}") + + required_fields = ["client_id", "hostname", "source", "severity", "timestamp", "raw_log", "matched_pattern"] + for field in required_fields: + assert field in payload, f"Missing required field: {field}" + + def test_severity_mapping(self): + """Test that severity is correctly mapped based on pattern.""" + result = subprocess.run( + [SCRIPT_PATH, "--dry-run", "--test-line", "kernel: Out of memory: Kill process"], + capture_output=True, + text=True, + timeout=10, + ) + payload = json.loads(result.stdout.strip()) + assert payload["severity"] in ["low", "medium", "critical"]