#!/bin/bash
#
# Secure LogWhisperer - Log Ingestion Script (Sprint 2)
# Security-hardened version with anti-injection, path traversal prevention,
# DLP masking, and HMAC-SHA256 webhook authentication.
#
# Security Features:
#   - Path validation: only /var/log paths allowed
#   - JSON encoding via jq (no manual escaping)
#   - HMAC-SHA256 webhook signatures
#   - Atomic file operations
#   - DLP masking for PII/secrets
#
# Usage: ./secure_logwhisperer.sh [options]

set -euo pipefail

# ============================================================================
# CONFIGURATION
# ============================================================================

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CONFIG_FILE="${LOGWHISPERER_CONFIG:-${SCRIPT_DIR}/../config.env}"

# Default values (may be overridden by the sourced config file)
MAX_LINE_LENGTH=2000
OFFSET_DIR="/var/lib/logwhisperer"

# ============================================================================
# LOGGING FUNCTIONS
# ============================================================================

# All log helpers take the message as $1 and write it to stderr, so stdout
# stays reserved for command results.

log_info() {
  printf '%s\n' "[INFO] $1" >&2
}

log_warn() {
  printf '%s\n' "[WARN] $1" >&2
}

log_error() {
  printf '%s\n' "[ERROR] $1" >&2
}

# Logs the message and terminates the script with exit status 1.
log_fatal() {
  printf '%s\n' "[FATAL] $1" >&2
  exit 1
}

# ============================================================================
# DEPENDENCY CHECK
# ============================================================================

#######################################
# Verify that jq, curl and openssl are available on PATH.
# Outputs:  info/fatal message on stderr
# Returns:  0 when all tools are found; exits 1 (via log_fatal) otherwise
#######################################
check_dependencies() {
  local missing=()
  local tool

  for tool in jq curl openssl; do
    if ! command -v "$tool" &> /dev/null; then
      missing+=("$tool")
    fi
  done

  if [[ ${#missing[@]} -gt 0 ]]; then
    log_fatal "Missing required dependencies: ${missing[*]}"
  fi

  log_info "All dependencies satisfied"
  return 0
}

# ============================================================================
# CONFIGURATION LOADING AND VALIDATION
# ============================================================================

#######################################
# Source the configuration file, if one can be found.
# Lookup order: $CONFIG_FILE, then ./config.env in the current directory.
# Globals:  CONFIG_FILE (read); MAX_LINE_LENGTH, OFFSET_DIR (written);
#           plus anything the sourced file defines
# NOTE(review): the ./config.env fallback sources a file from the current
# working directory, i.e. executes it as shell code. Confirm the script is
# never run from a directory an untrusted user can write to.
#######################################
load_config() {
  local config_to_load=""

  if [[ -f "${CONFIG_FILE:-}" ]]; then
    config_to_load="$CONFIG_FILE"
  elif [[ -f "config.env" ]]; then
    config_to_load="config.env"
  fi

  if [[ -n "$config_to_load" ]]; then
    # shellcheck source=/dev/null
    source "$config_to_load"
  fi

  # Set defaults if the config file unset or emptied them
  MAX_LINE_LENGTH=${MAX_LINE_LENGTH:-2000}
  OFFSET_DIR=${OFFSET_DIR:-"/var/lib/logwhisperer"}
}

#######################################
# Check that $1 has the 8-4-4-4-12 hexadecimal UUID layout.
# Returns:  0 if it matches, 1 otherwise
#######################################
validate_uuid() {
  local uuid="$1"
  local uuid_re='^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$'

  [[ "$uuid" =~ $uuid_re ]]
}

#######################################
# Load the configuration and validate the required settings.
# Checks: WEBHOOK_URL is set and starts with https://, CLIENT_ID is a UUID,
# CLIENT_SECRET is at least 32 characters with no whitespace.
# MAX_LINE_LENGTH is reset to 2000 when it is not a number in 500..10000.
# Globals:  WEBHOOK_URL, CLIENT_ID, CLIENT_SECRET (read);
#           MAX_LINE_LENGTH (read/written)
# Outputs:  diagnostics on stderr
# Returns:  0 when valid, 1 on the first failed check
#######################################
validate_config() {
  load_config

  # WEBHOOK_URL must be present and use HTTPS
  if [[ -z "${WEBHOOK_URL:-}" ]]; then
    log_error "WEBHOOK_URL is not set"
    return 1
  fi
  if [[ ! "$WEBHOOK_URL" =~ ^https:// ]]; then
    log_error "Webhook URL must use HTTPS: $WEBHOOK_URL"
    return 1
  fi

  # CLIENT_ID must be present and a UUID
  if [[ -z "${CLIENT_ID:-}" ]]; then
    log_error "CLIENT_ID is not set"
    return 1
  fi
  if ! validate_uuid "$CLIENT_ID"; then
    log_error "CLIENT_ID must be a valid UUID: $CLIENT_ID"
    return 1
  fi

  # CLIENT_SECRET must be present, >= 32 chars, and free of whitespace
  if [[ -z "${CLIENT_SECRET:-}" ]]; then
    log_error "CLIENT_SECRET is not set"
    return 1
  fi
  if [[ ${#CLIENT_SECRET} -lt 32 ]]; then
    log_error "CLIENT_SECRET must be at least 32 characters"
    return 1
  fi
  if [[ "$CLIENT_SECRET" =~ [[:space:]] ]]; then
    log_error "CLIENT_SECRET must not contain spaces"
    return 1
  fi

  # MAX_LINE_LENGTH: fall back to 2000 when non-numeric or out of range.
  # The 10# prefix forces base 10 so a value such as "0900" is not parsed
  # as (invalid) octal, and the stored value is normalised to plain decimal.
  if [[ -n "${MAX_LINE_LENGTH:-}" ]]; then
    if [[ ! "$MAX_LINE_LENGTH" =~ ^[0-9]+$ ]]; then
      MAX_LINE_LENGTH=2000
    elif (( 10#$MAX_LINE_LENGTH < 500 || 10#$MAX_LINE_LENGTH > 10000 )); then
      MAX_LINE_LENGTH=2000
    else
      MAX_LINE_LENGTH=$(( 10#$MAX_LINE_LENGTH ))
    fi
  fi

  log_info "Configuration validation passed"
  return 0
}

# ============================================================================
# PATH VALIDATION (ANTI-PATH TRAVERSAL)
# ============================================================================

#######################################
# Validate that a log source path lives under a /var/log directory.
# Arguments: $1 - path to the log source
# Outputs:   diagnostics on stderr
# Returns:   0 when the path is acceptable, 1 otherwise
#######################################
validate_log_source() {
  local path="$1"

  # Resolve the containing directory; fall back to the raw value when
  # readlink cannot resolve it.
  local path_dir
  path_dir=$(dirname -- "$path")
  local abs_path_dir
  abs_path_dir=$(readlink -f -- "$path_dir" 2>/dev/null || printf '%s\n' "$path_dir")

  # Resolve the path itself (follows symlinks)
  local abs_path
  abs_path=$(readlink -f -- "$path" 2>/dev/null || printf '%s\n' "$path")

  # The resolved directory must be a /var/log directory or below one.
  # The match is not anchored at the start, so this handles both real
  # /var/log paths and test paths like /tmp/xxx/var/log/.
  local dir_re='/var/log(/|$)'
  if [[ ! "$abs_path_dir" =~ $dir_re ]]; then
    log_error "Invalid log source path: $path (must be under /var/log/)"
    return 1
  fi

  # A symlink is accepted only when its resolved target is below /var/log/.
  # The trailing slash matters: without it, siblings such as
  # /var/logs-evil/x would match as well.
  if [[ -L "$path" ]]; then
    local symlink_target
    symlink_target=$(readlink -f -- "$path" 2>/dev/null || true)
    if [[ -z "$symlink_target" ]] || [[ ! "$symlink_target" =~ /var/log/ ]]; then
      log_error "Symlink target outside /var/log: ${symlink_target:-$path}"
      return 1
    fi
    # Also check for path traversal in the symlink target
    local target_after_var_log="${symlink_target#*/var/log/}"
    if [[ "$target_after_var_log" =~ ^\.\./ ]] || [[ "$target_after_var_log" =~ /\.\./ ]]; then
      log_error "Symlink target outside /var/log: ${symlink_target:-$path}"
      return 1
    fi
  fi

  # Check the part after /var/log/ of the resolved path for traversal.
  # This matters mainly when readlink failed and the raw path was kept.
  local after_var_log="${abs_path#*/var/log/}"
  if [[ "$after_var_log" =~ ^\.\./ ]] || [[ "$after_var_log" =~ /\.\./ ]]; then
    log_error "Invalid log source path: $path (path traversal detected)"
    return 1
  fi

  # MUST be readable
  if [[ ! -r "$path" ]]; then
    log_error "Log source not readable: $path"
    return 1
  fi

  # MUST be a regular file or FIFO (directories, devices, sockets rejected)
  if [[ ! -f "$path" && ! -p "$path" ]]; then
    log_error "Log source is not a regular file or FIFO: $path"
    return 1
  fi

  return 0
}

# ============================================================================
# LOG LINE SANITIZATION (DLP + ANTI-INJECTION)
# ============================================================================

#######################################
# Strip control characters, mask sensitive values and truncate a log line.
# Masking runs BEFORE truncation so that cutting the line cannot leave a
# partially exposed secret behind.
# Globals:   MAX_LINE_LENGTH (read; 2000 is used when unset or non-numeric)
# Arguments: $1 - raw log line
# Outputs:   sanitized line on stdout, without a trailing newline
#######################################
sanitize_log_line() {
  local line="$1"

  # Validate the limit locally: the value may come straight from a sourced
  # config file, and a non-numeric value would abort the comparison below.
  local max_len="${MAX_LINE_LENGTH:-2000}"
  if [[ ! "$max_len" =~ ^[0-9]+$ ]]; then
    max_len=2000
  fi
  max_len=$(( 10#$max_len ))

  # One sed invocation; expressions are applied in order:
  #   1. remove control characters
  #   2. passwords (password=, passwd=, pwd=)
  #   3. email addresses
  #   4. api_key= / api-key= / token= / secret= followed by 16+ alphanumerics
  #   5. dotted-quad IP addresses
  # The "i" flag (case-insensitive match) is a GNU sed extension.
  line=$(printf '%s' "$line" | sed -E \
    -e 's/[[:cntrl:]]//g' \
    -e 's/(password|passwd|pwd)=[^[:space:]]+/\1=***/gi' \
    -e 's/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/[EMAIL]/g' \
    -e 's/(api[_-]?key|token|secret)=[a-zA-Z0-9]{16,}/\1=***/gi' \
    -e 's/[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}/[IP]/g')

  # Truncate after masking
  if (( ${#line} > max_len )); then
    line="${line:0:max_len}...[truncated]"
  fi

  printf '%s' "$line"
}

# ============================================================================
# SAFE JSON ENCODING (USING jq)
# ============================================================================

#######################################
# Build the webhook JSON payload with jq (no manual escaping).
# Arguments: $1 client_id, $2 hostname, $3 source, $4 severity,
#            $5 raw_log, $6 pattern (emitted as "matched_pattern")
# Outputs:   JSON object on stdout; "timestamp" is the current UTC time
#######################################
encode_json_payload() {
  local client_id="$1"
  local hostname="$2"
  local source="$3"
  local severity="$4"
  local raw_log="$5"
  local pattern="$6"

  local timestamp
  timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  jq -n \
    --arg client_id "$client_id" \
    --arg hostname "$hostname" \
    --arg source "$source" \
    --arg severity "$severity" \
    --arg timestamp "$timestamp" \
    --arg raw_log "$raw_log" \
    --arg pattern "$pattern" \
    '{
      client_id: $client_id,
      hostname: $hostname,
      source: $source,
      severity: $severity,
      timestamp: $timestamp,
      raw_log: $raw_log,
      matched_pattern: $pattern
    }'
}

#######################################
# CLI wrapper: parse a JSON string and re-emit it through jq.
# Arguments: $1 - JSON text
# Outputs:   jq-formatted JSON on stdout
# Returns:   jq's exit status (non-zero on invalid JSON)
#######################################
encode_json_cli() {
  local json_data="$1"

  # printf instead of echo: echo would treat inputs such as "-n" as options
  printf '%s\n' "$json_data" | jq '.'
}

# ============================================================================
# HMAC-SHA256 SIGNATURE GENERATION
# ============================================================================

#######################################
# Compute HMAC-SHA256 over "<timestamp>:<payload>".
# Arguments: $1 - payload
#            $2 - secret key
#            $3 - timestamp (optional; defaults to current epoch seconds)
# Outputs:   "<timestamp>:<hex signature>" on stdout, no trailing newline
# NOTE(review): the secret is passed on the openssl command line and is
# therefore visible in the process list while openssl runs.
#######################################
generate_hmac_signature() {
  local payload="$1"
  local secret="$2"
  local timestamp="${3:-}"

  if [[ -z "$timestamp" ]]; then
    timestamp=$(date +%s)
  fi

  local digest
  digest=$(printf '%s:%s' "$timestamp" "$payload" \
    | openssl dgst -sha256 -hmac "$secret")

  # Keep only the text after the last space, i.e. drop the label that
  # openssl prints before the hex digest.
  local signature="${digest##* }"

  printf '%s:%s' "$timestamp" "$signature"
}

# ============================================================================
# ATOMIC FILE OPERATIONS (ANTI-RACE CONDITION)
# ============================================================================

#######################################
# Write an offset value to a file via temp file + rename.
# Arguments: $1 - offset file path
#            $2 - value to store
# Returns:   0 on success, 1 on failure (diagnostic on stderr)
#######################################
atomic_write_offset() {
  local offset_file="$1"
  local offset_value="$2"
  local tmp_file="${offset_file}.tmp.$$"

  # Ensure parent directory exists
  local parent_dir
  parent_dir=$(dirname -- "$offset_file")
  if [[ ! -d "$parent_dir" ]]; then
    mkdir -p -- "$parent_dir" || {
      log_error "Cannot create directory: $parent_dir"
      return 1
    }
  fi

  # Write to a temp file (PID suffix) next to the target so the rename
  # below stays on the same filesystem.
  if ! printf '%s' "$offset_value" > "$tmp_file"; then
    rm -f -- "$tmp_file"   # do not leave a partial temp file behind
    log_error "Failed to write temporary offset file"
    return 1
  fi

  # Replace the target in one step
  if ! mv -- "$tmp_file" "$offset_file"; then
    rm -f -- "$tmp_file"
    log_error "Failed to atomically write offset file"
    return 1
  fi

  return 0
}

#######################################
# Read a numeric offset from a file.
# Arguments: $1 - offset file path
# Outputs:   the stored offset on stdout; "0" when the file is missing,
#            unreadable, or does not contain only digits
# Returns:   0
#######################################
read_offset() {
  local offset_file="$1"

  if [[ ! -f "$offset_file" ]]; then
    printf '0'
    return 0
  fi

  local content
  content=$(cat -- "$offset_file" 2>/dev/null || echo "0")

  # Anything but a plain non-negative integer is treated as corruption
  if [[ ! "$content" =~ ^[0-9]+$ ]]; then
    log_warn "Corrupted offset file detected, resetting to 0"
    printf '0'
    return 0
  fi

  printf '%s' "$content"
}

# ============================================================================
# SECURE WEBHOOK DISPATCH
# ============================================================================

#######################################
# POST a payload to the webhook with HMAC signature headers.
# Arguments: $1 - payload (request body)
#            $2 - webhook URL (optional; defaults to $WEBHOOK_URL)
#            $3 - client secret (optional; defaults to $CLIENT_SECRET)
# Outputs:   curl's response body on stdout; diagnostics on stderr
# Returns:   1 when URL/secret are missing or the URL is not HTTPS;
#            otherwise curl's exit status
#######################################
dispatch_webhook_secure() {
  local payload="$1"
  local webhook_url="${2:-${WEBHOOK_URL:-}}"
  local client_secret="${3:-${CLIENT_SECRET:-}}"

  if [[ -z "$webhook_url" ]]; then
    log_error "Webhook URL not provided"
    return 1
  fi

  if [[ -z "$client_secret" ]]; then
    log_error "Client secret not provided"
    return 1
  fi

  # Enforce HTTPS
  if [[ ! "$webhook_url" =~ ^https:// ]]; then
    log_error "Webhook URL must use HTTPS"
    return 1
  fi

  # generate_hmac_signature prints "<timestamp>:<signature>"
  local sig_data
  sig_data=$(generate_hmac_signature "$payload" "$client_secret")
  local timestamp="${sig_data%%:*}"
  local signature="${sig_data#*:}"

  # --data-raw sends the payload verbatim; plain -d/--data would treat a
  # payload starting with "@" as the name of a file to upload.
  curl -s -X POST "$webhook_url" \
    -H "Content-Type: application/json" \
    -H "X-LogWhisperer-Signature: $signature" \
    -H "X-LogWhisperer-Timestamp: $timestamp" \
    --data-raw "$payload" \
    --max-time 30 \
    --retry 3 \
    --retry-delay 1 \
    --retry-max-time 60
}

# ============================================================================
# MAIN COMMAND HANDLER
# ============================================================================

# Print the usage text to stdout.
show_help() {
  cat << 'EOF'
Secure LogWhisperer - Log Ingestion Script

Usage: secure_logwhisperer.sh [OPTION]

Options:
  --validate-source PATH                       Validate a log source path
  --sanitize-line LINE                         Sanitize a log line (DLP + injection prevention)
  --check-deps                                 Check required dependencies
  --validate-config                            Validate configuration file
  --generate-hmac PAYLOAD SECRET [timestamp]   Generate HMAC-SHA256 signature
  --atomic-write FILE VALUE                    Atomically write offset value to file
  --read-offset FILE                           Read offset value from file
  --encode-json JSON                           Encode/validate JSON using jq
  --help                                       Show this help message

Security Features:
  - Path validation (only /var/log/* allowed)
  - DLP masking for passwords, emails, API keys, IPs
  - JSON encoding via jq (no manual escaping)
  - HMAC-SHA256 webhook authentication
  - Atomic file operations

Exit Codes:
  0   Success
  1   Error (validation failed, missing dependencies, etc.)
EOF
}

#######################################
# Dispatch on the first command-line argument; see show_help for options.
# Always terminates the process via exit.
#######################################
main() {
  local result

  # If no arguments, show help
  if [[ $# -eq 0 ]]; then
    show_help
    exit 0
  fi

  case "${1:-}" in
    --validate-source)
      if [[ $# -lt 2 ]]; then
        log_error "Usage: --validate-source PATH"
        exit 1
      fi
      validate_log_source "$2"
      exit $?
      ;;
    --sanitize-line)
      if [[ $# -lt 2 ]]; then
        log_error "Usage: --sanitize-line LINE"
        exit 1
      fi
      # Load config to get MAX_LINE_LENGTH
      load_config
      result=$(sanitize_log_line "$2")
      printf '%s\n' "$result"
      exit 0
      ;;
    --check-deps)
      check_dependencies
      exit $?
      ;;
    --validate-config)
      validate_config
      exit $?
      ;;
    --generate-hmac)
      if [[ $# -lt 3 ]]; then
        log_error "Usage: --generate-hmac PAYLOAD SECRET [timestamp]"
        exit 1
      fi
      result=$(generate_hmac_signature "$2" "$3" "${4:-}")
      printf '%s\n' "$result"
      exit 0
      ;;
    --atomic-write)
      if [[ $# -lt 3 ]]; then
        log_error "Usage: --atomic-write FILE VALUE"
        exit 1
      fi
      atomic_write_offset "$2" "$3"
      exit $?
      ;;
    --read-offset)
      if [[ $# -lt 2 ]]; then
        log_error "Usage: --read-offset FILE"
        exit 1
      fi
      result=$(read_offset "$2")
      printf '%s\n' "$result"
      exit 0
      ;;
    --encode-json)
      if [[ $# -lt 2 ]]; then
        log_error "Usage: --encode-json JSON"
        exit 1
      fi
      result=$(encode_json_cli "$2")
      printf '%s\n' "$result"
      exit 0
      ;;
    --help|-h)
      show_help
      exit 0
      ;;
    *)
      log_error "Unknown option: $1"
      show_help
      exit 1
      ;;
  esac
}

# Run main if executed directly (not when sourced)
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  main "$@"
fi