KISS (keep it simple, stupid) server monitoring tool with email alerts
go monitoring

fix: properly support journalctl priority

+11 -20
+1 -5
.servmon.example.yaml
··· 25 25 check_interval: 5m0s 26 26 lookback_period: 5m0s 27 27 error_threshold: 10 28 - priorities: 29 - - err 30 - - crit 31 - - alert 32 - - emerg 28 + priority: err 33 29 cooldown: 30m0s 34 30 reboot: 35 31 enabled: true
+4 -4
internal/config/config.go
··· 52 52 CheckInterval time.Duration `yaml:"check_interval"` 53 53 LookbackPeriod time.Duration `yaml:"lookback_period"` 54 54 ErrorThreshold int `yaml:"error_threshold"` 55 - Priorities []string `yaml:"priorities"` // err, crit, alert, emerg 55 + Priority string `yaml:"priority"` // err, crit, alert, emerg 56 56 Cooldown time.Duration `yaml:"cooldown"` 57 57 } 58 58 ··· 119 119 CheckInterval: 5 * time.Minute, 120 120 LookbackPeriod: 5 * time.Minute, 121 121 ErrorThreshold: 10, 122 - Priorities: []string{"err", "crit", "alert", "emerg"}, 122 + Priority: "err", 123 123 Cooldown: 30 * time.Minute, 124 124 }, 125 125 Reboot: RebootConfig{ ··· 233 233 if c.AlertThresholds.Journalctl.ErrorThreshold <= 0 { 234 234 return fmt.Errorf("journalctl error threshold must be positive") 235 235 } 236 - if len(c.AlertThresholds.Journalctl.Priorities) == 0 { 237 - return fmt.Errorf("journalctl priorities cannot be empty") 236 + if c.AlertThresholds.Journalctl.Priority == "" { 237 + return fmt.Errorf("journalctl priority cannot be empty") 238 238 } 239 239 if c.AlertThresholds.Journalctl.Cooldown <= 0 { 240 240 return fmt.Errorf("journalctl cooldown must be positive")
+5 -6
internal/monitor/monitor.go
··· 357 357 // MonitorJournalctl monitors systemd journal logs for errors and critical messages 358 358 func (m *Monitor) MonitorJournalctl(ctx context.Context) { 359 359 cfg := m.config.AlertThresholds.Journalctl 360 - log.Printf("📊 Journalctl Monitor: error_threshold=%d, interval=%v, lookback=%v, priorities=%v, cooldown=%v", 361 - cfg.ErrorThreshold, cfg.CheckInterval, cfg.LookbackPeriod, cfg.Priorities, cfg.Cooldown) 360 + log.Printf("📊 Journalctl Monitor: error_threshold=%d, interval=%v, lookback=%v, priority=%v, cooldown=%v", 361 + cfg.ErrorThreshold, cfg.CheckInterval, cfg.LookbackPeriod, cfg.Priority, cfg.Cooldown) 362 362 363 363 alertCooldown := time.NewTimer(0) 364 364 <-alertCooldown.C ··· 375 375 } 376 376 377 377 // Build journalctl command with priority filters 378 - priorityArgs := strings.Join(cfg.Priorities, ",") 379 378 sinceArg := fmt.Sprintf("%dm ago", int(cfg.LookbackPeriod.Minutes())) 380 379 381 380 cmd := exec.CommandContext(ctx, "journalctl", "-x", "-e", 382 381 "--since", sinceArg, 383 - "-p", priorityArgs, 382 + "-p", cfg.Priority, 384 383 "--no-pager", 385 384 "-o", "short-precise") 386 385 387 386 output, err := cmd.CombinedOutput() 388 387 if err != nil { 389 - log.Printf("Error running journalctl: %v", err) 388 + log.Printf("Error running journalctl: %v. Output: %s", err, string(output)) 390 389 continue 391 390 } 392 391 ··· 459 458 a.WithMetadata("critical_count", criticalCount) 460 459 a.WithMetadata("threshold", cfg.ErrorThreshold) 461 460 a.WithMetadata("lookback_period", cfg.LookbackPeriod.String()) 462 - a.WithMetadata("priorities", strings.Join(cfg.Priorities, ", ")) 461 + a.WithMetadata("priority", cfg.Priority) 463 462 464 463 if len(topErrors) > 0 { 465 464 a.WithMetadata("top_errors", strings.Join(topErrors, " | "))
+1 -5
readme.md
··· 56 56 check_interval: 5m0s 57 57 lookback_period: 5m0s 58 58 error_threshold: 10 59 - priorities: 60 - - err 61 - - crit 62 - - alert 63 - - emerg 59 + priority: err 64 60 cooldown: 30m0s 65 61 66 62 reboot: