tangled
alpha
login
or
join now
julien.rbrt.fr
/
servmon
0
fork
atom
KISS server monitoring tool with email alerts
go
monitoring
0
fork
atom
overview
issues
pulls
pipelines
fix: properly support journalctl priority
julien.rbrt.fr
2 months ago
aada268d
e0622d0e
1/1
ci.yml
success
18s
+11
-20
4 changed files
expand all
collapse all
unified
split
.servmon.example.yaml
internal
config
config.go
monitor
monitor.go
readme.md
+1
-5
.servmon.example.yaml
···
25
25
check_interval: 5m0s
26
26
lookback_period: 5m0s
27
27
error_threshold: 10
28
28
-
priorities:
29
29
-
- err
30
30
-
- crit
31
31
-
- alert
32
32
-
- emerg
28
28
+
priority: err
33
29
cooldown: 30m0s
34
30
reboot:
35
31
enabled: true
+4
-4
internal/config/config.go
···
52
52
CheckInterval time.Duration `yaml:"check_interval"`
53
53
LookbackPeriod time.Duration `yaml:"lookback_period"`
54
54
ErrorThreshold int `yaml:"error_threshold"`
55
55
-
Priorities []string `yaml:"priorities"` // err, crit, alert, emerg
55
55
+
Priority string `yaml:"priority"` // err, crit, alert, emerg
56
56
Cooldown time.Duration `yaml:"cooldown"`
57
57
}
58
58
···
119
119
CheckInterval: 5 * time.Minute,
120
120
LookbackPeriod: 5 * time.Minute,
121
121
ErrorThreshold: 10,
122
122
-
Priorities: []string{"err", "crit", "alert", "emerg"},
122
122
+
Priority: "err",
123
123
Cooldown: 30 * time.Minute,
124
124
},
125
125
Reboot: RebootConfig{
···
233
233
if c.AlertThresholds.Journalctl.ErrorThreshold <= 0 {
234
234
return fmt.Errorf("journalctl error threshold must be positive")
235
235
}
236
236
-
if len(c.AlertThresholds.Journalctl.Priorities) == 0 {
237
237
-
return fmt.Errorf("journalctl priorities cannot be empty")
236
236
+
if c.AlertThresholds.Journalctl.Priority == "" {
237
237
+
return fmt.Errorf("journalctl priority cannot be empty")
238
238
}
239
239
if c.AlertThresholds.Journalctl.Cooldown <= 0 {
240
240
return fmt.Errorf("journalctl cooldown must be positive")
+5
-6
internal/monitor/monitor.go
···
357
357
// MonitorJournalctl monitors systemd journal logs for errors and critical messages
358
358
func (m *Monitor) MonitorJournalctl(ctx context.Context) {
359
359
cfg := m.config.AlertThresholds.Journalctl
360
360
-
log.Printf("📊 Journalctl Monitor: error_threshold=%d, interval=%v, lookback=%v, priorities=%v, cooldown=%v",
361
361
-
cfg.ErrorThreshold, cfg.CheckInterval, cfg.LookbackPeriod, cfg.Priorities, cfg.Cooldown)
360
360
+
log.Printf("📊 Journalctl Monitor: error_threshold=%d, interval=%v, lookback=%v, priority=%v, cooldown=%v",
361
361
+
cfg.ErrorThreshold, cfg.CheckInterval, cfg.LookbackPeriod, cfg.Priority, cfg.Cooldown)
362
362
363
363
alertCooldown := time.NewTimer(0)
364
364
<-alertCooldown.C
···
375
375
}
376
376
377
377
// Build journalctl command with priority filters
378
378
-
priorityArgs := strings.Join(cfg.Priorities, ",")
379
378
sinceArg := fmt.Sprintf("%dm ago", int(cfg.LookbackPeriod.Minutes()))
380
379
381
380
cmd := exec.CommandContext(ctx, "journalctl", "-x", "-e",
382
381
"--since", sinceArg,
383
383
-
"-p", priorityArgs,
382
382
+
"-p", cfg.Priority,
384
383
"--no-pager",
385
384
"-o", "short-precise")
386
385
387
386
output, err := cmd.CombinedOutput()
388
387
if err != nil {
389
389
-
log.Printf("Error running journalctl: %v", err)
388
388
+
log.Printf("Error running journalctl: %v. Output: %s", err, string(output))
390
389
continue
391
390
}
392
391
···
459
458
a.WithMetadata("critical_count", criticalCount)
460
459
a.WithMetadata("threshold", cfg.ErrorThreshold)
461
460
a.WithMetadata("lookback_period", cfg.LookbackPeriod.String())
462
462
-
a.WithMetadata("priorities", strings.Join(cfg.Priorities, ", "))
461
461
+
a.WithMetadata("priority", cfg.Priority)
463
462
464
463
if len(topErrors) > 0 {
465
464
a.WithMetadata("top_errors", strings.Join(topErrors, " | "))
+1
-5
readme.md
···
56
56
check_interval: 5m0s
57
57
lookback_period: 5m0s
58
58
error_threshold: 10
59
59
-
priorities:
60
60
-
- err
61
61
-
- crit
62
62
-
- alert
63
63
-
- emerg
59
59
+
priority: err
64
60
cooldown: 30m0s
65
61
66
62
reboot: