tangled
alpha
login
or
join now
julien.rbrt.fr
/
servmon
0
fork
atom
KISS server monitoring tool with email alerts
go
monitoring
0
fork
atom
overview
issues
pulls
pipelines
fix: properly support journalctl priority
julien.rbrt.fr
2 months ago
aada268d
e0622d0e
1/1
ci.yml
success
18s
+11
-20
4 changed files
expand all
collapse all
unified
split
.servmon.example.yaml
internal
config
config.go
monitor
monitor.go
readme.md
+1
-5
.servmon.example.yaml
···
25
check_interval: 5m0s
26
lookback_period: 5m0s
27
error_threshold: 10
28
-
priorities:
29
-
- err
30
-
- crit
31
-
- alert
32
-
- emerg
33
cooldown: 30m0s
34
reboot:
35
enabled: true
···
25
check_interval: 5m0s
26
lookback_period: 5m0s
27
error_threshold: 10
28
+
priority: err
0
0
0
0
29
cooldown: 30m0s
30
reboot:
31
enabled: true
+4
-4
internal/config/config.go
···
52
CheckInterval time.Duration `yaml:"check_interval"`
53
LookbackPeriod time.Duration `yaml:"lookback_period"`
54
ErrorThreshold int `yaml:"error_threshold"`
55
-
Priorities []string `yaml:"priorities"` // err, crit, alert, emerg
56
Cooldown time.Duration `yaml:"cooldown"`
57
}
58
···
119
CheckInterval: 5 * time.Minute,
120
LookbackPeriod: 5 * time.Minute,
121
ErrorThreshold: 10,
122
-
Priorities: []string{"err", "crit", "alert", "emerg"},
123
Cooldown: 30 * time.Minute,
124
},
125
Reboot: RebootConfig{
···
233
if c.AlertThresholds.Journalctl.ErrorThreshold <= 0 {
234
return fmt.Errorf("journalctl error threshold must be positive")
235
}
236
-
if len(c.AlertThresholds.Journalctl.Priorities) == 0 {
237
-
return fmt.Errorf("journalctl priorities cannot be empty")
238
}
239
if c.AlertThresholds.Journalctl.Cooldown <= 0 {
240
return fmt.Errorf("journalctl cooldown must be positive")
···
52
CheckInterval time.Duration `yaml:"check_interval"`
53
LookbackPeriod time.Duration `yaml:"lookback_period"`
54
ErrorThreshold int `yaml:"error_threshold"`
55
+
Priority string `yaml:"priority"` // err, crit, alert, emerg
56
Cooldown time.Duration `yaml:"cooldown"`
57
}
58
···
119
CheckInterval: 5 * time.Minute,
120
LookbackPeriod: 5 * time.Minute,
121
ErrorThreshold: 10,
122
+
Priority: "err",
123
Cooldown: 30 * time.Minute,
124
},
125
Reboot: RebootConfig{
···
233
if c.AlertThresholds.Journalctl.ErrorThreshold <= 0 {
234
return fmt.Errorf("journalctl error threshold must be positive")
235
}
236
+
if c.AlertThresholds.Journalctl.Priority == "" {
237
+
return fmt.Errorf("journalctl priority cannot be empty")
238
}
239
if c.AlertThresholds.Journalctl.Cooldown <= 0 {
240
return fmt.Errorf("journalctl cooldown must be positive")
+5
-6
internal/monitor/monitor.go
···
357
// MonitorJournalctl monitors systemd journal logs for errors and critical messages
358
func (m *Monitor) MonitorJournalctl(ctx context.Context) {
359
cfg := m.config.AlertThresholds.Journalctl
360
-
log.Printf("📊 Journalctl Monitor: error_threshold=%d, interval=%v, lookback=%v, priorities=%v, cooldown=%v",
361
-
cfg.ErrorThreshold, cfg.CheckInterval, cfg.LookbackPeriod, cfg.Priorities, cfg.Cooldown)
362
363
alertCooldown := time.NewTimer(0)
364
<-alertCooldown.C
···
375
}
376
377
// Build journalctl command with priority filters
378
-
priorityArgs := strings.Join(cfg.Priorities, ",")
379
sinceArg := fmt.Sprintf("%dm ago", int(cfg.LookbackPeriod.Minutes()))
380
381
cmd := exec.CommandContext(ctx, "journalctl", "-x", "-e",
382
"--since", sinceArg,
383
-
"-p", priorityArgs,
384
"--no-pager",
385
"-o", "short-precise")
386
387
output, err := cmd.CombinedOutput()
388
if err != nil {
389
-
log.Printf("Error running journalctl: %v", err)
390
continue
391
}
392
···
459
a.WithMetadata("critical_count", criticalCount)
460
a.WithMetadata("threshold", cfg.ErrorThreshold)
461
a.WithMetadata("lookback_period", cfg.LookbackPeriod.String())
462
-
a.WithMetadata("priorities", strings.Join(cfg.Priorities, ", "))
463
464
if len(topErrors) > 0 {
465
a.WithMetadata("top_errors", strings.Join(topErrors, " | "))
···
357
// MonitorJournalctl monitors systemd journal logs for errors and critical messages
358
func (m *Monitor) MonitorJournalctl(ctx context.Context) {
359
cfg := m.config.AlertThresholds.Journalctl
360
+
log.Printf("📊 Journalctl Monitor: error_threshold=%d, interval=%v, lookback=%v, priority=%v, cooldown=%v",
361
+
cfg.ErrorThreshold, cfg.CheckInterval, cfg.LookbackPeriod, cfg.Priority, cfg.Cooldown)
362
363
alertCooldown := time.NewTimer(0)
364
<-alertCooldown.C
···
375
}
376
377
// Build journalctl command with priority filters
0
378
sinceArg := fmt.Sprintf("%dm ago", int(cfg.LookbackPeriod.Minutes()))
379
380
cmd := exec.CommandContext(ctx, "journalctl", "-x", "-e",
381
"--since", sinceArg,
382
+
"-p", cfg.Priority,
383
"--no-pager",
384
"-o", "short-precise")
385
386
output, err := cmd.CombinedOutput()
387
if err != nil {
388
+
log.Printf("Error running journalctl: %v. Output: %s", err, string(output))
389
continue
390
}
391
···
458
a.WithMetadata("critical_count", criticalCount)
459
a.WithMetadata("threshold", cfg.ErrorThreshold)
460
a.WithMetadata("lookback_period", cfg.LookbackPeriod.String())
461
+
a.WithMetadata("priority", cfg.Priority)
462
463
if len(topErrors) > 0 {
464
a.WithMetadata("top_errors", strings.Join(topErrors, " | "))
+1
-5
readme.md
···
56
check_interval: 5m0s
57
lookback_period: 5m0s
58
error_threshold: 10
59
-
priorities:
60
-
- err
61
-
- crit
62
-
- alert
63
-
- emerg
64
cooldown: 30m0s
65
66
reboot:
···
56
check_interval: 5m0s
57
lookback_period: 5m0s
58
error_threshold: 10
59
+
priority: err
0
0
0
0
60
cooldown: 30m0s
61
62
reboot: