nix/vm.nix (+6 -6)
···
     guest.port = 6000;
   }
   # spindle
-  {
-    from = "host";
-    host.port = 6555;
-    guest.port = 6555;
-  }
+  # {
+  # from = "host";
+  # host.port = 6555;
+  # guest.port = 6555;
+  # }
 ];
 sharedDirectories = {
   # We can't use the 9p mounts directly for most of these
···
   };
 };
 services.tangled-spindle = {
-  enable = true;
+  enable = false;
   server = {
     owner = envVar "TANGLED_VM_SPINDLE_OWNER";
     hostname = "localhost:6555";
spindle/engines/nixery/engine.go (+90 -26)
···
 func (e *Engine) SetupWorkflow(ctx context.Context, wid models.WorkflowId, wf *models.Workflow) error {
     e.l.Info("setting up workflow", "workflow", wid)

-    _, err := e.docker.NetworkCreate(ctx, networkName(wid), network.CreateOptions{
-        Driver: "bridge",
-    })
-    if err != nil {
-        return err
-    }
-    e.registerCleanup(wid, func(ctx context.Context) error {
-        return e.docker.NetworkRemove(ctx, networkName(wid))
-    })
-
     addl := wf.Data.(addlFields)

     reader, err := e.docker.ImagePull(ctx, addl.image, image.PullOptions{})
···
     }
     defer reader.Close()
     io.Copy(os.Stdout, reader)
+
+    _, err = e.docker.NetworkCreate(ctx, networkName(wid), network.CreateOptions{
+        Driver: "bridge",
+    })
+    if err != nil {
+        return err
+    }
+    e.registerCleanup(wid, func(ctx context.Context) error {
+        return e.docker.NetworkRemove(ctx, networkName(wid))
+    })

     resp, err := e.docker.ContainerCreate(ctx, &container.Config{
         Image: addl.image,
···
     for _, s := range secrets {
         workflowEnvs.AddEnv(s.Key, s.Value)
     }
-
     step := w.Steps[idx].(Step)
-
     select {
     case <-ctx.Done():
         return ctx.Err()
     default:
     }
-
     envs := append(EnvVars(nil), workflowEnvs...)
     for k, v := range step.environment {
         envs.AddEnv(k, v)
     }
     envs.AddEnv("HOME", homeDir)
+
+    e.l.Info("executing step",
+        "workflow_id", wid.String(),
+        "step_index", idx,
+        "step_name", step.Name,
+        "command", step.command,
+    )

     mkExecResp, err := e.docker.ContainerExecCreate(ctx, addl.container, container.ExecOptions{
         Cmd: []string{"bash", "-c", step.command},
···

     select {
     case <-tailDone:
-
     case <-ctx.Done():
         // cleanup will be handled by DestroyWorkflow, since
         // Docker doesn't provide an API to kill an exec run
         // (sure, we could grab the PID and kill it ourselves,
         // but that's wasted effort)
         e.l.Warn("step timed out", "step", step.Name)
-
         <-tailDone
-
         return engine.ErrTimedOut
     }

···
     default:
     }

-    execInspectResp, err := e.docker.ContainerExecInspect(ctx, mkExecResp.ID)
+    if err = e.handleStepFailure(ctx, wid, w, idx, mkExecResp.ID); err != nil {
+        return err
+    }
+
+    e.l.Info("step completed successfully",
+        "workflow_id", wid.String(),
+        "step_index", idx,
+        "step_name", step.Name,
+    )
+
+    return nil
+}
+
+// handleStepFailure logs detailed information about a failed workflow step
+func (e *Engine) handleStepFailure(
+    ctx context.Context,
+    wid models.WorkflowId,
+    w *models.Workflow,
+    idx int,
+    execID string,
+) error {
+    addl := w.Data.(addlFields)
+    step := w.Steps[idx].(Step)
+
+    inspectResp, err := e.docker.ContainerInspect(ctx, addl.container)
     if err != nil {
         return err
     }

-    if execInspectResp.ExitCode != 0 {
-        inspectResp, err := e.docker.ContainerInspect(ctx, addl.container)
-        if err != nil {
-            return err
+    execInspectResp, err := e.docker.ContainerExecInspect(ctx, execID)
+    if err != nil {
+        return err
+    }
+
+    // no error
+    if execInspectResp.ExitCode == 0 {
+        return nil
+    }
+
+    logFields := []any{
+        "workflow_id", wid.String(),
+        "step_index", idx,
+        "step_name", step.Name,
+        "command", step.command,
+        "container_exit_code", inspectResp.State.ExitCode,
+        "container_oom_killed", inspectResp.State.OOMKilled,
+        "exec_exit_code", execInspectResp.ExitCode,
+    }
+
+    // Add container state information
+    if inspectResp.State != nil {
+        logFields = append(logFields,
+            "container_status", inspectResp.State.Status,
+            "container_running", inspectResp.State.Running,
+            "container_paused", inspectResp.State.Paused,
+            "container_restarting", inspectResp.State.Restarting,
+            "container_dead", inspectResp.State.Dead,
+        )
+
+        if inspectResp.State.Error != "" {
+            logFields = append(logFields, "container_error", inspectResp.State.Error)
         }

-        e.l.Error("workflow failed!", "workflow_id", wid.String(), "exit_code", execInspectResp.ExitCode, "oom_killed", inspectResp.State.OOMKilled)
+        if inspectResp.State.StartedAt != "" {
+            logFields = append(logFields, "container_started_at", inspectResp.State.StartedAt)
+        }

-        if inspectResp.State.OOMKilled {
-            return ErrOOMKilled
+        if inspectResp.State.FinishedAt != "" {
+            logFields = append(logFields, "container_finished_at", inspectResp.State.FinishedAt)
         }
-        return engine.ErrWorkflowFailed
     }

-    return nil
+    // Add resource usage if available
+    if inspectResp.HostConfig != nil && inspectResp.HostConfig.Memory > 0 {
+        logFields = append(logFields, "memory_limit", inspectResp.HostConfig.Memory)
+    }
+
+    e.l.Error("workflow step failed!", logFields...)
+
+    if inspectResp.State.OOMKilled {
+        return ErrOOMKilled
+    }
+    return engine.ErrWorkflowFailed
 }

 func (e *Engine) tailStep(ctx context.Context, wfLogger *models.WorkflowLogger, execID string, wid models.WorkflowId, stepIdx int, step models.Step) error {