package supervisor_test import ( "bufio" "errors" "fmt" "io" "os" "os/exec" "os/signal" "strconv" "strings" "sync" "syscall" "testing" "time" "kode.naiv.no/olemd/forgejo-mcp-broker/internal/supervisor" ) // TestMain implements the helper-process pattern: when invoked with // FJMCP_SUPERVISOR_HELPER set, the test binary acts as a child instead of // running tests. This avoids needing a separate helper binary or shell // dependency. func TestMain(m *testing.M) { if mode := os.Getenv("FJMCP_SUPERVISOR_HELPER"); mode != "" { runHelper(mode) return } os.Exit(m.Run()) } func runHelper(mode string) { switch mode { case "echo": // Echo each stdin line back to stdout. s := bufio.NewScanner(os.Stdin) for s.Scan() { fmt.Println(s.Text()) } os.Exit(0) case "stderr_at_startup": // Print N lines to stderr at startup, then echo loop. n, _ := strconv.Atoi(os.Getenv("FJMCP_HELPER_N")) if n == 0 { n = 3 } for i := 1; i <= n; i++ { fmt.Fprintf(os.Stderr, "stderr line %d\n", i) } s := bufio.NewScanner(os.Stdin) for s.Scan() { fmt.Println(s.Text()) } os.Exit(0) case "ignore_term": // Install SIGTERM handler that swallows the signal, announce // readiness on stdout (so tests have a sync barrier — the parent // must not send SIGTERM before the handler is in place), then // sleep until SIGKILL. sig := make(chan os.Signal, 1) signal.Notify(sig, syscall.SIGTERM) go func() { for range sig { /* ignore */ } }() fmt.Println("ready") time.Sleep(60 * time.Second) os.Exit(0) case "exit_zero": os.Exit(0) case "exit_nonzero": os.Exit(7) default: fmt.Fprintf(os.Stderr, "unknown helper mode %q\n", mode) os.Exit(2) } } // helperCmd returns Cmd args + Env that re-exec the test binary as a helper // in the given mode. -test.run=^$ skips all tests (we just want runHelper). func helperCmd(mode string, extraEnv ...string) ([]string, []string) { cmd := []string{os.Args[0], "-test.run=^$"} env := append([]string{"FJMCP_SUPERVISOR_HELPER=" + mode}, extraEnv...) return cmd, env } func TestStart_RequiresCmd(t *testing.T) { _, err := supervisor.Start(t.Context(), supervisor.Config{}) if err == nil || !strings.Contains(err.Error(), "Cmd is required") { t.Errorf("want Cmd-required error, got %v", err) } } func TestStart_BadBinary(t *testing.T) { _, err := supervisor.Start(t.Context(), supervisor.Config{ Cmd: []string{"/this/path/does/not/exist"}, }) if err == nil { t.Fatal("expected error for missing binary") } } func TestEcho_RoundTrip(t *testing.T) { cmd, env := helperCmd("echo") c, err := supervisor.Start(t.Context(), supervisor.Config{Cmd: cmd, Env: env}) if err != nil { t.Fatalf("Start: %v", err) } defer c.Stop(t.Context()) if _, err := io.WriteString(c.Stdin, "hello-world\n"); err != nil { t.Fatalf("write: %v", err) } line, err := c.Stdout.ReadString('\n') if err != nil { t.Fatalf("read: %v", err) } if got := strings.TrimRight(line, "\n"); got != "hello-world" { t.Errorf("read %q, want hello-world", got) } if c.Pid() == 0 { t.Error("Pid should be non-zero while child is running") } } func TestStderr_LinesDelivered(t *testing.T) { var ( mu sync.Mutex lines []string ) cmd, env := helperCmd("stderr_at_startup", "FJMCP_HELPER_N=4") c, err := supervisor.Start(t.Context(), supervisor.Config{ Cmd: cmd, Env: env, OnStderr: func(line string) { mu.Lock() lines = append(lines, line) mu.Unlock() }, }) if err != nil { t.Fatalf("Start: %v", err) } defer c.Stop(t.Context()) // Helper drops the four stderr lines at startup; close stdin so it exits. _ = c.Stdin.Close() select { case <-c.Done(): case <-time.After(3 * time.Second): t.Fatal("child did not exit after stdin close") } mu.Lock() defer mu.Unlock() if len(lines) != 4 { t.Fatalf("collected %d stderr lines, want 4: %v", len(lines), lines) } for i, want := range []string{"stderr line 1", "stderr line 2", "stderr line 3", "stderr line 4"} { if lines[i] != want { t.Errorf("stderr[%d] = %q, want %q", i, lines[i], want) } } } func TestStop_GracefulOnSIGTERM(t *testing.T) { cmd, env := helperCmd("echo") c, err := supervisor.Start(t.Context(), supervisor.Config{Cmd: cmd, Env: env}) if err != nil { t.Fatalf("Start: %v", err) } start := time.Now() if err := c.Stop(t.Context()); err != nil { // Echo exits cleanly on stdin close; ExitErr should be nil. Some // platforms report SIGTERM as an error if the helper got the signal // before stdin EOF reached it — accept either. t.Logf("Stop returned: %v (acceptable)", err) } if elapsed := time.Since(start); elapsed > 2*time.Second { t.Errorf("Stop took %s, want <2s for a SIGTERM-friendly child", elapsed) } // Done must be closed by now. select { case <-c.Done(): default: t.Error("Done should be closed after Stop returns") } } func TestStop_EscalatesToSIGKILL(t *testing.T) { cmd, env := helperCmd("ignore_term") c, err := supervisor.Start(t.Context(), supervisor.Config{ Cmd: cmd, Env: env, StopGrace: 200 * time.Millisecond, }) if err != nil { t.Fatalf("Start: %v", err) } // Wait for the helper to confirm its SIGTERM handler is installed. // Without this, SIGTERM races signal.Notify and kills the process // outright — the test would then mis-conclude that escalation worked // when actually graceful exit happened. line, err := c.Stdout.ReadString('\n') if err != nil || strings.TrimSpace(line) != "ready" { t.Fatalf("helper readiness sync failed: line=%q err=%v", line, err) } start := time.Now() err = c.Stop(t.Context()) elapsed := time.Since(start) // SIGKILL'd processes report a non-nil exit error. if err == nil { t.Error("expected non-nil exit error after SIGKILL escalation") } // Stop should return a tick or two after the grace period — not, say, 60s. if elapsed > 2*time.Second { t.Errorf("Stop took %s, want fast escalation past grace", elapsed) } if elapsed < 150*time.Millisecond { t.Errorf("Stop took only %s, escalated before grace?", elapsed) } } func TestStop_IsIdempotent(t *testing.T) { cmd, env := helperCmd("echo") c, err := supervisor.Start(t.Context(), supervisor.Config{Cmd: cmd, Env: env}) if err != nil { t.Fatalf("Start: %v", err) } if err := c.Stop(t.Context()); err != nil { t.Logf("first Stop: %v", err) } // Second call must not panic and must return promptly. done := make(chan struct{}) go func() { _ = c.Stop(t.Context()) close(done) }() select { case <-done: case <-time.After(time.Second): t.Error("second Stop hung") } } func TestDone_ChildExitsCleanly(t *testing.T) { cmd, env := helperCmd("exit_zero") c, err := supervisor.Start(t.Context(), supervisor.Config{Cmd: cmd, Env: env}) if err != nil { t.Fatalf("Start: %v", err) } select { case <-c.Done(): case <-time.After(3 * time.Second): t.Fatal("Done did not close") } if err := c.ExitErr(); err != nil { t.Errorf("ExitErr = %v, want nil for clean exit", err) } } func TestDone_ChildExitsBadly(t *testing.T) { cmd, env := helperCmd("exit_nonzero") c, err := supervisor.Start(t.Context(), supervisor.Config{Cmd: cmd, Env: env}) if err != nil { t.Fatalf("Start: %v", err) } select { case <-c.Done(): case <-time.After(3 * time.Second): t.Fatal("Done did not close") } err = c.ExitErr() if err == nil { t.Fatal("ExitErr = nil, want exit error for non-zero exit") } var exitErr *exec.ExitError if !errors.As(err, &exitErr) { t.Errorf("ExitErr = %v, want *exec.ExitError", err) } else if exitErr.ExitCode() != 7 { t.Errorf("ExitCode = %d, want 7", exitErr.ExitCode()) } } func TestStart_AppliesEnvOverrides(t *testing.T) { // Verify cfg.Env actually reaches the child by reading FJMCP_HELPER_N // in the stderr_at_startup helper. cmd, env := helperCmd("stderr_at_startup", "FJMCP_HELPER_N=2") var collected []string var mu sync.Mutex c, err := supervisor.Start(t.Context(), supervisor.Config{ Cmd: cmd, Env: env, OnStderr: func(line string) { mu.Lock() collected = append(collected, line) mu.Unlock() }, }) if err != nil { t.Fatalf("Start: %v", err) } _ = c.Stdin.Close() <-c.Done() mu.Lock() defer mu.Unlock() if len(collected) != 2 { t.Errorf("got %d stderr lines, want 2 (env should set N=2): %v", len(collected), collected) } }