323 lines
8.2 KiB
Go
323 lines
8.2 KiB
Go
|
|
package supervisor_test
|
||
|
|
|
||
|
|
import (
|
||
|
|
"bufio"
|
||
|
|
"errors"
|
||
|
|
"fmt"
|
||
|
|
"io"
|
||
|
|
"os"
|
||
|
|
"os/exec"
|
||
|
|
"os/signal"
|
||
|
|
"strconv"
|
||
|
|
"strings"
|
||
|
|
"sync"
|
||
|
|
"syscall"
|
||
|
|
"testing"
|
||
|
|
"time"
|
||
|
|
|
||
|
|
"kode.naiv.no/olemd/forgejo-mcp-broker/internal/supervisor"
|
||
|
|
)
|
||
|
|
|
||
|
|
// TestMain implements the helper-process pattern: when invoked with
|
||
|
|
// FJMCP_SUPERVISOR_HELPER set, the test binary acts as a child instead of
|
||
|
|
// running tests. This avoids needing a separate helper binary or shell
|
||
|
|
// dependency.
|
||
|
|
func TestMain(m *testing.M) {
|
||
|
|
if mode := os.Getenv("FJMCP_SUPERVISOR_HELPER"); mode != "" {
|
||
|
|
runHelper(mode)
|
||
|
|
return
|
||
|
|
}
|
||
|
|
os.Exit(m.Run())
|
||
|
|
}
|
||
|
|
|
||
|
|
// runHelper is the body of a helper child process. It dispatches on mode and
// every branch ends in os.Exit:
//
//   - "echo": copy stdin to stdout line by line, exit 0 when scanning stops.
//   - "stderr_at_startup": write FJMCP_HELPER_N lines of the form
//     "stderr line <i>" to stderr (3 lines when the variable is unset,
//     non-numeric, or zero), then behave like "echo".
//   - "ignore_term": swallow SIGTERM, print "ready" on stdout, sleep 60s,
//     exit 0.
//   - "exit_zero": exit 0 immediately.
//   - "exit_nonzero": exit 7 immediately.
//   - anything else: report the unknown mode on stderr and exit 2.
func runHelper(mode string) {
	// echoUntilEOF copies stdin to stdout one line at a time and exits 0
	// once the scanner stops producing lines.
	echoUntilEOF := func() {
		in := bufio.NewScanner(os.Stdin)
		for in.Scan() {
			fmt.Println(in.Text())
		}
		os.Exit(0)
	}

	switch mode {
	case "echo":
		echoUntilEOF()

	case "stderr_at_startup":
		// A failed parse yields 0 here, which selects the default below.
		count, _ := strconv.Atoi(os.Getenv("FJMCP_HELPER_N"))
		if count == 0 {
			count = 3
		}
		for i := 0; i < count; i++ {
			fmt.Fprintf(os.Stderr, "stderr line %d\n", i+1)
		}
		echoUntilEOF()

	case "ignore_term":
		// Register for SIGTERM and drain the channel so the signal has no
		// effect. Only after the handler is registered is "ready" printed:
		// the parent uses that line as a barrier so it does not send
		// SIGTERM too early. Then idle; the parent is expected to SIGKILL
		// this process well before the sleep ends.
		termCh := make(chan os.Signal, 1)
		signal.Notify(termCh, syscall.SIGTERM)
		go func() {
			for range termCh {
				// deliberately ignored
			}
		}()
		fmt.Println("ready")
		time.Sleep(60 * time.Second)
		os.Exit(0)

	case "exit_zero":
		os.Exit(0)

	case "exit_nonzero":
		os.Exit(7)

	default:
		fmt.Fprintf(os.Stderr, "unknown helper mode %q\n", mode)
		os.Exit(2)
	}
}
|
||
|
|
|
||
|
|
// helperCmd builds the argv and the environment entries used to launch this
// test binary as a helper child in the given mode. The argv is the running
// binary plus -test.run=^$, which matches no test, so the child only executes
// runHelper. The environment starts with FJMCP_SUPERVISOR_HELPER=<mode>,
// followed by extraEnv in the order given.
func helperCmd(mode string, extraEnv ...string) ([]string, []string) {
	env := make([]string, 0, 1+len(extraEnv))
	env = append(env, "FJMCP_SUPERVISOR_HELPER="+mode)
	env = append(env, extraEnv...)

	argv := []string{os.Args[0], "-test.run=^$"}
	return argv, env
}
|
||
|
|
|
||
|
|
func TestStart_RequiresCmd(t *testing.T) {
|
||
|
|
_, err := supervisor.Start(t.Context(), supervisor.Config{})
|
||
|
|
if err == nil || !strings.Contains(err.Error(), "Cmd is required") {
|
||
|
|
t.Errorf("want Cmd-required error, got %v", err)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestStart_BadBinary(t *testing.T) {
|
||
|
|
_, err := supervisor.Start(t.Context(), supervisor.Config{
|
||
|
|
Cmd: []string{"/this/path/does/not/exist"},
|
||
|
|
})
|
||
|
|
if err == nil {
|
||
|
|
t.Fatal("expected error for missing binary")
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestEcho_RoundTrip(t *testing.T) {
|
||
|
|
cmd, env := helperCmd("echo")
|
||
|
|
c, err := supervisor.Start(t.Context(), supervisor.Config{Cmd: cmd, Env: env})
|
||
|
|
if err != nil {
|
||
|
|
t.Fatalf("Start: %v", err)
|
||
|
|
}
|
||
|
|
defer c.Stop(t.Context())
|
||
|
|
|
||
|
|
if _, err := io.WriteString(c.Stdin, "hello-world\n"); err != nil {
|
||
|
|
t.Fatalf("write: %v", err)
|
||
|
|
}
|
||
|
|
line, err := c.Stdout.ReadString('\n')
|
||
|
|
if err != nil {
|
||
|
|
t.Fatalf("read: %v", err)
|
||
|
|
}
|
||
|
|
if got := strings.TrimRight(line, "\n"); got != "hello-world" {
|
||
|
|
t.Errorf("read %q, want hello-world", got)
|
||
|
|
}
|
||
|
|
|
||
|
|
if c.Pid() == 0 {
|
||
|
|
t.Error("Pid should be non-zero while child is running")
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestStderr_LinesDelivered(t *testing.T) {
|
||
|
|
var (
|
||
|
|
mu sync.Mutex
|
||
|
|
lines []string
|
||
|
|
)
|
||
|
|
cmd, env := helperCmd("stderr_at_startup", "FJMCP_HELPER_N=4")
|
||
|
|
c, err := supervisor.Start(t.Context(), supervisor.Config{
|
||
|
|
Cmd: cmd,
|
||
|
|
Env: env,
|
||
|
|
OnStderr: func(line string) {
|
||
|
|
mu.Lock()
|
||
|
|
lines = append(lines, line)
|
||
|
|
mu.Unlock()
|
||
|
|
},
|
||
|
|
})
|
||
|
|
if err != nil {
|
||
|
|
t.Fatalf("Start: %v", err)
|
||
|
|
}
|
||
|
|
defer c.Stop(t.Context())
|
||
|
|
|
||
|
|
// Helper drops the four stderr lines at startup; close stdin so it exits.
|
||
|
|
_ = c.Stdin.Close()
|
||
|
|
select {
|
||
|
|
case <-c.Done():
|
||
|
|
case <-time.After(3 * time.Second):
|
||
|
|
t.Fatal("child did not exit after stdin close")
|
||
|
|
}
|
||
|
|
|
||
|
|
mu.Lock()
|
||
|
|
defer mu.Unlock()
|
||
|
|
if len(lines) != 4 {
|
||
|
|
t.Fatalf("collected %d stderr lines, want 4: %v", len(lines), lines)
|
||
|
|
}
|
||
|
|
for i, want := range []string{"stderr line 1", "stderr line 2", "stderr line 3", "stderr line 4"} {
|
||
|
|
if lines[i] != want {
|
||
|
|
t.Errorf("stderr[%d] = %q, want %q", i, lines[i], want)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestStop_GracefulOnSIGTERM(t *testing.T) {
|
||
|
|
cmd, env := helperCmd("echo")
|
||
|
|
c, err := supervisor.Start(t.Context(), supervisor.Config{Cmd: cmd, Env: env})
|
||
|
|
if err != nil {
|
||
|
|
t.Fatalf("Start: %v", err)
|
||
|
|
}
|
||
|
|
start := time.Now()
|
||
|
|
if err := c.Stop(t.Context()); err != nil {
|
||
|
|
// Echo exits cleanly on stdin close; ExitErr should be nil. Some
|
||
|
|
// platforms report SIGTERM as an error if the helper got the signal
|
||
|
|
// before stdin EOF reached it — accept either.
|
||
|
|
t.Logf("Stop returned: %v (acceptable)", err)
|
||
|
|
}
|
||
|
|
if elapsed := time.Since(start); elapsed > 2*time.Second {
|
||
|
|
t.Errorf("Stop took %s, want <2s for a SIGTERM-friendly child", elapsed)
|
||
|
|
}
|
||
|
|
// Done must be closed by now.
|
||
|
|
select {
|
||
|
|
case <-c.Done():
|
||
|
|
default:
|
||
|
|
t.Error("Done should be closed after Stop returns")
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestStop_EscalatesToSIGKILL(t *testing.T) {
|
||
|
|
cmd, env := helperCmd("ignore_term")
|
||
|
|
c, err := supervisor.Start(t.Context(), supervisor.Config{
|
||
|
|
Cmd: cmd,
|
||
|
|
Env: env,
|
||
|
|
StopGrace: 200 * time.Millisecond,
|
||
|
|
})
|
||
|
|
if err != nil {
|
||
|
|
t.Fatalf("Start: %v", err)
|
||
|
|
}
|
||
|
|
|
||
|
|
// Wait for the helper to confirm its SIGTERM handler is installed.
|
||
|
|
// Without this, SIGTERM races signal.Notify and kills the process
|
||
|
|
// outright — the test would then mis-conclude that escalation worked
|
||
|
|
// when actually graceful exit happened.
|
||
|
|
line, err := c.Stdout.ReadString('\n')
|
||
|
|
if err != nil || strings.TrimSpace(line) != "ready" {
|
||
|
|
t.Fatalf("helper readiness sync failed: line=%q err=%v", line, err)
|
||
|
|
}
|
||
|
|
|
||
|
|
start := time.Now()
|
||
|
|
err = c.Stop(t.Context())
|
||
|
|
elapsed := time.Since(start)
|
||
|
|
|
||
|
|
// SIGKILL'd processes report a non-nil exit error.
|
||
|
|
if err == nil {
|
||
|
|
t.Error("expected non-nil exit error after SIGKILL escalation")
|
||
|
|
}
|
||
|
|
// Stop should return a tick or two after the grace period — not, say, 60s.
|
||
|
|
if elapsed > 2*time.Second {
|
||
|
|
t.Errorf("Stop took %s, want fast escalation past grace", elapsed)
|
||
|
|
}
|
||
|
|
if elapsed < 150*time.Millisecond {
|
||
|
|
t.Errorf("Stop took only %s, escalated before grace?", elapsed)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestStop_IsIdempotent(t *testing.T) {
|
||
|
|
cmd, env := helperCmd("echo")
|
||
|
|
c, err := supervisor.Start(t.Context(), supervisor.Config{Cmd: cmd, Env: env})
|
||
|
|
if err != nil {
|
||
|
|
t.Fatalf("Start: %v", err)
|
||
|
|
}
|
||
|
|
if err := c.Stop(t.Context()); err != nil {
|
||
|
|
t.Logf("first Stop: %v", err)
|
||
|
|
}
|
||
|
|
// Second call must not panic and must return promptly.
|
||
|
|
done := make(chan struct{})
|
||
|
|
go func() {
|
||
|
|
_ = c.Stop(t.Context())
|
||
|
|
close(done)
|
||
|
|
}()
|
||
|
|
select {
|
||
|
|
case <-done:
|
||
|
|
case <-time.After(time.Second):
|
||
|
|
t.Error("second Stop hung")
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestDone_ChildExitsCleanly(t *testing.T) {
|
||
|
|
cmd, env := helperCmd("exit_zero")
|
||
|
|
c, err := supervisor.Start(t.Context(), supervisor.Config{Cmd: cmd, Env: env})
|
||
|
|
if err != nil {
|
||
|
|
t.Fatalf("Start: %v", err)
|
||
|
|
}
|
||
|
|
select {
|
||
|
|
case <-c.Done():
|
||
|
|
case <-time.After(3 * time.Second):
|
||
|
|
t.Fatal("Done did not close")
|
||
|
|
}
|
||
|
|
if err := c.ExitErr(); err != nil {
|
||
|
|
t.Errorf("ExitErr = %v, want nil for clean exit", err)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestDone_ChildExitsBadly(t *testing.T) {
|
||
|
|
cmd, env := helperCmd("exit_nonzero")
|
||
|
|
c, err := supervisor.Start(t.Context(), supervisor.Config{Cmd: cmd, Env: env})
|
||
|
|
if err != nil {
|
||
|
|
t.Fatalf("Start: %v", err)
|
||
|
|
}
|
||
|
|
select {
|
||
|
|
case <-c.Done():
|
||
|
|
case <-time.After(3 * time.Second):
|
||
|
|
t.Fatal("Done did not close")
|
||
|
|
}
|
||
|
|
err = c.ExitErr()
|
||
|
|
if err == nil {
|
||
|
|
t.Fatal("ExitErr = nil, want exit error for non-zero exit")
|
||
|
|
}
|
||
|
|
var exitErr *exec.ExitError
|
||
|
|
if !errors.As(err, &exitErr) {
|
||
|
|
t.Errorf("ExitErr = %v, want *exec.ExitError", err)
|
||
|
|
} else if exitErr.ExitCode() != 7 {
|
||
|
|
t.Errorf("ExitCode = %d, want 7", exitErr.ExitCode())
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
func TestStart_AppliesEnvOverrides(t *testing.T) {
|
||
|
|
// Verify cfg.Env actually reaches the child by reading FJMCP_HELPER_N
|
||
|
|
// in the stderr_at_startup helper.
|
||
|
|
cmd, env := helperCmd("stderr_at_startup", "FJMCP_HELPER_N=2")
|
||
|
|
|
||
|
|
var collected []string
|
||
|
|
var mu sync.Mutex
|
||
|
|
c, err := supervisor.Start(t.Context(), supervisor.Config{
|
||
|
|
Cmd: cmd,
|
||
|
|
Env: env,
|
||
|
|
OnStderr: func(line string) {
|
||
|
|
mu.Lock()
|
||
|
|
collected = append(collected, line)
|
||
|
|
mu.Unlock()
|
||
|
|
},
|
||
|
|
})
|
||
|
|
if err != nil {
|
||
|
|
t.Fatalf("Start: %v", err)
|
||
|
|
}
|
||
|
|
_ = c.Stdin.Close()
|
||
|
|
<-c.Done()
|
||
|
|
|
||
|
|
mu.Lock()
|
||
|
|
defer mu.Unlock()
|
||
|
|
if len(collected) != 2 {
|
||
|
|
t.Errorf("got %d stderr lines, want 2 (env should set N=2): %v", len(collected), collected)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|