feat(session): idle reaper + Forgejo token rotation (forgejo-mcp-broker-q4x)

Adds StartReaper to internal/session — two background goroutines that
keep the session map healthy under steady load.

Idle reaper:
  - Sweeps every ReapInterval (default 30s) for sessions whose
    LastActive is older than IdleTimeout (default 15m).
  - Evicts via SIGTERM through the Backend.Stop hook.

Token rotator:
  - Sweeps every RotateInterval (default 1m) for sessions whose Forgejo
    token is within RefreshLead (default 5m) of expiry.
  - Calls the operator-supplied RefreshForgejo to obtain new
    access+refresh tokens, then Respawn to mint a new Backend with the
    updated token in env.
  - Atomically swaps e.backend (now an atomic.Pointer[Backend]); the
    sid is preserved so the client just re-issues an MCP `initialize`
    on its next request rather than re-authenticating.
  - On refresh failure, evicts so the next /mcp produces a clean
    re-auth instead of carrying a stale token.

Two races uncovered by -race during this work, both fixed here:
  - The Done-watcher started in spawnSession captured the original
    backend pointer; after rotation it still saw Done close (because
    the old backend was Stopped) and would yank the entire entry. Fixed
    by comparing watched-backend == e.backend.Load() before evicting.
  - The fakeSpawner test helper let tests read the backends slice
    without the lock the spawn callback held. Replaced with a
    spawnerControl type whose count/at/snapshot methods all lock.

Tests cover idle eviction, recently-active sessions surviving sweeps,
successful rotation+respawn (sid preserved), refresh failure → eviction,
and Stop idempotency.

Closes forgejo-mcp-broker-q4x. Phase 5 complete.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ole-Morten Duesund 2026-04-27 17:32:36 +02:00
commit 933e7bd369
5 changed files with 501 additions and 36 deletions

View file

@ -56,25 +56,57 @@ func (f *fakeBackend) backend() *session.Backend {
}
}
// fakeSpawner returns a SpawnFunc that hands out a sequence of fakeBackends.
// The returned slice is appended to as Spawn is called, so tests can
// inspect every backend that was minted.
func fakeSpawner(t *testing.T) (session.SpawnFunc, *[]*fakeBackend) {
// spawnerControl wraps a fake SpawnFunc with thread-safe access to the
// backends it has minted. Tests get raced freely without a control type
// because both spawn (called from registry goroutines) and tests access
// the slice; this lets tests query under a lock.
type spawnerControl struct {
mu sync.Mutex
backends []*fakeBackend
spawn session.SpawnFunc
}
// newSpawnerControl returns a spawnerControl whose spawn func appends every
// fakeBackend it mints to c.backends while holding c.mu.
//
// NOTE(review): this hunk is rendered without +/- markers, so what look like
// lines from the replaced fakeSpawner body (the local mu/backends/next
// variables, the first closure header, and `return spawn, &backends`) appear
// interleaved with the new body — confirm against the actual file.
func newSpawnerControl(t *testing.T) *spawnerControl {
t.Helper()
var (
mu sync.Mutex
backends []*fakeBackend
next int
)
spawn := func(ctx context.Context, sess *oauth.Session) (*session.Backend, error) {
mu.Lock()
defer mu.Unlock()
fb := newFakeBackend(next)
next++
backends = append(backends, fb)
c := &spawnerControl{}
c.spawn = func(ctx context.Context, sess *oauth.Session) (*session.Backend, error) {
c.mu.Lock()
defer c.mu.Unlock()
// The argument to newFakeBackend is the index the new backend will
// occupy in c.backends, read under the same lock as the append.
fb := newFakeBackend(len(c.backends))
c.backends = append(c.backends, fb)
return fb.backend(), nil
}
return spawn, &backends
return c
}
// count reports how many backends have been recorded so far. The length is
// read while holding c.mu.
func (c *spawnerControl) count() int {
	c.mu.Lock()
	n := len(c.backends)
	c.mu.Unlock()
	return n
}
// at returns the i-th recorded backend, or nil when i is outside
// [0, len(c.backends)). The lookup happens while holding c.mu.
//
// A negative index is treated like any other out-of-range index and yields
// nil instead of panicking on the slice access.
func (c *spawnerControl) at(i int) *fakeBackend {
	c.mu.Lock()
	defer c.mu.Unlock()
	if i < 0 || i >= len(c.backends) {
		return nil
	}
	return c.backends[i]
}
// snapshot returns a copy of the recorded backends, taken while holding
// c.mu. The returned slice has its own backing array, so callers can range
// over it without the lock; the *fakeBackend elements are shared, not cloned.
func (c *spawnerControl) snapshot() []*fakeBackend {
	c.mu.Lock()
	defer c.mu.Unlock()
	dup := make([]*fakeBackend, 0, len(c.backends))
	return append(dup, c.backends...)
}
// fakeSpawner is the older two-value entry point, kept so existing tests
// continue to compile. It builds a spawnerControl and returns its spawn func
// together with the control itself. New tests should call newSpawnerControl
// directly.
func fakeSpawner(t *testing.T) (session.SpawnFunc, *spawnerControl) {
	ctl := newSpawnerControl(t)
	return ctl.spawn, ctl
}
// testBearerHeader carries a bearer-hash discriminator across the wire so
@ -137,8 +169,8 @@ func TestServe_NewSession_MintsSidAndDispatches(t *testing.T) {
if r.Active() != 1 {
t.Errorf("Active() = %d, want 1", r.Active())
}
if len(*backends) != 1 || (*backends)[0].requests.Load() != 1 {
t.Errorf("backend was not invoked exactly once: %+v", *backends)
if backends.count() != 1 || backends.at(0).requests.Load() != 1 {
t.Errorf("backend was not invoked exactly once: %+v", backends.snapshot())
}
}
@ -162,11 +194,11 @@ func TestServe_KnownSid_ReusesBackend(t *testing.T) {
if r.Active() != 1 {
t.Errorf("Active() = %d, want 1 (reuse, not spawn)", r.Active())
}
if len(*backends) != 1 {
t.Errorf("Spawn called %d times, want 1", len(*backends))
if backends.count() != 1 {
t.Errorf("Spawn called %d times, want 1", backends.count())
}
if (*backends)[0].requests.Load() != 2 {
t.Errorf("backend.requests = %d, want 2", (*backends)[0].requests.Load())
if backends.at(0).requests.Load() != 2 {
t.Errorf("backend.requests = %d, want 2", backends.at(0).requests.Load())
}
}
@ -260,7 +292,7 @@ func TestServe_BackendDone_RemovesSession(t *testing.T) {
sid := first.Header.Get(session.SessionIDHeader)
// Simulate the child exiting.
close((*backends)[0].done)
close(backends.at(0).done)
// Wait for the reaper goroutine — poll Active() rather than add a
// special hook to the production type.
@ -292,7 +324,7 @@ func TestStop_TearsDownAllSessions(t *testing.T) {
if r.Active() != 0 {
t.Errorf("Active after Stop = %d, want 0", r.Active())
}
for _, b := range *backends {
for _, b := range backends.snapshot() {
if !b.stopped.Load() {
t.Errorf("backend %d not stopped", b.id)
}