feat(session): idle reaper + Forgejo token rotation (forgejo-mcp-broker-q4x)
Adds StartReaper to internal/session — two background goroutines that
keep the session map healthy under steady load.
Idle reaper:
- Sweeps every ReapInterval (default 30s) for sessions whose
LastActive is older than IdleTimeout (default 15m).
- Evicts via SIGTERM through the Backend.Stop hook.
Token rotator:
- Sweeps every RotateInterval (default 1m) for sessions whose Forgejo
token is within RefreshLead (default 5m) of expiry.
- Calls the operator-supplied RefreshForgejo to obtain new
access+refresh tokens, then Respawn to mint a new Backend with the
updated token in env.
- Atomically swaps e.backend (now an atomic.Pointer[Backend]); the
sid is preserved so the client just re-issues an MCP `initialize`
on its next request rather than re-authenticating.
- On refresh failure, evicts the session so the next /mcp request
produces a clean re-auth instead of carrying a stale token.
Two races uncovered by -race during this work, both fixed:
- The Done-watcher started in spawnSession captured the original
backend pointer; after rotation it still saw Done close (because
the old backend was Stopped) and would yank the entire entry. Fixed
by comparing watched-backend == e.backend.Load() before evicting.
- The fakeSpawner test helper let tests read the backends slice
without the lock the spawn callback held. Replaced with a
spawnerControl type whose count/at/snapshot methods all lock.
Tests cover idle eviction, recently-active sessions surviving sweeps,
successful rotation+respawn (sid preserved), refresh failure → eviction,
and Stop idempotency.
Closes forgejo-mcp-broker-q4x. Phase 5 complete.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
a39011592e
commit
933e7bd369
5 changed files with 501 additions and 36 deletions
|
|
@ -56,25 +56,57 @@ func (f *fakeBackend) backend() *session.Backend {
|
|||
}
|
||||
}
|
||||
|
||||
// fakeSpawner returns a SpawnFunc that hands out a sequence of fakeBackends.
|
||||
// The returned slice is appended to as Spawn is called, so tests can
|
||||
// inspect every backend that was minted.
|
||||
func fakeSpawner(t *testing.T) (session.SpawnFunc, *[]*fakeBackend) {
|
||||
// spawnerControl wraps a fake SpawnFunc with thread-safe access to the
|
||||
// backends it has minted. Without a control type, tests would race freely
|
||||
// because both spawn (called from registry goroutines) and tests access
|
||||
// the slice; this lets tests query under a lock.
|
||||
type spawnerControl struct {
|
||||
mu sync.Mutex
|
||||
backends []*fakeBackend
|
||||
spawn session.SpawnFunc
|
||||
}
|
||||
|
||||
func newSpawnerControl(t *testing.T) *spawnerControl {
|
||||
t.Helper()
|
||||
var (
|
||||
mu sync.Mutex
|
||||
backends []*fakeBackend
|
||||
next int
|
||||
)
|
||||
spawn := func(ctx context.Context, sess *oauth.Session) (*session.Backend, error) {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
fb := newFakeBackend(next)
|
||||
next++
|
||||
backends = append(backends, fb)
|
||||
c := &spawnerControl{}
|
||||
c.spawn = func(ctx context.Context, sess *oauth.Session) (*session.Backend, error) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
fb := newFakeBackend(len(c.backends))
|
||||
c.backends = append(c.backends, fb)
|
||||
return fb.backend(), nil
|
||||
}
|
||||
return spawn, &backends
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *spawnerControl) count() int {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
return len(c.backends)
|
||||
}
|
||||
|
||||
func (c *spawnerControl) at(i int) *fakeBackend {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
if i >= len(c.backends) {
|
||||
return nil
|
||||
}
|
||||
return c.backends[i]
|
||||
}
|
||||
|
||||
func (c *spawnerControl) snapshot() []*fakeBackend {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
out := make([]*fakeBackend, len(c.backends))
|
||||
copy(out, c.backends)
|
||||
return out
|
||||
}
|
||||
|
||||
// fakeSpawner is the legacy two-return adapter so existing tests keep
|
||||
// compiling. New tests should prefer newSpawnerControl directly.
|
||||
func fakeSpawner(t *testing.T) (session.SpawnFunc, *spawnerControl) {
|
||||
c := newSpawnerControl(t)
|
||||
return c.spawn, c
|
||||
}
|
||||
|
||||
// testBearerHeader carries a bearer-hash discriminator across the wire so
|
||||
|
|
@ -137,8 +169,8 @@ func TestServe_NewSession_MintsSidAndDispatches(t *testing.T) {
|
|||
if r.Active() != 1 {
|
||||
t.Errorf("Active() = %d, want 1", r.Active())
|
||||
}
|
||||
if len(*backends) != 1 || (*backends)[0].requests.Load() != 1 {
|
||||
t.Errorf("backend was not invoked exactly once: %+v", *backends)
|
||||
if backends.count() != 1 || backends.at(0).requests.Load() != 1 {
|
||||
t.Errorf("backend was not invoked exactly once: %+v", backends.snapshot())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -162,11 +194,11 @@ func TestServe_KnownSid_ReusesBackend(t *testing.T) {
|
|||
if r.Active() != 1 {
|
||||
t.Errorf("Active() = %d, want 1 (reuse, not spawn)", r.Active())
|
||||
}
|
||||
if len(*backends) != 1 {
|
||||
t.Errorf("Spawn called %d times, want 1", len(*backends))
|
||||
if backends.count() != 1 {
|
||||
t.Errorf("Spawn called %d times, want 1", backends.count())
|
||||
}
|
||||
if (*backends)[0].requests.Load() != 2 {
|
||||
t.Errorf("backend.requests = %d, want 2", (*backends)[0].requests.Load())
|
||||
if backends.at(0).requests.Load() != 2 {
|
||||
t.Errorf("backend.requests = %d, want 2", backends.at(0).requests.Load())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -260,7 +292,7 @@ func TestServe_BackendDone_RemovesSession(t *testing.T) {
|
|||
sid := first.Header.Get(session.SessionIDHeader)
|
||||
|
||||
// Simulate the child exiting.
|
||||
close((*backends)[0].done)
|
||||
close(backends.at(0).done)
|
||||
|
||||
// Wait for the reaper goroutine — poll Active() rather than add a
|
||||
// special hook to the production type.
|
||||
|
|
@ -292,7 +324,7 @@ func TestStop_TearsDownAllSessions(t *testing.T) {
|
|||
if r.Active() != 0 {
|
||||
t.Errorf("Active after Stop = %d, want 0", r.Active())
|
||||
}
|
||||
for _, b := range *backends {
|
||||
for _, b := range backends.snapshot() {
|
||||
if !b.stopped.Load() {
|
||||
t.Errorf("backend %d not stopped", b.id)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue