feat(session): idle reaper + Forgejo token rotation (forgejo-mcp-broker-q4x)

Adds StartReaper to internal/session — two background goroutines that
keep the session map healthy under steady load.

Idle reaper:
  - Sweeps every ReapInterval (default 30s) for sessions whose
    LastActive is older than IdleTimeout (default 15m).
  - Evicts via SIGTERM through the Backend.Stop hook.

Token rotator:
  - Sweeps every RotateInterval (default 1m) for sessions whose Forgejo
    token is within RefreshLead (default 5m) of expiry.
  - Calls the operator-supplied RefreshForgejo to obtain new
    access+refresh tokens, then Respawn to mint a new Backend with the
    updated token in env.
  - Atomically swaps e.backend (now an atomic.Pointer[Backend]); the
    sid is preserved so the client just re-issues an MCP `initialize`
    on its next request rather than re-authenticating.
  - On refresh failure, evicts so the next /mcp produces a clean
    re-auth instead of carrying a stale token.

Two races uncovered by -race during this work, both fixed here:
  - The Done-watcher started in spawnSession captured the original
    backend pointer; after rotation it still saw Done close (because
    the old backend was Stopped) and would yank the entire entry. Fixed
    by comparing watched-backend == e.backend.Load() before evicting.
  - The fakeSpawner test helper let tests read the backends slice
    without the lock the spawn callback held. Replaced with a
    spawnerControl type whose count/at/snapshot methods all lock.

Tests cover idle eviction, recently-active sessions surviving sweeps,
successful rotation+respawn (sid preserved), refresh failure → eviction,
and Stop idempotency.

Closes forgejo-mcp-broker-q4x. Phase 5 complete.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ole-Morten Duesund 2026-04-27 17:32:36 +02:00
commit 933e7bd369
5 changed files with 501 additions and 36 deletions

View file

@ -66,9 +66,11 @@ type Registry struct {
type entry struct {
sid string
backend *Backend
lastActive atomic.Int64 // unix nanoseconds; bumped per request
oauthSess *oauth.Session
backend atomic.Pointer[Backend] // swapped on rotation; readers use Load
lastActive atomic.Int64 // unix nanoseconds; bumped per request
mu sync.Mutex // guards oauthSess; backend swap holds this too
oauthSess *oauth.Session
}
// SessionIDHeader is the streamable-HTTP MCP header that ferries the
@ -121,7 +123,7 @@ func (r *Registry) serve(w http.ResponseWriter, req *http.Request) {
}
w.Header().Set(SessionIDHeader, e.sid)
e.lastActive.Store(r.now().UnixNano())
e.backend.Handler.ServeHTTP(w, req)
e.backend.Load().Handler.ServeHTTP(w, req)
return
}
@ -133,7 +135,7 @@ func (r *Registry) serve(w http.ResponseWriter, req *http.Request) {
return
}
e := v.(*entry)
if e.oauthSess.BrokerTokenHash != oauthSess.BrokerTokenHash {
if e.snapshotOAuth().BrokerTokenHash != oauthSess.BrokerTokenHash {
// Session id is bound to the OAuth token that minted it. A
// different bearer probing a stolen sid gets 403 — not 401, so
// this is distinct from "your token is bad" and from "we don't
@ -142,7 +144,15 @@ func (r *Registry) serve(w http.ResponseWriter, req *http.Request) {
return
}
e.lastActive.Store(r.now().UnixNano())
e.backend.Handler.ServeHTTP(w, req)
e.backend.Load().Handler.ServeHTTP(w, req)
}
// snapshotOAuth reads the entry's oauthSess pointer while holding e.mu
// and hands it back, so the read cannot interleave with a writer that
// replaces the field under the same mutex. Only the pointer read is
// synchronized here; the Session it points to is returned without
// copying.
func (e *entry) snapshotOAuth() *oauth.Session {
	e.mu.Lock()
	sess := e.oauthSess
	e.mu.Unlock()
	return sess
}
func (r *Registry) spawnSession(ctx context.Context, oauthSess *oauth.Session) (*entry, error) {
@ -156,7 +166,8 @@ func (r *Registry) spawnSession(ctx context.Context, oauthSess *oauth.Session) (
}
sid := newSessionID()
e := &entry{sid: sid, backend: backend, oauthSess: oauthSess}
e := &entry{sid: sid, oauthSess: oauthSess}
e.backend.Store(backend)
e.lastActive.Store(r.now().UnixNano())
if _, loaded := r.sessions.LoadOrStore(sid, e); loaded {
@ -166,12 +177,27 @@ func (r *Registry) spawnSession(ctx context.Context, oauthSess *oauth.Session) (
}
r.count.Add(1)
// When the child exits on its own (crash, OOM, etc.), reap the entry.
r.watchBackend(sid, backend)
return e, nil
}
// watchBackend launches a goroutine that removes the session if the given
// backend's Done closes WHILE that backend is still the entry's current
// one. After a rotation, the old backend's Done eventually closes too,
// but the entry now points at a new backend; in that case the watcher
// is a no-op so the session survives the rotation.
func (r *Registry) watchBackend(sid string, backend *Backend) {
go func() {
<-backend.Done
r.removeSession(sid)
v, ok := r.sessions.Load(sid)
if !ok {
return
}
e := v.(*entry)
if e.backend.Load() == backend {
r.removeSession(sid)
}
}()
return e, nil
}
func (r *Registry) removeSession(sid string) {
@ -200,7 +226,7 @@ func (r *Registry) Active() int { return int(r.count.Load()) }
func (r *Registry) Stop(ctx context.Context) {
r.sessions.Range(func(k, v any) bool {
e := v.(*entry)
if err := e.backend.Stop(ctx); err != nil {
if err := e.backend.Load().Stop(ctx); err != nil {
r.log.Warn("session stop", slog.String("sid", e.sid), slog.String("err", err.Error()))
}
r.sessions.Delete(k)