mail2couch/go/couch/couch.go
Ole-Morten Duesund 436276f0ef fix: correct duplicate message reporting in Go implementation
- Add DocumentSkippedError custom error type to distinguish between skipped and stored documents
- Fix counter bug where skipped messages were incorrectly reported as "stored"
- Enhance status reporting to show "X skipped as duplicates" for better visibility
- Fix Rust implementation binary attachment handling to support all file types (images, PDFs, etc.)
- Update test scripts to use correct binary names (mail2couch-go, mail2couch-rs)
- Add comprehensive test configurations for implementation comparison

Before: "Summary: Processed 30 messages, stored 30 new messages" (misleading when all were duplicates)
After: "Summary: Processed 30 messages, stored 0 new messages" with detailed "Stored 0/30 messages from INBOX (30 skipped as duplicates)"

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-04 00:36:01 +02:00

383 lines
11 KiB
Go

package couch
import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net/url"
	"regexp"
	"strings"
	"time"

	"github.com/go-kivik/kivik/v4"
	_ "github.com/go-kivik/kivik/v4/couchdb" // The CouchDB driver

	"mail2couch/config"
	"mail2couch/mail"
)
// DocumentSkippedError reports that a document was not written because a
// document with the same ID is already present in the database. StoreMessage
// returns it so callers can tell "skipped" apart from "stored" and from real
// failures.
type DocumentSkippedError struct {
	// DocumentID is the ID of the document that was skipped.
	DocumentID string
}

// Error implements the error interface and names the skipped document.
func (e *DocumentSkippedError) Error() string {
	const format = "document %s already exists, skipped"
	return fmt.Sprintf(format, e.DocumentID)
}
// Client is the CouchDB handle used by this package. It embeds *kivik.Client,
// so the kivik client methods used in this file (DB, DBExists, CreateDB) are
// available directly on a Client value, next to the mail-specific helpers
// defined below. Values are created with NewClient.
type Client struct {
*kivik.Client
}
// MailDocument represents an email message stored in CouchDB.
// ConvertMessage builds one from a mail.Message and StoreMessage writes it to
// the database; the struct tags define the JSON field names of the stored
// document.
type MailDocument struct {
ID string `json:"_id,omitempty"` // Document ID; ConvertMessage sets it to "<mailbox>_<uid>"
Rev string `json:"_rev,omitempty"` // Document revision; left out of the JSON when empty
Attachments map[string]AttachmentStub `json:"_attachments,omitempty"` // CouchDB attachments; not populated by ConvertMessage
SourceUID string `json:"sourceUid"` // Unique ID from the mail source (e.g., IMAP UID), formatted as a decimal string
Mailbox string `json:"mailbox"` // Source mailbox name
From []string `json:"from"` // Copied from the source message's From field
To []string `json:"to"` // Copied from the source message's To field
Subject string `json:"subject"` // Copied from the source message's Subject field
Date time.Time `json:"date"` // Copied from the source message's Date field
Body string `json:"body"` // Copied from the source message's Body field
Headers map[string][]string `json:"headers"` // Copied from the source message's Headers field
StoredAt time.Time `json:"storedAt"` // Set to time.Now() when ConvertMessage builds the document
DocType string `json:"docType"` // Always "mail"
HasAttachments bool `json:"hasAttachments"` // True when the source message has at least one attachment
}
// AttachmentStub represents metadata for a CouchDB attachment. It is the value
// type of MailDocument.Attachments, which is serialized under the document's
// "_attachments" key.
type AttachmentStub struct {
ContentType string `json:"content_type"` // Serialized as "content_type"; presumably the attachment's MIME type
Length int64 `json:"length,omitempty"` // Serialized as "length"; omitted when zero
Stub bool `json:"stub,omitempty"` // Serialized as "stub"; omitted when false
}
// SyncMetadata represents sync state information stored in CouchDB.
// One document is kept per mailbox under the ID "sync_metadata_<mailbox>";
// GetSyncMetadata reads it and StoreSyncMetadata writes it. StoreSyncMetadata
// overwrites ID, DocType and UpdatedAt before writing, so callers only need to
// fill in the remaining fields.
type SyncMetadata struct {
ID string `json:"_id,omitempty"` // Document ID; set by StoreSyncMetadata
Rev string `json:"_rev,omitempty"` // Document revision; copied from the existing document on update
DocType string `json:"docType"` // Always "sync_metadata"
Mailbox string `json:"mailbox"` // Mailbox name
LastSyncTime time.Time `json:"lastSyncTime"` // When this mailbox was last synced
LastMessageUID uint32 `json:"lastMessageUID"` // Highest UID processed in last sync
MessageCount int `json:"messageCount"` // Number of messages processed in last sync
UpdatedAt time.Time `json:"updatedAt"` // When this metadata was last updated
}
// NewClient creates a new CouchDB client from the configuration.
//
// cfg.URL is parsed and, when cfg.User is non-empty, cfg.User and cfg.Password
// are injected as the URL's userinfo before the URL is handed to kivik as the
// data source name. When cfg.User is empty the URL is used as written, so
// credentials already embedded in cfg.URL are kept and no empty ":@" userinfo
// is generated.
//
// It returns an error if cfg is nil, if cfg.URL cannot be parsed, or if kivik
// rejects the resulting data source name.
func NewClient(cfg *config.CouchDbConfig) (*Client, error) {
	if cfg == nil {
		return nil, fmt.Errorf("couchdb config is nil")
	}

	parsedURL, err := url.Parse(cfg.URL)
	if err != nil {
		return nil, fmt.Errorf("invalid couchdb url: %w", err)
	}

	// Only override the userinfo when credentials were actually configured;
	// otherwise url.UserPassword("", "") would replace whatever the URL
	// already carried with an empty user and password.
	if cfg.User != "" {
		parsedURL.User = url.UserPassword(cfg.User, cfg.Password)
	}

	client, err := kivik.New("couch", parsedURL.String())
	if err != nil {
		return nil, fmt.Errorf("creating couchdb client: %w", err)
	}
	return &Client{client}, nil
}
// EnsureDB makes sure a database called dbName is present on the server,
// creating it when the existence check reports that it is missing. An error
// from either the check or the creation is returned unchanged.
func (c *Client) EnsureDB(ctx context.Context, dbName string) error {
	found, err := c.DBExists(ctx, dbName)
	switch {
	case err != nil:
		return err
	case found:
		// Nothing to do: the database is already there.
		return nil
	default:
		return c.CreateDB(ctx, dbName)
	}
}
// dbNameInvalidChars matches every character that may not appear in the body
// of a CouchDB database name (names must match ^[a-z][a-z0-9_$()+/-]*$). It is
// compiled once at package initialization instead of on every
// GenerateAccountDBName call.
var dbNameInvalidChars = regexp.MustCompile(`[^a-z0-9_$()+/-]`)

// GenerateAccountDBName creates a CouchDB-compatible database name from account info.
//
// accountName is used when it is non-empty, otherwise userEmail. The chosen
// value is lower-cased and every character outside [a-z0-9_$()+/-] is replaced
// by "_". The result is prefixed with "m2c_", or with "m2c_mail_" when the
// sanitized value does not begin with a lowercase ASCII letter. When both
// inputs are empty the result is just "m2c_".
func GenerateAccountDBName(accountName, userEmail string) string {
	// Use account name if available, otherwise fall back to email.
	name := accountName
	if name == "" {
		name = userEmail
	}

	sanitized := dbNameInvalidChars.ReplaceAllString(strings.ToLower(name), "_")

	// The prefix guarantees the leading letter CouchDB requires; the longer
	// form marks names whose own first character was not a letter.
	prefix := "m2c_"
	if sanitized != "" && (sanitized[0] < 'a' || sanitized[0] > 'z') {
		prefix = "m2c_mail_"
	}
	return prefix + sanitized
}
// ConvertMessage builds the CouchDB document for msg as found in mailbox.
//
// The document ID is "<mailbox>_<uid>" and SourceUID is the decimal UID. The
// sender, recipients, subject, date, body and headers are copied from msg,
// StoredAt is set to the current time and DocType to "mail". msg must not be
// nil.
func ConvertMessage(msg *mail.Message, mailbox string) *MailDocument {
	var doc MailDocument

	// Identity of the message within the database.
	doc.ID = fmt.Sprintf("%s_%d", mailbox, msg.UID)
	doc.SourceUID = fmt.Sprintf("%d", msg.UID)
	doc.Mailbox = mailbox
	doc.DocType = "mail"

	// Content copied verbatim from the source message.
	doc.From = msg.From
	doc.To = msg.To
	doc.Subject = msg.Subject
	doc.Date = msg.Date
	doc.Body = msg.Body
	doc.Headers = msg.Headers

	doc.StoredAt = time.Now()

	// Only the flag is recorded here. The Attachments map is deliberately
	// left empty: attachment metadata is produced by CouchDB once the
	// attachments themselves are uploaded after the document is created.
	doc.HasAttachments = len(msg.Attachments) > 0

	return &doc
}
// StoreMessage writes doc to the database dbName and then uploads the
// attachments of msg, if any, as CouchDB attachments of that document.
//
// If a document with doc.ID is already present nothing is written and a
// *DocumentSkippedError is returned. msg may be nil, in which case only the
// document itself is stored. Any other failure is returned wrapped with a
// description of the step that failed.
//
// NOTE(review): the document is written before its attachments. If an
// attachment upload fails, the document stays in the database without the
// remaining attachments, and a later call for the same ID reports it as
// already existing.
func (c *Client) StoreMessage(ctx context.Context, dbName string, doc *MailDocument, msg *mail.Message) error {
	db := c.DB(dbName)
	if err := db.Err(); err != nil {
		return err
	}

	// Duplicate check: an existing document is reported, never overwritten.
	switch found, err := c.DocumentExists(ctx, dbName, doc.ID); {
	case err != nil:
		return fmt.Errorf("failed to check if document exists: %w", err)
	case found:
		return &DocumentSkippedError{DocumentID: doc.ID}
	}

	// Store the bare document first; attachments are added afterwards.
	rev, err := db.Put(ctx, doc.ID, doc)
	if err != nil {
		return fmt.Errorf("failed to store document: %w", err)
	}

	if msg == nil {
		return nil
	}
	for _, att := range msg.Attachments {
		// Every upload creates a new revision, which the next upload must
		// reference.
		rev, err = c.StoreAttachment(ctx, dbName, doc.ID, rev, att.Filename, att.ContentType, att.Content)
		if err != nil {
			return fmt.Errorf("failed to store attachment %s: %w", att.Filename, err)
		}
	}
	return nil
}
// StoreAttachment stores an attachment to an existing CouchDB document.
//
// docID and rev identify the document revision to attach to; filename and
// contentType describe the attachment and content is its raw payload. On
// success the document's new revision is returned, which must be used as rev
// for any further change to the same document. On failure the returned
// revision is empty.
func (c *Client) StoreAttachment(ctx context.Context, dbName, docID, rev, filename, contentType string, content []byte) (string, error) {
	db := c.DB(dbName)
	if err := db.Err(); err != nil {
		return "", err
	}

	att := &kivik.Attachment{
		Filename:    filename,
		ContentType: contentType,
		// Read straight from the caller's slice. Converting to a string
		// first would copy the whole payload for no benefit.
		Content: io.NopCloser(bytes.NewReader(content)),
	}

	newRev, err := db.PutAttachment(ctx, docID, att, kivik.Rev(rev))
	if err != nil {
		return "", fmt.Errorf("failed to store attachment: %w", err)
	}
	return newRev, nil
}
// StoreMessages stores multiple mail messages in CouchDB with their
// corresponding attachments.
//
// docs[i] is stored together with messages[i]; when messages is shorter than
// docs the remaining documents are stored without attachments. Documents that
// already exist are skipped and do not stop the batch. The first real failure
// aborts the batch and is returned; documents stored before it stay stored.
func (c *Client) StoreMessages(ctx context.Context, dbName string, docs []*MailDocument, messages []*mail.Message) error {
	for i, doc := range docs {
		var msg *mail.Message
		if i < len(messages) {
			msg = messages[i]
		}

		err := c.StoreMessage(ctx, dbName, doc, msg)
		if err == nil {
			continue
		}
		// A duplicate is not a failure. StoreMessage returns
		// *DocumentSkippedError directly (unwrapped), so a plain type
		// assertion is enough to recognise it. Returning it here would
		// leave every later message in the batch unstored.
		if _, skipped := err.(*DocumentSkippedError); skipped {
			continue
		}
		return err
	}
	return nil
}
// DocumentExists checks if a document with the given ID already exists.
//
// It returns (true, nil) when the document is found and (false, nil) when the
// server answers 404 Not Found. Any other failure (network, authentication,
// missing database handle, ...) is returned as an error instead of being
// reported as "does not exist", so callers do not go on to write a document
// on the basis of a failed lookup.
func (c *Client) DocumentExists(ctx context.Context, dbName, docID string) (bool, error) {
	db := c.DB(dbName)
	if err := db.Err(); err != nil {
		return false, err
	}

	// HTTP 404, spelled as a literal so this file needs no net/http import.
	const statusNotFound = 404

	// Only the revision is requested. This avoids fetching the document
	// body that db.Get returns and that would otherwise be left unread and
	// unclosed.
	_, err := db.GetRev(ctx, docID)
	switch {
	case err == nil:
		return true, nil
	case kivik.HTTPStatus(err) == statusNotFound:
		return false, nil
	default:
		return false, fmt.Errorf("looking up document %s: %w", docID, err)
	}
}
// GetAllMailDocumentIDs returns all mail document IDs from a database for a
// specific mailbox, as a set (every value in the map is true).
//
// Mail documents are named "<mailbox>_<uid>" with a decimal UID. When mailbox
// is non-empty, an ID is included only if it consists of exactly that prefix
// followed by digits. A plain prefix test is not enough: for mailbox "Archive"
// it would also match "Archive_2020_55", which belongs to mailbox
// "Archive_2020". When mailbox is empty every document ID in the database is
// returned unfiltered.
//
// Rows whose ID cannot be read are skipped. An iteration error is returned
// with a nil map.
func (c *Client) GetAllMailDocumentIDs(ctx context.Context, dbName, mailbox string) (map[string]bool, error) {
	db := c.DB(dbName)
	if err := db.Err(); err != nil {
		return nil, err
	}

	rows := db.AllDocs(ctx)
	// Release the result set on every exit path.
	defer rows.Close()

	prefix := mailbox + "_"
	docIDs := make(map[string]bool)
	for rows.Next() {
		docID, err := rows.ID()
		if err != nil {
			continue
		}
		if mailbox != "" {
			if !strings.HasPrefix(docID, prefix) {
				continue
			}
			// What follows the prefix must be a bare UID: non-empty and
			// nothing left once all digits are trimmed away.
			uid := docID[len(prefix):]
			if uid == "" || strings.Trim(uid, "0123456789") != "" {
				continue
			}
		}
		docIDs[docID] = true
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return docIDs, nil
}
// DeleteDocument removes the document docID from the database dbName.
//
// The document is fetched first to learn its current revision, which the
// delete request needs. An error is returned if the document cannot be
// fetched (including when it does not exist), if its revision cannot be
// decoded, or if the delete itself fails.
func (c *Client) DeleteDocument(ctx context.Context, dbName, docID string) error {
	db := c.DB(dbName)
	if err := db.Err(); err != nil {
		return err
	}

	current := db.Get(ctx, docID)
	if err := current.Err(); err != nil {
		// Document doesn't exist or some other error occurred.
		return err
	}

	// Only the revision is of interest; the rest of the body is ignored.
	var meta struct {
		Rev string `json:"_rev"`
	}
	if err := current.ScanDoc(&meta); err != nil {
		return err
	}

	if _, err := db.Delete(ctx, docID, meta.Rev); err != nil {
		return err
	}
	return nil
}
// SyncMailbox synchronizes a mailbox between mail server and CouchDB.
// In sync mode: removes documents from CouchDB that are no longer in the mail account.
// In archive mode: keeps all documents (no removal).
//
// currentMessageUIDs is the set of UIDs currently present in the mailbox on
// the mail server. A document is removed only when its ID is exactly
// "<mailbox>_<uid>" with a decimal uid that is not in that set. IDs of any
// other shape are left alone, for example "<mailbox>_<child>_<uid>" belonging
// to a mailbox whose name merely starts with this one. When mailbox is empty
// the UID is taken from the part after the last "_".
//
// The first failed deletion aborts the run and is returned; documents deleted
// before it stay deleted.
//
// NOTE(review): in sync mode an empty currentMessageUIDs removes every
// document of the mailbox, so callers must not pass an empty set after a
// failed or partial listing.
func (c *Client) SyncMailbox(ctx context.Context, dbName, mailbox string, currentMessageUIDs map[uint32]bool, syncMode bool) error {
	if !syncMode {
		return nil // Archive mode - don't remove anything
	}

	// Get all existing document IDs for this mailbox from CouchDB.
	existingDocs, err := c.GetAllMailDocumentIDs(ctx, dbName, mailbox)
	if err != nil {
		return fmt.Errorf("failed to get existing documents: %w", err)
	}

	// Find documents that exist in CouchDB but no longer in the mail account.
	prefix := mailbox + "_"
	var toDelete []string
	for docID := range existingDocs {
		var uidStr string
		switch sep := strings.LastIndex(docID, "_"); {
		case mailbox != "":
			// Strip the exact mailbox prefix rather than taking the last
			// "_" segment, so IDs of other mailboxes are never mistaken
			// for ours.
			if !strings.HasPrefix(docID, prefix) {
				continue
			}
			uidStr = docID[len(prefix):]
		case sep >= 0:
			uidStr = docID[sep+1:]
		default:
			continue
		}

		// Accept only a bare decimal number. Sscanf alone would also
		// accept input such as "12abc" or "+12".
		if uidStr == "" || strings.Trim(uidStr, "0123456789") != "" {
			continue
		}
		var uid uint32
		if _, err := fmt.Sscanf(uidStr, "%d", &uid); err != nil {
			continue // e.g. value does not fit in a uint32
		}

		// If this UID is not in the current mail account, mark for deletion.
		if !currentMessageUIDs[uid] {
			toDelete = append(toDelete, docID)
		}
	}

	// Delete documents that are no longer in the mail account.
	for _, docID := range toDelete {
		if err := c.DeleteDocument(ctx, dbName, docID); err != nil {
			return fmt.Errorf("failed to delete document %s: %w", docID, err)
		}
	}
	if len(toDelete) > 0 {
		fmt.Printf(" Sync mode: Removed %d documents no longer in mail account\n", len(toDelete))
	}
	return nil
}
// GetSyncMetadata retrieves the sync metadata for a specific mailbox.
//
// The metadata lives in the document "sync_metadata_<mailbox>". When that
// document does not exist (HTTP 404) the result is (nil, nil), which is the
// normal state before the first sync. Every other lookup or decoding failure
// is returned as an error, so a transient outage is not mistaken for "never
// synced".
func (c *Client) GetSyncMetadata(ctx context.Context, dbName, mailbox string) (*SyncMetadata, error) {
	db := c.DB(dbName)
	if err := db.Err(); err != nil {
		return nil, err
	}

	// HTTP 404, spelled as a literal so this file needs no net/http import.
	const statusNotFound = 404

	metadataID := fmt.Sprintf("sync_metadata_%s", mailbox)
	row := db.Get(ctx, metadataID)
	if err := row.Err(); err != nil {
		if kivik.HTTPStatus(err) == statusNotFound {
			// If metadata doesn't exist, return nil (not an error for first sync).
			return nil, nil
		}
		return nil, fmt.Errorf("failed to get sync metadata: %w", err)
	}

	var metadata SyncMetadata
	if err := row.ScanDoc(&metadata); err != nil {
		return nil, fmt.Errorf("failed to scan sync metadata: %w", err)
	}
	return &metadata, nil
}
// StoreSyncMetadata creates or updates the sync metadata document of
// metadata.Mailbox in the database dbName.
//
// metadata is modified in place: ID is set to "sync_metadata_<mailbox>",
// DocType to "sync_metadata" and UpdatedAt to the current time. If a metadata
// document already exists for the mailbox, its revision is copied into
// metadata.Rev so the write updates that document. metadata must not be nil.
func (c *Client) StoreSyncMetadata(ctx context.Context, dbName string, metadata *SyncMetadata) error {
	db := c.DB(dbName)
	if err := db.Err(); err != nil {
		return err
	}

	metadata.ID = fmt.Sprintf("sync_metadata_%s", metadata.Mailbox)
	metadata.DocType = "sync_metadata"
	metadata.UpdatedAt = time.Now()

	// An update has to carry the revision of the document it replaces.
	previous, err := c.GetSyncMetadata(ctx, dbName, metadata.Mailbox)
	switch {
	case err != nil:
		return fmt.Errorf("failed to check existing sync metadata: %w", err)
	case previous != nil:
		metadata.Rev = previous.Rev
	}

	if _, err := db.Put(ctx, metadata.ID, metadata); err != nil {
		return fmt.Errorf("failed to store sync metadata: %w", err)
	}
	return nil
}