fix: correct duplicate message reporting in Go implementation

- Add DocumentSkippedError custom error type to distinguish between skipped and stored documents
- Fix counter bug where skipped messages were incorrectly reported as "stored"
- Enhance status reporting to show "X skipped as duplicates" for better visibility
- Fix binary attachment handling in the Rust implementation so that all file types (images, PDFs, etc.) are supported
- Update test scripts to use correct binary names (mail2couch-go, mail2couch-rs)
- Add comprehensive test configurations for implementation comparison

Before: "Summary: Processed 30 messages, stored 30 new messages" (misleading when all were duplicates)
After: "Summary: Processed 30 messages, stored 0 new messages", together with the per-mailbox detail line "Stored 0/30 messages from INBOX (30 skipped as duplicates)"

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Ole-Morten Duesund 2025-08-04 00:36:01 +02:00
commit 436276f0ef
9 changed files with 127 additions and 18 deletions

View file

@ -15,6 +15,15 @@ import (
"mail2couch/mail"
)
// DocumentSkippedError reports that a document was not written because a
// document with the same ID already exists. It is a distinct error type so
// that callers can tell a skip apart from a genuine failure.
type DocumentSkippedError struct {
	// DocumentID is the ID of the document that was skipped.
	DocumentID string
}

// Error implements the error interface. The message names the skipped
// document's ID.
func (se *DocumentSkippedError) Error() string {
	// All operands are strings, so fmt.Sprint inserts no extra spaces.
	return fmt.Sprint("document ", se.DocumentID, " already exists, skipped")
}
// Client wraps the Kivik client
type Client struct {
*kivik.Client
@ -149,7 +158,7 @@ func (c *Client) StoreMessage(ctx context.Context, dbName string, doc *MailDocum
}
if exists {
return nil // Document already exists, skip
return &DocumentSkippedError{DocumentID: doc.ID}
}
// Store the document first (without attachments)

Binary file not shown.

View file

@ -2,6 +2,7 @@ package main
import (
"context"
"errors"
"fmt"
"log"
"time"
@ -174,16 +175,26 @@ func processImapSource(source *config.MailSource, couchClient *couch.Client, dbN
stored := 0
if !dryRun {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
var skipped int
for i, doc := range docs {
err := couchClient.StoreMessage(ctx, dbName, doc, messages[i])
if err != nil {
var skipErr *couch.DocumentSkippedError
if errors.As(err, &skipErr) {
skipped++
} else {
log.Printf(" ERROR: Failed to store message %s: %v", doc.ID, err)
}
} else {
stored++
}
}
cancel()
if skipped > 0 {
fmt.Printf(" Stored %d/%d messages from %s (%d skipped as duplicates)\n", stored, len(messages), mailbox, skipped)
} else {
fmt.Printf(" Stored %d/%d messages from %s\n", stored, len(messages), mailbox)
}
} else {
stored = len(messages) // In dry-run, assume all would be stored
fmt.Printf(" DRY-RUN: Would store %d messages from %s\n", len(messages), mailbox)

View file

@ -705,21 +705,14 @@ impl ImapClient {
// Generate a filename for the attachment
let filename = self.get_attachment_filename(part, index);
// Get the content data (try different methods based on content type)
let body_data = if let Some(text_content) = part.get_text_contents() {
// Text-based attachments
log::debug!("Found text attachment content: {} bytes", text_content.len());
text_content.as_bytes().to_vec()
} else {
// For now, skip attachments without text content
// TODO: Implement binary attachment support with proper mail-parser API
log::debug!("Skipping non-text attachment for part {} (content-type: {})", index, content_type.c_type);
vec![]
};
// Get the binary content data using the proper mail-parser API
// This works for both text and binary attachments (images, PDFs, etc.)
let body_data = part.get_contents().to_vec();
log::debug!("Found attachment content: {} bytes (content-type: {})", body_data.len(), content_type.c_type);
let content_type_str = content_type.c_type.to_string();
// Only create attachment stub if we have actual data
// Create attachment stub - get_contents() always returns the full data
if !body_data.is_empty() {
let attachment_stub = AttachmentStub {
content_type: content_type_str.clone(),

View file

@ -36,7 +36,7 @@ build_implementations() {
# Build Go implementation
echo -e "${BLUE} Building Go implementation...${NC}"
cd go
go build -o mail2couch .
go build -o mail2couch-go .
cd ..
echo -e "${GREEN} ✅ Go implementation built${NC}"
@ -53,7 +53,7 @@ run_go() {
echo -e "${BLUE}🦬 Running Go implementation...${NC}"
cd go
echo -e "${BLUE} Using config: config-test-go.json${NC}"
./mail2couch -c config-test-go.json
./mail2couch-go -c config-test-go.json
cd ..
echo -e "${GREEN}✅ Go implementation completed${NC}"
}
@ -63,7 +63,7 @@ run_rust() {
echo -e "${BLUE}🦀 Running Rust implementation...${NC}"
cd rust
echo -e "${BLUE} Using config: config-test-rust.json${NC}"
./target/release/mail2couch -c config-test-rust.json
./target/release/mail2couch-rs -c config-test-rust.json
cd ..
echo -e "${GREEN}✅ Rust implementation completed${NC}"
}

24
test-config-go.json Normal file
View file

@ -0,0 +1,24 @@
{
"couchDb": {
"url": "http://localhost:5984",
"user": "admin",
"password": "password"
},
"mailSources": [
{
"name": "go_test_account",
"enabled": true,
"protocol": "imap",
"host": "localhost",
"port": 3143,
"user": "testuser1",
"password": "password123",
"mode": "archive",
"folderFilter": {
"include": ["INBOX"],
"exclude": []
},
"messageFilter": {}
}
]
}

24
test-config-rust.json Normal file
View file

@ -0,0 +1,24 @@
{
"couchDb": {
"url": "http://localhost:5984",
"user": "admin",
"password": "password"
},
"mailSources": [
{
"name": "rust_test_account",
"enabled": true,
"protocol": "imap",
"host": "localhost",
"port": 3143,
"user": "testuser1",
"password": "password123",
"mode": "archive",
"folderFilter": {
"include": ["INBOX"],
"exclude": []
},
"messageFilter": {}
}
]
}

24
test-config-shared.json Normal file
View file

@ -0,0 +1,24 @@
{
"couchDb": {
"url": "http://localhost:5984",
"user": "admin",
"password": "password"
},
"mailSources": [
{
"name": "comparison_test_account",
"enabled": true,
"protocol": "imap",
"host": "localhost",
"port": 3143,
"user": "testuser1",
"password": "password123",
"mode": "archive",
"folderFilter": {
"include": ["INBOX"],
"exclude": []
},
"messageFilter": {}
}
]
}

View file

@ -0,0 +1,24 @@
{
"couchDb": {
"url": "http://localhost:5984",
"user": "admin",
"password": "password"
},
"mailSources": [
{
"name": "incremental_test",
"enabled": true,
"protocol": "imap",
"host": "localhost",
"port": 3143,
"user": "testuser1",
"password": "password123",
"mode": "archive",
"folderFilter": {
"include": ["INBOX"],
"exclude": []
},
"messageFilter": {}
}
]
}