docs: add comprehensive CouchDB schema documentation for cross-implementation compatibility

- Add complete CouchDB document schema specifications in couchdb-schemas.md
- Create example JSON documents for mail and sync metadata structures
- Implement Rust schema definitions with full serde support and type safety
- Add validation script to ensure schema consistency across implementations
- Document field definitions, data types, and validation rules
- Provide Rust Cargo.toml with appropriate dependencies for future implementation

This establishes a solid foundation for the planned Rust implementation while ensuring
100% compatibility with existing Go implementation databases. Both implementations will
use identical document structures, field names, and database naming conventions.

Schema Features:
- Mail documents with native CouchDB attachment support
- Sync metadata for incremental synchronization
- Predictable document ID patterns for efficient access
- Cross-language type mappings and validation rules
- Example documents for testing and reference

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Ole-Morten Duesund 2025-08-02 15:08:35 +02:00
commit 651d95e98b
10 changed files with 908 additions and 0 deletions

20
rust/src/lib.rs Normal file
View file

@ -0,0 +1,20 @@
//! # mail2couch
//!
//! A powerful email backup utility that synchronizes mail from IMAP accounts to CouchDB.
//!
//! This library provides the core functionality for:
//! - Connecting to IMAP servers
//! - Retrieving email messages and attachments
//! - Storing emails in CouchDB with proper document structures
//! - Incremental synchronization to avoid re-processing messages
//! - Filtering by folders, dates, and keywords
//!
//! ## Document Schemas
//!
//! The library uses well-defined CouchDB document schemas that are compatible
//! with the Go implementation. See the `schemas` module for details.
/// CouchDB document schemas (mail documents, attachment stubs, sync metadata)
/// together with the database-name generation helper.
pub mod schemas;
// Re-export the schema types and the database-name helper at the crate root so
// callers can import them without naming the `schemas` module.
pub use schemas::{MailDocument, SyncMetadata, AttachmentStub, generate_database_name};

7
rust/src/main.rs Normal file
View file

@ -0,0 +1,7 @@
// Placeholder binary for the Rust implementation.
// The real entry point has not been written yet.

/// Prints a two-line notice that points the user at the Go implementation.
fn main() {
    // Lines are emitted in order, one per `println!`, exactly as listed here.
    const NOTICE: [&str; 2] = [
        "mail2couch Rust implementation - Coming Soon!",
        "See the Go implementation in ../go/ for current functionality.",
    ];
    for line in NOTICE.iter() {
        println!("{}", line);
    }
}

266
rust/src/schemas.rs Normal file
View file

@ -0,0 +1,266 @@
// CouchDB document schemas for mail2couch
// This file defines the Rust structures that correspond to the CouchDB document schemas
// defined in couchdb-schemas.md
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// An email message as persisted in CouchDB.
///
/// * Document ID format: `{mailbox}_{uid}` (e.g., `"INBOX_123"`)
/// * Document type: `"mail"`
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MailDocument {
    /// CouchDB document ID (`_id`); omitted from the serialized form while `None`.
    #[serde(rename = "_id", skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,

    /// CouchDB revision (`_rev`), managed by CouchDB; omitted while `None`.
    #[serde(rename = "_rev", skip_serializing_if = "Option::is_none")]
    pub rev: Option<String>,

    /// CouchDB native attachments (`_attachments`) holding the email's
    /// attachments, keyed by name; omitted while `None`.
    #[serde(rename = "_attachments", skip_serializing_if = "Option::is_none")]
    pub attachments: Option<HashMap<String, AttachmentStub>>,

    /// Original IMAP UID assigned by the mail server (`sourceUid`).
    #[serde(rename = "sourceUid")]
    pub source_uid: String,

    /// Name of the mailbox the message came from (e.g., "INBOX", "Sent").
    pub mailbox: String,

    /// Sender email addresses.
    pub from: Vec<String>,

    /// Recipient email addresses.
    pub to: Vec<String>,

    /// Subject line of the email.
    pub subject: String,

    /// Date taken from the email headers (ISO8601 format).
    pub date: DateTime<Utc>,

    /// Plain-text body of the email.
    pub body: String,

    /// Every email header, mapping a header name to all of its values.
    pub headers: HashMap<String, Vec<String>>,

    /// Timestamp of when the document was stored in CouchDB (`storedAt`, ISO8601 format).
    #[serde(rename = "storedAt")]
    pub stored_at: DateTime<Utc>,

    /// Document type discriminator (`docType`); always "mail".
    #[serde(rename = "docType")]
    pub doc_type: String,

    /// Flag telling whether the email carries attachments (`hasAttachments`).
    #[serde(rename = "hasAttachments")]
    pub has_attachments: bool,
}
/// Metadata describing one CouchDB native attachment.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttachmentStub {
    /// MIME type of the attachment; serialized under the key `content_type`,
    /// which is the field's own name.
    pub content_type: String,

    /// Attachment size in bytes (optional); omitted from the output while `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub length: Option<u64>,

    /// Marks the attachment as stored separately (optional); omitted while `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stub: Option<bool>,
}
/// Per-mailbox sync state used for incremental syncing.
///
/// * Document ID format: `sync_metadata_{mailbox}` (e.g., `"sync_metadata_INBOX"`)
/// * Document type: `"sync_metadata"`
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SyncMetadata {
    /// CouchDB document ID (`_id`); omitted from the serialized form while `None`.
    #[serde(rename = "_id", skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,

    /// CouchDB revision (`_rev`), managed by CouchDB; omitted while `None`.
    #[serde(rename = "_rev", skip_serializing_if = "Option::is_none")]
    pub rev: Option<String>,

    /// Document type discriminator (`docType`); always "sync_metadata".
    #[serde(rename = "docType")]
    pub doc_type: String,

    /// Name of the mailbox this metadata describes.
    pub mailbox: String,

    /// Time of the most recent sync of this mailbox (`lastSyncTime`, ISO8601 format).
    #[serde(rename = "lastSyncTime")]
    pub last_sync_time: DateTime<Utc>,

    /// Highest IMAP UID handled by the last sync. The serialized key is
    /// `lastMessageUID`, with "UID" fully upper-cased.
    #[serde(rename = "lastMessageUID")]
    pub last_message_uid: u32,

    /// How many messages the last sync processed (`messageCount`).
    #[serde(rename = "messageCount")]
    pub message_count: u32,

    /// Time this metadata document was last updated (`updatedAt`, ISO8601 format).
    #[serde(rename = "updatedAt")]
    pub updated_at: DateTime<Utc>,
}
impl MailDocument {
    /// Builds a `MailDocument` from the fields the caller must supply.
    ///
    /// The remaining fields are filled in here: `id`, `rev` and `attachments`
    /// start as `None`, `stored_at` is the current UTC time, and `doc_type`
    /// is "mail".
    pub fn new(
        source_uid: String,
        mailbox: String,
        from: Vec<String>,
        to: Vec<String>,
        subject: String,
        date: DateTime<Utc>,
        body: String,
        headers: HashMap<String, Vec<String>>,
        has_attachments: bool,
    ) -> Self {
        Self {
            // No ID yet; `set_id` fills it in later.
            id: None,
            // The revision is managed by CouchDB.
            rev: None,
            attachments: None,
            source_uid,
            mailbox,
            from,
            to,
            subject,
            date,
            body,
            headers,
            stored_at: Utc::now(),
            doc_type: String::from("mail"),
            has_attachments,
        }
    }

    /// Returns the document ID derived from this message: the mailbox name and
    /// the source UID joined by an underscore.
    pub fn generate_id(&self) -> String {
        let Self { mailbox, source_uid, .. } = self;
        format!("{}_{}", mailbox, source_uid)
    }

    /// Stores the ID produced by `generate_id` in the `id` field.
    pub fn set_id(&mut self) {
        let derived = self.generate_id();
        self.id = Some(derived);
    }
}
impl SyncMetadata {
    /// Builds the sync metadata document for `mailbox`.
    ///
    /// The document ID is set immediately to `sync_metadata_` followed by the
    /// mailbox name, `rev` starts as `None`, `doc_type` is "sync_metadata",
    /// and `updated_at` is the current UTC time.
    pub fn new(
        mailbox: String,
        last_sync_time: DateTime<Utc>,
        last_message_uid: u32,
        message_count: u32,
    ) -> Self {
        // Derive the ID first, while `mailbox` can still be borrowed; it is
        // moved into the struct below.
        let document_id = format!("sync_metadata_{}", mailbox);
        Self {
            id: Some(document_id),
            // The revision is managed by CouchDB.
            rev: None,
            doc_type: String::from("sync_metadata"),
            mailbox,
            last_sync_time,
            last_message_uid,
            message_count,
            updated_at: Utc::now(),
        }
    }
}
/// Derives the CouchDB database name for an account.
///
/// The name is built from `account_name`, or from `user_email` when
/// `account_name` is empty:
///
/// 1. The text is lowercased.
/// 2. ASCII letters and digits and the characters `_ $ ( ) + - /` are kept;
///    every other character becomes `_`.
/// 3. The result is prefixed with `m2c_` when it begins with an ASCII
///    lowercase letter, and with `m2c_mail_` otherwise (including when it is
///    empty), so the part after the `m2c_` prefix starts with a letter.
pub fn generate_database_name(account_name: &str, user_email: &str) -> String {
    // Fall back to the email address only when no account name was given.
    let source = match account_name {
        "" => user_email,
        given => given,
    };

    let lowered = source.to_lowercase();
    let mut sanitized = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        let permitted = ch.is_ascii_alphanumeric()
            || matches!(ch, '_' | '$' | '(' | ')' | '+' | '-' | '/');
        sanitized.push(if permitted { ch } else { '_' });
    }

    // An empty name has no first character and takes the `m2c_mail_` branch.
    match sanitized.chars().next() {
        Some(first) if first.is_ascii_lowercase() => format!("m2c_{}", sanitized),
        _ => format!("m2c_mail_{}", sanitized),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Database names are lowercased, sanitized, and prefixed as expected.
    #[test]
    fn test_generate_database_name() {
        // (account name, user email, expected database name)
        let cases = [
            ("Personal Gmail", "", "m2c_personal_gmail"),
            ("", "user@example.com", "m2c_user_example_com"),
            ("123work", "", "m2c_mail_123work"),
        ];
        for &(account, email, expected) in cases.iter() {
            assert_eq!(generate_database_name(account, email), expected);
        }
    }

    /// The mail document ID is derived from mailbox and UID, and `set_id`
    /// stores it on the document.
    #[test]
    fn test_mail_document_id_generation() {
        let senders = vec!["sender@example.com".to_string()];
        let recipients = vec!["recipient@example.com".to_string()];
        let mut mail = MailDocument::new(
            String::from("123"),
            String::from("INBOX"),
            senders,
            recipients,
            String::from("Test Subject"),
            Utc::now(),
            String::from("Test body"),
            HashMap::new(),
            false,
        );

        assert_eq!(mail.generate_id(), "INBOX_123");

        mail.set_id();
        assert_eq!(mail.id, Some(String::from("INBOX_123")));
    }

    /// Freshly created sync metadata carries the derived ID, the fixed
    /// document type, and the values passed to the constructor.
    #[test]
    fn test_sync_metadata_creation() {
        let synced_at = Utc::now();
        let meta = SyncMetadata::new(String::from("INBOX"), synced_at, 456, 100);

        assert_eq!(meta.id, Some(String::from("sync_metadata_INBOX")));
        assert_eq!(meta.doc_type, "sync_metadata");
        assert_eq!(meta.mailbox, "INBOX");
        assert_eq!(meta.last_message_uid, 456);
        assert_eq!(meta.message_count, 100);
    }
}