mail2couch/go/mail/imap.go
Ole-Morten Duesund 357cd06264 feat: implement comprehensive wildcard folder selection and keyword filtering
## Wildcard Folder Selection
- Add support for wildcard patterns (`*`, `?`, `[abc]`) using filepath.Match
- Implement special case: `"*"` selects ALL available folders
- Support for complex include/exclude pattern combinations
- Maintain backwards compatibility with exact string matching
- Enable subfolder pattern matching (e.g., `Work/*`, `*/Drafts`)

## Keyword Filtering
- Add SubjectKeywords, SenderKeywords, RecipientKeywords to MessageFilter config
- Implement case-insensitive keyword matching across message fields
- Support multiple keywords per filter type with inclusive OR logic
- Add ShouldProcessMessage method for message-level filtering

## Enhanced Test Environment
- Create comprehensive wildcard pattern test scenarios
- Add 12 test folders covering various pattern types: Work/*, Important/*, Archive/*, exact matches
- Implement dedicated wildcard test script (test-wildcard-patterns.sh)
- Update test configurations to demonstrate real-world wildcard usage patterns
- Enhance test data generation with folder-specific messages for validation

## Documentation
- Create FOLDER_PATTERNS.md with comprehensive wildcard examples and use cases
- Update CLAUDE.md to reflect all implemented features and current status
- Enhance test README with detailed wildcard pattern explanations
- Provide configuration examples for common email organization scenarios

## Message Origin Tracking
- Verify all messages in CouchDB properly tagged with origin folder in `mailbox` field
- Maintain per-account database isolation for better organization
- Document ID format: `{folder}_{uid}` ensures uniqueness across folders

Key patterns supported:
- `["*"]` - All folders (with excludes)
- `["Work*", "Important*"]` - Prefix matching
- `["Work/*", "Archive/*"]` - Subfolder patterns
- `["INBOX", "Sent"]` - Exact matches
- Complex include/exclude combinations

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-01 17:24:02 +02:00

427 lines
11 KiB
Go

package mail
import (
"bytes"
"fmt"
"io"
"log"
"mime"
"path/filepath"
"strings"
"time"
"github.com/emersion/go-imap/v2"
"github.com/emersion/go-imap/v2/imapclient"
"github.com/emersion/go-message"
"mail2couch/config"
)
// ImapClient wraps the go-imap v2 IMAP client.
//
// The *imapclient.Client is embedded, so its methods (for example List,
// Select and Fetch) can be called directly on an ImapClient value.
type ImapClient struct {
*imapclient.Client
}
// Message represents an email message retrieved from IMAP.
//
// Values are populated by parseMessage from the fetched envelope and from the
// parsed full message body.
type Message struct {
// UID is the message UID reported by the server; parseMessage falls back to
// the message sequence number when the fetch response carries no UID.
UID uint32
// From holds the sender addresses from the envelope, formatted as
// "mailbox@host" (or just "mailbox" when the host part is empty).
From []string
// To holds the recipient addresses from the envelope, formatted like From.
To []string
// Subject is the envelope subject.
Subject string
// Date is the envelope date.
Date time.Time
// Body is the content of the first text/* part that is not an attachment.
Body string
// Headers maps each header field name to every value seen for that field.
Headers map[string][]string
// Attachments lists the parts that were classified as attachments.
Attachments []Attachment
}
// Attachment represents an email attachment extracted from a message part.
type Attachment struct {
// Filename is taken from the Content-Disposition "filename" parameter, then
// the Content-Type "name" parameter, and defaults to "unnamed_attachment".
Filename string
// ContentType is the media type of the part (for example "application/pdf").
ContentType string
// Content is the attachment payload as read from the part body.
Content []byte
}
// NewImapClient connects to the IMAP server described by source and logs in.
//
// Ports 993 and 465 are dialed with implicit TLS; every other port (143,
// 3143, etc.) is dialed without encryption. When the login is rejected the
// freshly opened connection is closed before the error is returned, so a
// failed attempt does not leak the underlying connection.
//
// On success the returned client is authenticated; callers are expected to
// call Logout when they are done with it.
func NewImapClient(source *config.MailSource) (*ImapClient, error) {
	addr := fmt.Sprintf("%s:%d", source.Host, source.Port)

	var (
		client *imapclient.Client
		err    error
	)
	switch source.Port {
	case 993, 465:
		// Standard implicit-TLS ports.
		client, err = imapclient.DialTLS(addr, nil)
	default:
		// Plain-text connection for other ports (143, 3143, etc.).
		client, err = imapclient.DialInsecure(addr, nil)
	}
	if err != nil {
		return nil, fmt.Errorf("failed to dial IMAP server: %w", err)
	}

	if err := client.Login(source.User, source.Password).Wait(); err != nil {
		// The connection is useless without a session; release it instead of
		// leaving it open until the process exits.
		if closeErr := client.Close(); closeErr != nil {
			log.Printf("Failed to close IMAP connection after login failure: %v", closeErr)
		}
		return nil, fmt.Errorf("failed to login: %w", err)
	}

	return &ImapClient{client}, nil
}
// ListMailboxes asks the server for every mailbox (reference "", pattern "*")
// and returns their names in the order the server reported them.
//
// The result is nil when the server reports no mailboxes. Any error from the
// LIST command is returned unchanged.
func (c *ImapClient) ListMailboxes() ([]string, error) {
	infos, err := c.List("", "*", nil).Collect()
	if err != nil {
		return nil, err
	}

	var names []string
	for i := range infos {
		names = append(names, infos[i].Mailbox)
	}
	return names, nil
}
// GetMessages selects mailbox and retrieves its messages with filtering
// support.
//
// Parameters:
//   - mailbox: name of the mailbox to select.
//   - since: accepted for interface compatibility; it is not consulted by
//     this method, so no date restriction is applied here.
//   - maxMessages: 0 means no limit; a value > 0 caps the fetch to the
//     messages with sequence numbers 1..maxMessages.
//   - messageFilter: optional keyword filter; nil disables message-level
//     filtering.
//
// Returns the parsed messages that passed the filter and a set containing the
// UID of every message currently in the mailbox (independent of maxMessages
// and of the keyword filter). The set is keyed by real UIDs, i.e. the same
// identifier stored in Message.UID, so the two can be compared directly.
//
// Messages that fail to parse are logged and skipped. An error is returned
// when the mailbox cannot be selected or a FETCH command fails.
func (c *ImapClient) GetMessages(mailbox string, since *time.Time, maxMessages int, messageFilter *config.MessageFilter) ([]*Message, map[uint32]bool, error) {
	mbox, err := c.Select(mailbox, nil).Wait()
	if err != nil {
		return nil, nil, fmt.Errorf("failed to select mailbox %s: %w", mailbox, err)
	}
	if mbox.NumMessages == 0 {
		return []*Message{}, make(map[uint32]bool), nil
	}

	// Collect the real UIDs of everything in the mailbox first. Sequence
	// numbers are not stable identifiers and do not match Message.UID.
	currentUIDs, err := c.fetchMailboxUIDs(mbox.NumMessages)
	if err != nil {
		return nil, nil, err
	}

	// Determine how many messages to fetch. The comparison is done in uint64
	// so a large mailbox cannot overflow int on 32-bit platforms.
	numToFetch := mbox.NumMessages
	if maxMessages > 0 && uint64(numToFetch) > uint64(maxMessages) {
		numToFetch = uint32(maxMessages)
	}

	// Sequence set 1:numToFetch.
	seqSet := imap.SeqSet{}
	seqSet.AddRange(1, numToFetch)

	// Envelope for the basic headers, UID for identification, and an empty
	// body section, which requests the entire message.
	options := &imap.FetchOptions{
		Envelope: true,
		UID:      true,
		BodySection: []*imap.FetchItemBodySection{
			{},
		},
	}

	var messages []*Message
	fetchCmd := c.Fetch(seqSet, options)
	for {
		msg := fetchCmd.Next()
		if msg == nil {
			break
		}

		parsedMsg, err := c.parseMessage(msg)
		if err != nil {
			log.Printf("Failed to parse message: %v", err)
			continue
		}

		// Apply message-level keyword filtering.
		if messageFilter != nil && !c.ShouldProcessMessage(parsedMsg, messageFilter) {
			continue
		}
		messages = append(messages, parsedMsg)
	}
	if err := fetchCmd.Close(); err != nil {
		return nil, nil, fmt.Errorf("failed to fetch messages: %w", err)
	}

	return messages, currentUIDs, nil
}

// fetchMailboxUIDs returns the UIDs of the messages with sequence numbers
// 1..numMessages in the currently selected mailbox, using a UID-only FETCH.
func (c *ImapClient) fetchMailboxUIDs(numMessages uint32) (map[uint32]bool, error) {
	uids := make(map[uint32]bool, int(numMessages))
	if numMessages == 0 {
		return uids, nil
	}

	allSeqs := imap.SeqSet{}
	allSeqs.AddRange(1, numMessages)

	cmd := c.Fetch(allSeqs, &imap.FetchOptions{UID: true})
	for {
		item := cmd.Next()
		if item == nil {
			break
		}
		buf, err := item.Collect()
		if err != nil {
			// An incomplete UID set would be misleading, so give up. The
			// Close error is dropped because the Collect error is the one
			// worth reporting.
			_ = cmd.Close()
			return nil, fmt.Errorf("failed to collect message UID: %w", err)
		}
		if buf.UID != 0 {
			uids[uint32(buf.UID)] = true
		}
	}
	if err := cmd.Close(); err != nil {
		return nil, fmt.Errorf("failed to fetch message UIDs: %w", err)
	}
	return uids, nil
}
// parseMessage converts one IMAP fetch response into a Message.
//
// The envelope supplies Subject, Date, From and To. The full body section (if
// present) is parsed with go-message to fill Headers, Body and Attachments.
// Message.UID is the UID from the response, falling back to the sequence
// number when the response carries no UID.
//
// A message whose charset or transfer encoding is unknown to go-message is
// still processed: go-message returns a readable entity together with such
// errors, so they are tolerated rather than dropping the whole message.
//
// Returns an error when the fetch data cannot be collected, or when the
// message or its body cannot be parsed.
func (c *ImapClient) parseMessage(fetchMsg *imapclient.FetchMessageData) (*Message, error) {
	msg := &Message{
		UID:         fetchMsg.SeqNum, // replaced below when a real UID is available
		Headers:     make(map[string][]string),
		Attachments: []Attachment{},
	}

	// Collect all fetch data first.
	buffer, err := fetchMsg.Collect()
	if err != nil {
		return nil, fmt.Errorf("failed to collect fetch data: %w", err)
	}

	// formatAddr renders an envelope address as "mailbox@host", or just
	// "mailbox" when the host part is empty.
	formatAddr := func(mailbox, host string) string {
		if host == "" {
			return mailbox
		}
		return mailbox + "@" + host
	}

	if env := buffer.Envelope; env != nil {
		msg.Subject = env.Subject
		msg.Date = env.Date
		for _, addr := range env.From {
			if addr.Mailbox != "" {
				msg.From = append(msg.From, formatAddr(addr.Mailbox, addr.Host))
			}
		}
		for _, addr := range env.To {
			if addr.Mailbox != "" {
				msg.To = append(msg.To, formatAddr(addr.Mailbox, addr.Host))
			}
		}
	}

	if buffer.UID != 0 {
		msg.UID = uint32(buffer.UID)
	}

	if len(buffer.BodySection) == 0 {
		return msg, nil
	}

	// Parse the full message content using go-message.
	raw := buffer.BodySection[0].Bytes
	entity, err := message.Read(bytes.NewReader(raw))
	if err != nil && !message.IsUnknownCharset(err) && !message.IsUnknownEncoding(err) {
		return nil, fmt.Errorf("failed to parse message: %w", err)
	}

	// Extract headers.
	for field := entity.Header.Fields(); field.Next(); {
		key := field.Key()
		// The error is deliberately ignored: per the go-message docs, Text
		// returns the raw field value when the charset cannot be decoded.
		value, _ := field.Text()
		msg.Headers[key] = append(msg.Headers[key], value)
	}

	// Parse message body and attachments.
	if err := c.parseMessageBody(entity, msg); err != nil {
		return nil, fmt.Errorf("failed to parse message body: %w", err)
	}
	return msg, nil
}
// parseMessageBody extracts the body and attachments from a message entity
// and stores them in msg.
//
// A multipart entity is walked part by part, and nested multipart containers
// (for example multipart/alternative inside multipart/mixed) are descended
// into recursively so that their text body and attachments are not lost. A
// non-multipart entity is handed to parseMessagePart directly. A missing or
// unparsable Content-Type is treated as text/plain.
//
// A failure while handling one part of a multipart message is logged and the
// remaining parts are still processed. Parts whose charset or transfer
// encoding is unknown to go-message are still processed, because go-message
// returns a readable entity alongside such errors.
//
// Returns an error when the multipart structure itself cannot be read, or
// when a single-part message cannot be processed.
func (c *ImapClient) parseMessageBody(entity *message.Entity, msg *Message) error {
	mediaType, _, err := entity.Header.ContentType()
	if err != nil {
		// Default to text/plain if there is no usable content type.
		mediaType = "text/plain"
	}

	if !strings.HasPrefix(mediaType, "multipart/") {
		// Single part message.
		return c.parseMessagePart(entity, msg)
	}

	mr := entity.MultipartReader()
	if mr == nil {
		return fmt.Errorf("failed to create multipart reader")
	}
	for {
		part, err := mr.NextPart()
		if err == io.EOF {
			return nil
		}
		if err != nil && !message.IsUnknownCharset(err) && !message.IsUnknownEncoding(err) {
			return fmt.Errorf("failed to read multipart: %w", err)
		}

		// Recurse rather than calling parseMessagePart so that nested
		// multipart containers are expanded as well.
		if err := c.parseMessageBody(part, msg); err != nil {
			log.Printf("Failed to parse message part: %v", err)
			// Continue processing other parts.
		}
	}
}
// parseMessagePart processes a single, non-multipart message part and records
// it on msg either as an attachment or as the text body.
//
// A part is an attachment when its Content-Disposition is "attachment", when
// it is "inline" with a filename, or when its Content-Type carries a "name"
// parameter. The attachment filename comes from the disposition "filename"
// parameter, then the Content-Type "name" parameter, and finally defaults to
// "unnamed_attachment".
//
// Otherwise, the first text/* part encountered becomes msg.Body; later text
// parts and parts of other types are ignored. A missing or unparsable
// Content-Type is treated as text/plain.
//
// Returns an error only when reading the part's content fails.
func (c *ImapClient) parseMessagePart(entity *message.Entity, msg *Message) error {
	mediaType, params, err := entity.Header.ContentType()
	if err != nil {
		mediaType = "text/plain"
	}

	// A disposition parse failure simply means "no disposition information".
	disposition, dispositionParams, _ := entity.Header.ContentDisposition()

	isAttachment := disposition == "attachment" ||
		(disposition == "inline" && dispositionParams["filename"] != "") ||
		params["name"] != ""

	switch {
	case isAttachment:
		filename := dispositionParams["filename"]
		if filename == "" {
			filename = params["name"]
		}
		if filename == "" {
			filename = "unnamed_attachment"
		}

		// Decode RFC 2047 encoded-words if present. DecodeHeader returns an
		// empty string on failure, so the raw name is kept in that case
		// instead of being wiped.
		if decoded, err := new(mime.WordDecoder).DecodeHeader(filename); err == nil {
			filename = decoded
		}

		content, err := io.ReadAll(entity.Body)
		if err != nil {
			return fmt.Errorf("failed to read attachment content: %w", err)
		}
		msg.Attachments = append(msg.Attachments, Attachment{
			Filename:    filename,
			ContentType: mediaType,
			Content:     content,
		})

	case strings.HasPrefix(mediaType, "text/") && msg.Body == "":
		// Only the first text part is taken as the body.
		bodyBytes, err := io.ReadAll(entity.Body)
		if err != nil {
			return fmt.Errorf("failed to read message body: %w", err)
		}
		msg.Body = string(bodyBytes)
	}

	return nil
}
// ShouldProcessMailbox reports whether mailbox passes the folder filter.
//
// Rules, applied in order:
//   - A nil filter imposes no restrictions, so every mailbox is processed.
//   - If filter.Include is non-empty, mailbox must match at least one of its
//     patterns. The pattern "*" is special-cased to match every mailbox,
//     including names that contain a path separator.
//   - If mailbox matches any pattern in filter.Exclude it is rejected.
//
// A pattern matches when filepath.Match accepts it (wildcards `*`, `?`,
// `[abc]`) or when it equals the mailbox name exactly. The exact comparison
// keeps plain names working even if they are not valid glob patterns.
// Note that filepath.Match follows the host OS path rules, so separator and
// escape handling can differ between platforms.
func (c *ImapClient) ShouldProcessMailbox(mailbox string, filter *config.FolderFilter) bool {
	if filter == nil {
		return true
	}

	// matches reports whether mailbox satisfies a single pattern, either as a
	// glob or as an exact string. Malformed globs fall through to the exact
	// comparison.
	matches := func(pattern string) bool {
		if ok, err := filepath.Match(pattern, mailbox); err == nil && ok {
			return true
		}
		return mailbox == pattern
	}

	if len(filter.Include) > 0 {
		included := false
		for _, pattern := range filter.Include {
			if pattern == "*" || matches(pattern) {
				included = true
				break
			}
		}
		if !included {
			return false
		}
	}

	for _, pattern := range filter.Exclude {
		if matches(pattern) {
			return false
		}
	}
	return true
}
// ShouldProcessMessage reports whether msg passes the keyword filter.
//
// A nil filter imposes no restrictions. Otherwise each non-empty keyword list
// must be satisfied (the lists are combined with AND), while within a list a
// single matching keyword is enough (OR):
//   - SubjectKeywords are searched for in the subject.
//   - SenderKeywords are searched for in each From address.
//   - RecipientKeywords are searched for in each To address.
//
// Matching is a case-insensitive substring test. A message with no From (or
// To) addresses fails a non-empty sender (or recipient) keyword list.
func (c *ImapClient) ShouldProcessMessage(msg *Message, filter *config.MessageFilter) bool {
	if filter == nil {
		return true
	}

	// anyMatch reports whether at least one value contains at least one of
	// the keywords.
	anyMatch := func(values, keywords []string) bool {
		for _, value := range values {
			if c.containsAnyKeyword(strings.ToLower(value), keywords) {
				return true
			}
		}
		return false
	}

	if len(filter.SubjectKeywords) > 0 &&
		!c.containsAnyKeyword(strings.ToLower(msg.Subject), filter.SubjectKeywords) {
		return false
	}
	if len(filter.SenderKeywords) > 0 && !anyMatch(msg.From, filter.SenderKeywords) {
		return false
	}
	if len(filter.RecipientKeywords) > 0 && !anyMatch(msg.To, filter.RecipientKeywords) {
		return false
	}
	return true
}
// containsAnyKeyword reports whether text contains at least one of keywords
// as a substring, ignoring case.
//
// Both the text and each keyword are lowercased before comparison, so the
// result does not depend on the caller having normalized text beforehand.
// An empty or nil keyword list never matches.
func (c *ImapClient) containsAnyKeyword(text string, keywords []string) bool {
	lowered := strings.ToLower(text)
	for _, keyword := range keywords {
		if strings.Contains(lowered, strings.ToLower(keyword)) {
			return true
		}
	}
	return false
}
// Logout sends the IMAP LOGOUT command and waits for it to complete.
//
// The embedded client's Logout only issues the command and returns a command
// handle; the outcome is known only after Wait. A failure is logged rather
// than returned, because callers treat logout as best-effort cleanup.
func (c *ImapClient) Logout() {
	if err := c.Client.Logout().Wait(); err != nil {
		log.Printf("Failed to logout: %v", err)
	}
}