feat: implement comprehensive wildcard folder selection and keyword filtering

## Wildcard Folder Selection
- Add support for wildcard patterns (`*`, `?`, `[abc]`) using filepath.Match
- Implement special case: `"*"` selects ALL available folders
- Support complex include/exclude pattern combinations
- Maintain backwards compatibility with exact string matching
- Enable subfolder pattern matching (e.g., `Work/*`, `*/Drafts`)

## Keyword Filtering
- Add SubjectKeywords, SenderKeywords, RecipientKeywords to MessageFilter config
- Implement case-insensitive keyword matching across message fields
- Support multiple keywords per filter type: any one matching keyword satisfies that type (OR), and every configured filter type must be satisfied (AND)
- Add ShouldProcessMessage method for message-level filtering

## Enhanced Test Environment
- Create comprehensive wildcard pattern test scenarios
- Add 12 test folders covering various pattern types: Work/*, Important/*, Archive/*, exact matches
- Implement dedicated wildcard test script (test-wildcard-patterns.sh)
- Update test configurations to demonstrate real-world wildcard usage patterns
- Enhance test data generation with folder-specific messages for validation

## Documentation
- Create FOLDER_PATTERNS.md with comprehensive wildcard examples and use cases
- Update CLAUDE.md to reflect all implemented features and current status
- Enhance test README with detailed wildcard pattern explanations
- Provide configuration examples for common email organization scenarios

## Message Origin Tracking
- Verify that all messages in CouchDB are properly tagged with their origin folder in the `mailbox` field
- Maintain per-account database isolation for better organization
- Document ID format: `{folder}_{uid}` ensures uniqueness across folders

Key patterns supported:
- `["*"]` - All folders (exclude patterns still apply)
- `["Work*", "Important*"]` - Prefix matching
- `["Work/*", "Archive/*"]` - Subfolder patterns
- `["INBOX", "Sent"]` - Exact matches
- Complex include/exclude combinations

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Ole-Morten Duesund 2025-08-01 17:24:02 +02:00
commit 357cd06264
10 changed files with 602 additions and 84 deletions

View file

@ -39,11 +39,10 @@ type FolderFilter struct {
}
type MessageFilter struct {
Since string `json:"since,omitempty"`
// TODO: Add keyword filtering support
// SubjectKeywords []string `json:"subjectKeywords,omitempty"` // Filter by keywords in subject
// SenderKeywords []string `json:"senderKeywords,omitempty"` // Filter by keywords in sender addresses
// RecipientKeywords []string `json:"recipientKeywords,omitempty"` // Filter by keywords in recipient addresses
Since string `json:"since,omitempty"`
SubjectKeywords []string `json:"subjectKeywords,omitempty"` // Filter by keywords in subject
SenderKeywords []string `json:"senderKeywords,omitempty"` // Filter by keywords in sender addresses
RecipientKeywords []string `json:"recipientKeywords,omitempty"` // Filter by keywords in recipient addresses
}
func LoadConfig(path string) (*Config, error) {

View file

@ -6,6 +6,7 @@ import (
"io"
"log"
"mime"
"path/filepath"
"strings"
"time"
@ -82,10 +83,10 @@ func (c *ImapClient) ListMailboxes() ([]string, error) {
return mailboxes, nil
}
// GetMessages retrieves messages from a specific mailbox (simplified version)
// GetMessages retrieves messages from a specific mailbox with filtering support
// Returns messages and a map of all current UIDs in the mailbox
// maxMessages: 0 means no limit, > 0 limits the number of messages to fetch
func (c *ImapClient) GetMessages(mailbox string, since *time.Time, maxMessages int) ([]*Message, map[uint32]bool, error) {
func (c *ImapClient) GetMessages(mailbox string, since *time.Time, maxMessages int, messageFilter *config.MessageFilter) ([]*Message, map[uint32]bool, error) {
// Select the mailbox
mbox, err := c.Select(mailbox, nil).Wait()
if err != nil {
@ -142,6 +143,11 @@ func (c *ImapClient) GetMessages(mailbox string, since *time.Time, maxMessages i
continue
}
// Apply message-level keyword filtering
if messageFilter != nil && !c.ShouldProcessMessage(parsedMsg, messageFilter) {
continue // Skip this message due to keyword filter
}
messages = append(messages, parsedMsg)
}
@ -321,13 +327,24 @@ func (c *ImapClient) parseMessagePart(entity *message.Entity, msg *Message) erro
return nil
}
// ShouldProcessMailbox checks if a mailbox should be processed based on filters
// ShouldProcessMailbox checks if a mailbox should be processed based on filters with wildcard support
func (c *ImapClient) ShouldProcessMailbox(mailbox string, filter *config.FolderFilter) bool {
// If include list is specified, mailbox must be in it
// If include list is specified, mailbox must match at least one pattern
if len(filter.Include) > 0 {
found := false
for _, included := range filter.Include {
if mailbox == included {
for _, pattern := range filter.Include {
// Handle special case: "*" means include all folders
if pattern == "*" {
found = true
break
}
// Use filepath.Match for wildcard pattern matching
if matched, err := filepath.Match(pattern, mailbox); err == nil && matched {
found = true
break
}
// Also support exact string matching for backwards compatibility
if mailbox == pattern {
found = true
break
}
@ -337,9 +354,14 @@ func (c *ImapClient) ShouldProcessMailbox(mailbox string, filter *config.FolderF
}
}
// If exclude list is specified, mailbox must not be in it
for _, excluded := range filter.Exclude {
if mailbox == excluded {
// If exclude list is specified, mailbox must not match any exclude pattern
for _, pattern := range filter.Exclude {
// Use filepath.Match for wildcard pattern matching
if matched, err := filepath.Match(pattern, mailbox); err == nil && matched {
return false
}
// Also support exact string matching for backwards compatibility
if mailbox == pattern {
return false
}
}
@ -347,6 +369,56 @@ func (c *ImapClient) ShouldProcessMailbox(mailbox string, filter *config.FolderF
return true
}
// ShouldProcessMessage reports whether msg passes every keyword filter that is
// configured in filter.
//
// A filter category (subject, sender, recipient) with no keywords is not
// applied. A configured category passes when at least one of its keywords is
// found: in the lowercased subject for SubjectKeywords, in any lowercased entry
// of msg.From for SenderKeywords, and in any lowercased entry of msg.To for
// RecipientKeywords. The message is accepted only if all configured categories
// pass.
func (c *ImapClient) ShouldProcessMessage(msg *Message, filter *config.MessageFilter) bool {
	// Subject: a single matching keyword is sufficient.
	if subjectWords := filter.SubjectKeywords; len(subjectWords) > 0 &&
		!c.containsAnyKeyword(strings.ToLower(msg.Subject), subjectWords) {
		return false
	}

	// Sender: at least one From entry must contain a keyword.
	if senderWords := filter.SenderKeywords; len(senderWords) > 0 {
		hit := false
		for _, from := range msg.From {
			if hit = c.containsAnyKeyword(strings.ToLower(from), senderWords); hit {
				break
			}
		}
		if !hit {
			return false
		}
	}

	// Recipient: at least one To entry must contain a keyword.
	if recipientWords := filter.RecipientKeywords; len(recipientWords) > 0 {
		hit := false
		for _, to := range msg.To {
			if hit = c.containsAnyKeyword(strings.ToLower(to), recipientWords); hit {
				break
			}
		}
		if !hit {
			return false
		}
	}

	// Every configured category matched (or none was configured).
	return true
}
// containsAnyKeyword reports whether text contains at least one of keywords,
// comparing case-insensitively.
//
// Both text and each keyword are lowercased before the substring check, so the
// result does not depend on the caller having normalized text beforehand.
// An empty keywords slice yields false. An empty keyword matches any text,
// because every string contains the empty string.
func (c *ImapClient) containsAnyKeyword(text string, keywords []string) bool {
	if len(keywords) == 0 {
		return false
	}
	// Lowercase once, outside the loop; already-lowercased input is unchanged.
	lowered := strings.ToLower(text)
	for _, keyword := range keywords {
		if strings.Contains(lowered, strings.ToLower(keyword)) {
			return true
		}
	}
	return false
}
// Logout logs the client out
func (c *ImapClient) Logout() {
if err := c.Client.Logout(); err != nil {

View file

@ -98,7 +98,7 @@ func processImapSource(source *config.MailSource, couchClient *couch.Client, dbN
fmt.Printf(" Processing mailbox: %s (mode: %s)\n", mailbox, source.Mode)
// Retrieve messages from the mailbox
messages, currentUIDs, err := imapClient.GetMessages(mailbox, sinceDate, maxMessages)
messages, currentUIDs, err := imapClient.GetMessages(mailbox, sinceDate, maxMessages, &source.MessageFilter)
if err != nil {
log.Printf(" ERROR: Failed to get messages from %s: %v", mailbox, err)
continue