mail2couch/scripts/validate-schemas.py

169 lines
5.1 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
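Schema Validation Script for mail2couch

This script validates the example CouchDB documents (the JSON files in the
project's examples/ directory) against the required fields and types listed
in this file, which are intended to match the document schemas produced by
the Go implementation. It exits with status 0 when every example is valid
and with status 1 otherwise.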
"""
import json
import sys
from pathlib import Path
def load_json_file(file_path):
    """Load and parse a JSON file.

    Args:
        file_path: Path (string or path-like) of the JSON file to read.

    Returns:
        The decoded JSON value, or None when the file is missing, cannot
        be read, or does not contain valid JSON. In each failure case a
        one-line ``ERROR:`` message is printed to stdout. Note that a file
        holding the JSON literal ``null`` also yields None.
    """
    try:
        # JSON text is UTF-8; do not depend on the locale's default encoding.
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"ERROR: File not found: {file_path}")
    except OSError as e:
        # For example: the path is a directory, or permission is denied.
        print(f"ERROR: Cannot read {file_path}: {e}")
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # Bytes that are not valid UTF-8 are reported like malformed JSON.
        print(f"ERROR: Invalid JSON in {file_path}: {e}")
    return None
def validate_mail_document(doc, filename):
    """Validate a mail document structure.

    Checks that all required fields are present, that selected fields have
    the expected JSON type, that ``_id`` is a string containing an
    underscore, and that every ``_attachments`` entry is an object with a
    ``content_type``.

    Args:
        doc: Decoded JSON value that should be a mail document object.
        filename: Name used to label the printed report.

    Returns:
        True when no problems were found, False otherwise. The result and
        each individual error are printed to stdout.
    """
    required_fields = [
        '_id', 'sourceUid', 'mailbox', 'from', 'to', 'subject',
        'date', 'body', 'headers', 'storedAt', 'docType', 'hasAttachments'
    ]
    # (field name, required Python type, wording used in the error message)
    typed_fields = [
        ('from', list, 'an array'),
        ('to', list, 'an array'),
        ('headers', dict, 'an object'),
        ('hasAttachments', bool, 'a boolean'),
    ]
    errors = []

    if not isinstance(doc, dict):
        # Nothing else can be checked on a non-object document.
        errors.append("Document must be a JSON object")
    else:
        # Check required fields
        for field in required_fields:
            if field not in doc:
                errors.append(f"Missing required field: {field}")

        # Check field types
        if 'docType' in doc and doc['docType'] != 'mail':
            errors.append(
                f"Invalid docType: expected 'mail', got '{doc['docType']}'"
            )
        for field, expected_type, description in typed_fields:
            if field in doc and not isinstance(doc[field], expected_type):
                errors.append(f"Field '{field}' must be {description}")

        # Check _id format; a non-string _id is reported rather than
        # crashing on the substring test.
        if '_id' in doc:
            doc_id = doc['_id']
            if not isinstance(doc_id, str) or '_' not in doc_id:
                errors.append(
                    f"Document ID '{doc_id}' should follow format 'mailbox_uid'"
                )

        # Validate attachments if present
        if '_attachments' in doc:
            attachments = doc['_attachments']
            if not isinstance(attachments, dict):
                errors.append("Field '_attachments' must be an object")
            else:
                # The loop variable must not reuse the name ``filename``:
                # that would relabel the report below with an attachment
                # name instead of the document's file name.
                for attachment_name, stub in attachments.items():
                    if not isinstance(stub, dict):
                        errors.append(
                            f"Attachment '{attachment_name}' must be an object"
                        )
                    elif 'content_type' not in stub:
                        errors.append(
                            f"Attachment '{attachment_name}' missing content_type"
                        )

    if errors:
        print(f"ERRORS in {filename}:")
        for error in errors:
            print(f" - {error}")
        return False

    print(f"{filename}: Valid mail document")
    return True
def validate_sync_metadata(doc, filename):
    """Validate a sync metadata document structure.

    Checks that all required fields are present, that ``docType`` is
    ``'sync_metadata'``, that the counter fields are integers, and that
    ``_id`` is a string starting with ``'sync_metadata_'``.

    Args:
        doc: Decoded JSON value that should be a sync metadata object.
        filename: Name used to label the printed report.

    Returns:
        True when no problems were found, False otherwise. The result and
        each individual error are printed to stdout.
    """
    required_fields = [
        '_id', 'docType', 'mailbox', 'lastSyncTime',
        'lastMessageUID', 'messageCount', 'updatedAt'
    ]
    integer_fields = ['lastMessageUID', 'messageCount']
    errors = []

    if not isinstance(doc, dict):
        # Nothing else can be checked on a non-object document.
        errors.append("Document must be a JSON object")
    else:
        # Check required fields
        for field in required_fields:
            if field not in doc:
                errors.append(f"Missing required field: {field}")

        # Check field types
        if 'docType' in doc and doc['docType'] != 'sync_metadata':
            errors.append(
                f"Invalid docType: expected 'sync_metadata', got '{doc['docType']}'"
            )
        for field in integer_fields:
            if field in doc:
                value = doc[field]
                # bool is a subclass of int in Python, so JSON true/false
                # must be rejected explicitly.
                if isinstance(value, bool) or not isinstance(value, int):
                    errors.append(f"Field '{field}' must be an integer")

        # Check _id format; a non-string _id is reported rather than
        # crashing on str.startswith.
        if '_id' in doc:
            doc_id = doc['_id']
            if not isinstance(doc_id, str) or not doc_id.startswith('sync_metadata_'):
                errors.append(
                    f"Document ID '{doc_id}' should start with 'sync_metadata_'"
                )

    if errors:
        print(f"ERRORS in {filename}:")
        for error in errors:
            print(f" - {error}")
        return False

    print(f"{filename}: Valid sync metadata document")
    return True
def main():
    """Validate every example document and exit with a status code.

    The example files are read from the ``examples`` directory that sits
    beside the directory containing this script. Mail documents are checked
    first, then sync metadata. The process exits with status 0 when every
    file loads and validates, and with status 1 otherwise.
    """
    examples_dir = Path(__file__).parent.parent / "examples"
    separator = "=" * 50

    print("Validating CouchDB document schemas...")
    print(separator)

    # Each validator paired with the example files it applies to, in the
    # order they are reported.
    plan = [
        (
            validate_mail_document,
            ["sample-mail-document.json", "simple-mail-document.json"],
        ),
        (
            validate_sync_metadata,
            ["sample-sync-metadata.json"],
        ),
    ]

    failed = False
    for validator, names in plan:
        for name in names:
            document = load_json_file(examples_dir / name)
            # A file that could not be loaded counts as a failure and is
            # never handed to the validator.
            if document is None or not validator(document, name):
                failed = True

    print(separator)
    if failed:
        print("✗ Schema validation failed!")
    else:
        print("✓ All schemas are valid!")
    sys.exit(1 if failed else 0)
# Run the validation only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == "__main__":
    main()