export const metadata = {
  name: "Fetch and Aggregate Sent Emails",
  description: "Fetch sent Gmail messages and aggregate patterns in one step. Outputs aggregated profile data directly without intermediate files for fast profile building.",
  packages: [],
  args: [
    { name: "timeRange", type: "string", description: "Time range to analyze: 30d, 90d, 1y, or all", position: 0, default: "30d" },
    { name: "maxResults", type: "number", description: "Maximum number of messages to fetch", position: 1, default: "500" }
  ]
};

// Positional CLI arguments: time window and message cap.
const [timeRange = "30d", maxResults = "500"] = process.argv.slice(2);
const maxResultsNum = Number.parseInt(maxResults, 10); // explicit radix
const BATCH_SIZE = 50; // Smaller batches to avoid timeouts

// Convert time range to Gmail query format.
// "all" maps to the empty string, which is falsy but VALID — use ?? so it is
// only replaced by the 30d default when the key is genuinely missing.
// (The original `|| timeRangeMap["30d"]` silently turned "all" into 30d.)
const timeRangeMap = {
  "30d": "newer_than:30d",
  "90d": "newer_than:90d",
  "1y": "newer_than:1y",
  "all": ""
};
const timeQuery = timeRangeMap[timeRange] ?? timeRangeMap["30d"];
const searchQuery = timeQuery ? `in:sent ${timeQuery}` : "in:sent";

console.log(`Fetching sent messages with query: ${searchQuery}`);
console.log(`Maximum results: ${maxResultsNum}`);

try {
  // Shared auth header for every Gmail API request (token injected elsewhere).
  const authHeaders = { Authorization: "Bearer PLACEHOLDER_TOKEN" };

  // Step 1: Fetch list of sent message IDs
  const listRes = await fetch(
    `https://gmail.googleapis.com/gmail/v1/users/me/messages?maxResults=${maxResultsNum}&q=${encodeURIComponent(searchQuery)}`,
    { headers: authHeaders }
  );
  if (!listRes.ok) {
    console.error(`Gmail API list failed: ${listRes.status}`);
    console.error(await listRes.text());
    throw new Error(`Failed to list sent messages: ${listRes.status}`);
  }
  const listData = await listRes.json();

  if (!listData.messages || listData.messages.length === 0) {
    console.log("No sent messages found in specified time range");
    // Safe now: `timeRange` is the CLI string. The original script declared a
    // second `const timeRange` later in this same block (the aggregate object),
    // which put this reference in the temporal dead zone and made this path
    // throw a ReferenceError instead of exiting cleanly.
    console.log(JSON.stringify({ error: "No sent messages found", timeRange: timeRange }, null, 2));
    process.exit(0);
  }

  console.log(`Found ${listData.messages.length} message(s), fetching in batches of ${BATCH_SIZE}...`);

  const messages = [];
  const messageIds = listData.messages.map((m) => m.id);

  // Step 2: Fetch messages in batches with parallel processing
  for (let i = 0; i < messageIds.length; i += BATCH_SIZE) {
    const batch = messageIds.slice(i, i + BATCH_SIZE);
    console.log(`Fetching batch ${Math.floor(i / BATCH_SIZE) + 1} (${batch.length} messages)...`);

    // format=metadata returns only headers + snippet, keeping payloads small.
    const batchResults = await Promise.all(
      batch.map(async (msgId) => {
        try {
          const detailRes = await fetch(
            `https://gmail.googleapis.com/gmail/v1/users/me/messages/${msgId}?format=metadata&metadataHeaders=Subject&metadataHeaders=To&metadataHeaders=Cc&metadataHeaders=Date`,
            { headers: authHeaders }
          );
          if (!detailRes.ok) {
            console.error(`Failed to fetch message ${msgId}: ${detailRes.status}`);
            return null;
          }
          const detail = await detailRes.json();
          // Case-insensitive single-header lookup; "" when the header is absent.
          const getHeader = (name) => {
            const header = detail.payload.headers.find(
              (h) => h.name.toLowerCase() === name.toLowerCase()
            );
            return header ? header.value : "";
          };
          return {
            id: detail.id,
            threadId: detail.threadId,
            subject: getHeader("Subject"),
            to: getHeader("To"),
            cc: getHeader("Cc"),
            snippet: detail.snippet,
            date: getHeader("Date"),
            internalDate: detail.internalDate,
            labelIds: detail.labelIds || []
          };
        } catch (error) {
          // Best-effort: a single failed message must not abort the batch.
          console.error(`Error fetching message ${msgId}:`, error.message);
          return null;
        }
      })
    );

    const validMessages = batchResults.filter((m) => m !== null);
    messages.push(...validMessages);
    console.log(`✓ Batch complete: ${validMessages.length}/${batch.length} messages fetched (total: ${messages.length}/${messageIds.length})`);

    // Early stop if we have enough data for pattern analysis (300+ messages)
    if (messages.length >= 300 && i + BATCH_SIZE < messageIds.length) {
      console.log(`✓ Collected ${messages.length} messages - sufficient for pattern analysis, stopping early`);
      break;
    }
  }

  console.log(`✓ Fetched ${messages.length} sent messages successfully`);

  // Guard: every per-message fetch may have failed. Without this, the
  // aggregation below divides by zero (avgLength, avgThreadDepth → NaN).
  if (messages.length === 0) {
    console.log(JSON.stringify({ error: "No sent messages could be fetched", timeRange: timeRange }, null, 2));
    process.exit(0);
  }

  // Step 3: Filter cold/marketing emails for personality analysis
  const coldEmailPatterns = [
    /unsubscribe/i,
    /opt.out/i,
    /click here/i,
    /view in browser/i,
    /follow us on/i,
    /\[BULK\]/i,
    /\[MARKETING\]/i,
    /newsletter/i,
    /promotional/i
  ];
  const isColdEmail = (email) => {
    const text = `${email.subject || ''} ${email.snippet || ''}`.toLowerCase();
    return coldEmailPatterns.some((pattern) => pattern.test(text));
  };

  const personalEmails = messages.filter((e) => !isColdEmail(e));
  const coldEmailCount = messages.length - personalEmails.length;
  if (coldEmailCount > 0) {
    console.log(`Filtered out ${coldEmailCount} cold/marketing emails (${(coldEmailCount / messages.length * 100).toFixed(1)}%), analyzing ${personalEmails.length} personal emails`);
  }
  // If the filter removed everything, fall back to the unfiltered set.
  const analysisEmails = personalEmails.length > 0 ? personalEmails : messages;
  console.log(`Aggregating ${analysisEmails.length} emails...`);

  // Step 4: Aggregate patterns
  // Time range analysis. Named timeRangeSummary (NOT timeRange): the original
  // `const timeRange` here shadowed the CLI argument and broke the early-exit
  // path above with a temporal-dead-zone ReferenceError.
  const dates = analysisEmails
    .map((e) => Number.parseInt(e.internalDate, 10))
    .filter((d) => !Number.isNaN(d));
  const timeRangeSummary = {
    start: dates.length > 0 ? new Date(Math.min(...dates)).toISOString() : null,
    end: dates.length > 0 ? new Date(Math.max(...dates)).toISOString() : null,
    totalCount: messages.length,
    personalCount: analysisEmails.length,
    coldEmailsFiltered: coldEmailCount
  };

  // Recipient analysis
  const recipientCounts = {};
  const domainCounts = {};
  let internalCount = 0;
  let externalCount = 0;
  analysisEmails.forEach((email) => {
    const recipients = [];
    if (email.to) recipients.push(...email.to.split(',').map((r) => r.trim()));
    if (email.cc) recipients.push(...email.cc.split(',').map((r) => r.trim()));
    recipients.forEach((recipient) => {
      // Extract the address from "Name <addr>" form; fall back to the raw token.
      const emailMatch = recipient.match(/<([^>]+)>/) || [null, recipient];
      const emailAddr = emailMatch[1] || recipient;
      recipientCounts[emailAddr] = (recipientCounts[emailAddr] || 0) + 1;
      const domain = emailAddr.split('@')[1] || 'unknown';
      domainCounts[domain] = (domainCounts[domain] || 0) + 1;
      // Heuristic internal/external split — assumes internal domains contain
      // "company"/"corp"/".internal"; verify against the deployment's domains.
      if (domain.includes('company') || domain.includes('corp') || domain.includes('.internal')) {
        internalCount++;
      } else {
        externalCount++;
      }
    });
  });

  const topRecipients = Object.entries(recipientCounts)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10)
    .map(([email, count]) => ({ email, count }));
  const topDomains = Object.entries(domainCounts)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10)
    .map(([domain, count]) => ({ domain, count }));
  const recipientStats = {
    topRecipients,
    topDomains,
    uniqueRecipients: Object.keys(recipientCounts).length,
    internalVsExternal: {
      internal: internalCount,
      external: externalCount,
      ratio: internalCount > 0 ? (externalCount / internalCount).toFixed(2) : 'N/A'
    }
  };

  // Topic clustering: keyword frequency over subjects (stripped of Re:/Fwd:).
  const subjectWords = {};
  const threadCounts = {};
  analysisEmails.forEach((email) => {
    threadCounts[email.threadId] = (threadCounts[email.threadId] || 0) + 1;
    const subject = email.subject || '';
    const words = subject.toLowerCase()
      .replace(/re:|fwd:|fw:/gi, '')
      .split(/\s+/)
      .filter((w) => w.length > 3 && !['the', 'and', 'for', 'with', 'from'].includes(w));
    words.forEach((word) => {
      subjectWords[word] = (subjectWords[word] || 0) + 1;
    });
  });
  const topTopics = Object.entries(subjectWords)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 15)
    .map(([word, count]) => ({ keyword: word, count }));

  // Time pattern analysis (local time of this machine, not the sender's zone).
  let morningCount = 0;
  let afternoonCount = 0;
  let eveningCount = 0;
  let nightCount = 0;
  let weekendCount = 0;
  analysisEmails.forEach((email) => {
    const date = new Date(Number.parseInt(email.internalDate, 10));
    const hour = date.getHours();
    const day = date.getDay();
    if (day === 0 || day === 6) weekendCount++;
    if (hour >= 6 && hour < 12) morningCount++;
    else if (hour >= 12 && hour < 18) afternoonCount++;
    else if (hour >= 18 && hour < 22) eveningCount++;
    else nightCount++;
  });
  const timePatterns = {
    timeBuckets: { morning: morningCount, afternoon: afternoonCount, evening: eveningCount, night: nightCount },
    weekendRatio: (weekendCount / analysisEmails.length * 100).toFixed(1) + '%'
  };

  // Length patterns (based on the Gmail snippet, not the full body).
  const lengths = analysisEmails.map((e) => (e.snippet || '').length);
  const avgLength = lengths.reduce((a, b) => a + b, 0) / lengths.length;
  const lengthBuckets = {
    brief: lengths.filter((l) => l < 100).length,
    standard: lengths.filter((l) => l >= 100 && l < 200).length,
    detailed: lengths.filter((l) => l >= 200 && l < 300).length,
    long: lengths.filter((l) => l >= 300).length
  };
  const lengthPatterns = { avgSnippetLength: Math.round(avgLength), distribution: lengthBuckets };

  // Phrase analysis: two-word openers and overall word frequency.
  const openers = {};
  const commonWords = {};
  analysisEmails.forEach((email) => {
    const snippet = email.snippet || '';
    const words = snippet.toLowerCase().split(/\s+/);
    if (words.length >= 2) {
      const opener = words.slice(0, 2).join(' ');
      openers[opener] = (openers[opener] || 0) + 1;
    }
    words.forEach((word) => {
      if (word.length > 3) {
        commonWords[word] = (commonWords[word] || 0) + 1;
      }
    });
  });
  const topOpeners = Object.entries(openers)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10)
    .map(([phrase, count]) => ({ phrase, count }));
  const topWords = Object.entries(commonWords)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 20)
    .map(([word, count]) => ({ word, count }));
  const phraseAnalysis = { topOpeners, topWords };

  // Communication stats: thread participation depth.
  const threadDepths = Object.values(threadCounts);
  const avgThreadDepth = threadDepths.reduce((a, b) => a + b, 0) / threadDepths.length;
  const communicationStats = {
    totalThreads: Object.keys(threadCounts).length,
    avgThreadDepth: avgThreadDepth.toFixed(1),
    singleEmailThreads: threadDepths.filter((d) => d === 1).length,
    multiEmailThreads: threadDepths.filter((d) => d > 1).length
  };

  // Project signals: keyword matches in subject or snippet.
  const projectKeywords = ['project', 'meeting', 'deadline', 'review', 'deliverable', 'presentation', 'report'];
  const projectEmails = analysisEmails.filter((e) =>
    projectKeywords.some((kw) =>
      (e.subject || '').toLowerCase().includes(kw) ||
      (e.snippet || '').toLowerCase().includes(kw))
  );
  const projectSignals = {
    projectRelatedCount: projectEmails.length,
    projectRatio: (projectEmails.length / analysisEmails.length * 100).toFixed(1) + '%',
    keywordMatches: projectKeywords.map((kw) => ({
      keyword: kw,
      count: analysisEmails.filter((e) =>
        (e.subject || '').toLowerCase().includes(kw) ||
        (e.snippet || '').toLowerCase().includes(kw)).length
    })).filter((m) => m.count > 0)
  };

  // Step 5: Output aggregated data (JSON key "timeRange" kept for consumers).
  const aggregates = {
    timeRange: timeRangeSummary,
    recipientStats,
    topics: topTopics,
    timePatterns,
    lengthPatterns,
    phraseAnalysis,
    communicationStats,
    projectSignals
  };
  console.log(`✓ Aggregation complete`);
  console.log(JSON.stringify(aggregates, null, 2));
} catch (error) {
  console.error("Error in fetch and aggregate:", error.message);
  throw error;
}