Connect to Gmail
Collect inbox emails (90 days), sent emails (6 months), and run targeted discovery searches for profile analysis
Source Code
import fs from "fs";
import path from "path";
// Both output locations are mandatory positional CLI arguments; bail out with
// a usage message when either one is missing.
const cliArgs = process.argv.slice(2);
const profileOutputPath = cliArgs[0];
const writingSamplesOutputPath = cliArgs[1];
if (!(profileOutputPath && writingSamplesOutputPath)) {
  console.error("Usage: gmail.inbox.connect <profileOutputPath> <writingSamplesOutputPath>");
  console.error("Both output paths are required.");
  process.exit(1);
}
// Upper bounds on how many messages each collection step will list.
const INBOX_MAX_MESSAGES = 75;
const SENT_MAX_MESSAGES = 50;
const DISCOVERY_MAX_PER_QUERY = 15;
// Start of the 90-day window (86,400,000 ms per day).
const ninetyDaysAgo = new Date(Date.now() - 90 * 86_400_000);
// Render a Date as a short US-style label such as "Jan 5".
const formatDate = (d) => {
  const labelOptions = { month: "short", day: "numeric" };
  return d.toLocaleDateString("en-US", labelOptions);
};
/**
 * Describe how long ago a Date was, using the coarsest unit that fits
 * ("just now", "5m ago", "3h ago", "2d ago", "2w ago", "1mo ago", "2y ago").
 * Missing or invalid dates produce "unknown"; future dates read as "just now".
 */
function formatTimeAgo(date) {
  if (!date || isNaN(date.getTime())) return "unknown";

  const elapsedSeconds = Math.floor((Date.now() - date.getTime()) / 1000);
  const elapsedMinutes = Math.floor(elapsedSeconds / 60);
  const elapsedHours = Math.floor(elapsedMinutes / 60);
  const elapsedDays = Math.floor(elapsedHours / 24);

  if (elapsedSeconds < 60) return "just now";
  if (elapsedMinutes < 60) return `${elapsedMinutes}m ago`;
  if (elapsedHours < 24) return `${elapsedHours}h ago`;
  if (elapsedDays < 7) return `${elapsedDays}d ago`;
  if (elapsedDays < 30) return `${Math.floor(elapsedDays / 7)}w ago`;
  if (elapsedDays < 365) return `${Math.floor(elapsedDays / 30)}mo ago`;
  return `${Math.floor(elapsedDays / 365)}y ago`;
}
// Targeted discovery searches organized by confidence tier.
// Each entry is { tier, category, query }; `query` is handed unchanged to the
// Gmail message search, and `category` becomes the key under which matches are reported.
// Tier 1: High-confidence personal facts (user wrote about themselves or verification emails)
// Tier 2: Tool usage signals (high signal for understanding user's workflow)
// Tier 3: Professional interests (conferences, newsletters, certifications)
// Tier 4: Infrastructure signals (for tech users)
// Tier 5: Lifestyle signals (commerce, financial institutions, travel, health, entertainment)
const DISCOVERY_QUERIES = [
// === TIER 1: High-confidence personal facts ===
// User writes about themselves in sent mail
{ tier: 1, category: "children", query: 'in:sent ("my son" OR "my daughter" OR "my kids" OR "the kids")' },
{ tier: 1, category: "partner", query: 'in:sent ("my husband" OR "my wife" OR "my partner" OR "my boyfriend" OR "my girlfriend") -"business partner" -"design partner" -"co-founder"' },
{ tier: 1, category: "pets", query: 'in:sent ("my dog" OR "my cat" OR "our dog" OR "our cat" OR "our pet")' },
{ tier: 1, category: "phone_numbers", query: 'in:sent ("my number is" OR "my cell is" OR "call me at" OR "text me at" OR "reach me at")' },
{ tier: 1, category: "birthday", query: 'to:me subject:"happy birthday"' },
// Location - require actual shipping services, not promos
{ tier: 1, category: "location", query: 'from:(amazon OR fedex OR ups OR usps OR dhl) ("delivered to" OR "shipping address" OR "shipped to")' },
// Communication - verification emails prove account ownership
{ tier: 1, category: "whatsapp", query: 'from:whatsapp ("verification code" OR "your code" OR "new device")' },
{ tier: 1, category: "signal_app", query: 'from:signal ("verification" OR "code")' },
{ tier: 1, category: "telegram", query: 'from:telegram ("code" OR "verification")' },
{ tier: 1, category: "slack", query: 'from:slack ("verification code" OR "sign in code" OR "confirmation code")' },
// === TIER 2: Tool usage (HIGH SIGNAL for workflow understanding) ===
// SaaS trials show active tool exploration
{ tier: 2, category: "saas_trials", query: 'subject:("trial started" OR "trial ending" OR "days left" OR "trial expires")' },
// Receipts show what user actually pays for
{ tier: 2, category: "receipts", query: 'from:(stripe OR paypal) subject:("receipt" OR "payment" OR "invoice")' },
// Project management tools show active usage
{ tier: 2, category: "project_mgmt", query: 'from:(linear OR jira OR asana OR notion OR monday) subject:("assigned" OR "mentioned" OR "commented")' },
// Password manager shows security posture
{ tier: 2, category: "password_mgr", query: 'from:(1password OR lastpass OR bitwarden OR dashlane) subject:("security" OR "new device" OR "sign in")' },
// Video platform preference from user's sent calendly/scheduling links
{ tier: 2, category: "video_platform", query: 'in:sent ("calendly.com" OR "cal.com") ("zoom" OR "meet.google" OR "teams")' },
// Subscriptions - existing query, good signal
{ tier: 2, category: "subscriptions", query: 'subject:("subscription confirmed" OR "you subscribed" OR "thanks for subscribing" OR "welcome to")' },
// === TIER 3: Professional interests ===
// Conference registrations show professional interests
{ tier: 3, category: "conferences", query: 'subject:("ticket confirmation" OR "registration confirmed" OR "event registration") -meeting -calendar' },
// Newsletter subscriptions show content interests
{ tier: 3, category: "newsletters", query: 'from:substack OR subject:("you subscribed" "newsletter")' },
// Certifications show skills and learning focus
{ tier: 3, category: "certifications", query: 'subject:("certificate" OR "certification" OR "completed course" OR "course complete")' },
// Social media - account existence signals
{ tier: 3, category: "instagram", query: "from:instagram" },
{ tier: 3, category: "linkedin", query: "from:linkedin" },
{ tier: 3, category: "twitter", query: "from:twitter OR from:x.com" },
{ tier: 3, category: "github", query: "from:github" },
// === TIER 4: Infrastructure signals (for tech users) ===
// Hosting/deployment shows side projects
{ tier: 4, category: "hosting", query: 'from:(vercel OR netlify OR heroku OR railway OR render) subject:("deployed" OR "deployment" OR "build")' },
// Domain ownership shows side projects/businesses
{ tier: 4, category: "domains", query: 'from:(namecheap OR godaddy OR cloudflare OR porkbun) subject:("renewal" OR "registered" OR "expiring")' },
// Cloud storage patterns
{ tier: 4, category: "cloud_storage", query: 'in:sent ("dropbox.com/s/" OR "drive.google.com/file" OR "notion.so/")' },
// === TIER 5: Lifestyle signals ===
// Commerce
{ tier: 5, category: "amazon", query: 'from:amazon "your order"' },
// Financial (institution names only, never account numbers)
{ tier: 5, category: "banking", query: '(from:chase OR from:wellsfargo OR from:bankofamerica OR from:citi OR from:capitalone) subject:statement' },
{ tier: 5, category: "investments", query: "from:fidelity OR from:schwab OR from:vanguard OR from:robinhood" },
// Travel
{ tier: 5, category: "travel", query: 'subject:("booking confirmed" OR "reservation confirmed" OR "flight confirmation" OR "itinerary") (from:airline OR from:hotel OR from:airbnb OR from:expedia OR from:booking)' },
// Health
{ tier: 5, category: "health", query: 'subject:("appointment confirmed" OR "appointment reminder" OR "your visit") (from:doctor OR from:health OR from:medical OR from:pharmacy)' },
// Entertainment
{ tier: 5, category: "spotify", query: "from:spotify" },
{ tier: 5, category: "netflix", query: "from:netflix" },
{ tier: 5, category: "discord", query: 'from:discord subject:("joined" OR "invited")' },
];
// Announce the collection windows and per-mailbox caps before any network call.
[
  "Collecting Gmail data (inbox: 90 days, sent: 6 months)...",
  ` Inbox: up to ${INBOX_MAX_MESSAGES} messages`,
  ` Sent: up to ${SENT_MAX_MESSAGES} messages`,
].forEach((line) => {
  console.log(line);
});
/**
 * List the IDs of messages matching a Gmail search query, following
 * pagination until `maxResults` IDs have been collected or the results run out.
 *
 * @param {string} query - Gmail search expression; left off the request when falsy.
 * @param {number} maxResults - Upper bound on the number of IDs returned.
 * @returns {Promise<string[]>} Message IDs in the order the API returned them.
 * @throws {Error} When the API answers with a non-OK status, or with a body
 *   that is not valid JSON (the parse error is attached as `cause`).
 */
async function fetchMessageIds(query, maxResults) {
  const ids = [];
  let pageToken = null;

  while (ids.length < maxResults) {
    const remaining = maxResults - ids.length;
    // Never ask for more than 100 IDs in a single page.
    const pageSize = Math.min(remaining, 100);

    const url = new URL("https://gmail.googleapis.com/gmail/v1/users/me/messages");
    url.searchParams.set("maxResults", pageSize.toString());
    if (query) url.searchParams.set("q", query);
    if (pageToken) url.searchParams.set("pageToken", pageToken);

    const res = await fetch(url.toString(), {
      headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
    });
    // Read as text first so the raw body can be quoted in error messages.
    const text = await res.text();
    if (!res.ok) {
      throw new Error(`Gmail API failed: ${res.status} - ${text}`);
    }

    let data;
    try {
      data = JSON.parse(text);
    } catch (err) {
      // Keep the original SyntaxError reachable for debugging.
      throw new Error(`Gmail API returned invalid JSON: ${text.slice(0, 200)}`, {
        cause: err,
      });
    }

    // A valid-JSON body without a message list (including `null`) means "no more results".
    const messages = data?.messages;
    if (!Array.isArray(messages) || messages.length === 0) break;

    // A page may hold more IDs than we still need; keep only the remainder.
    ids.push(...messages.map((m) => m.id).slice(0, remaining));

    pageToken = data.nextPageToken;
    if (!pageToken) break;
  }

  return ids;
}
/**
 * Download message details for a list of IDs, 25 requests at a time.
 * Messages whose request is not OK, or whose body is not valid JSON, are
 * skipped; progress is logged after every batch.
 *
 * @param {string[]} messageIds - IDs to download.
 * @param {string} [format="metadata"] - Value for the API `format` parameter.
 * @returns {Promise<object[]>} The successfully fetched messages, in request order.
 */
async function fetchMessages(messageIds, format = "metadata") {
  const BATCH_SIZE = 25;
  // Only the headers used for profiling are requested in metadata mode.
  const metadataSuffix =
    format === "metadata"
      ? "&metadataHeaders=Subject&metadataHeaders=From&metadataHeaders=To&metadataHeaders=Date&metadataHeaders=Cc"
      : "";

  const fetchOne = async (id) => {
    const endpoint = `https://gmail.googleapis.com/gmail/v1/users/me/messages/${id}?format=${format}${metadataSuffix}`;
    const response = await fetch(endpoint, {
      headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
    });
    if (!response.ok) return null;
    try {
      return await response.json();
    } catch {
      return null; // Invalid JSON, skip this message
    }
  };

  const collected = [];
  let offset = 0;
  while (offset < messageIds.length) {
    const chunk = messageIds.slice(offset, offset + BATCH_SIZE);
    const settled = await Promise.all(chunk.map((id) => fetchOne(id)));
    for (const message of settled) {
      if (message) collected.push(message);
    }
    offset += BATCH_SIZE;
    console.log(` Fetched ${Math.min(offset, messageIds.length)}/${messageIds.length}...`);
  }
  return collected;
}
/**
 * Pull the lower-cased address out of a From/To style header value.
 * Prefers the part inside angle brackets; otherwise the whole value is
 * lower-cased and trimmed. An empty header yields "unknown".
 */
function extractEmail(header) {
  if (!header) return "unknown";

  const bracketed = header.match(/<([^>]+)>/);
  if (bracketed === null) {
    return header.toLowerCase().trim();
  }
  const [, address] = bracketed;
  return address.toLowerCase();
}
/**
 * Extract a display name from a From/To style header value.
 *
 * - `Name <addr>` returns the name, trimmed and with double quotes removed.
 * - A bare address (`addr` or `<addr>`) returns the part before the "@".
 * - An empty header returns "Unknown".
 *
 * @param {string} header - Raw header value for a single mailbox.
 * @returns {string} The best available display name.
 */
function extractName(header) {
  if (!header) return "Unknown";

  const displayName = header.match(/^([^<]+)</);
  if (displayName) return displayName[1].trim().replace(/"/g, "");

  // No display name: fall back to the local part. A bracket-only address
  // ("<alice@example.com>") must not leak its "<" into the name.
  return header.split("@")[0].replace(/^</, "");
}
/**
 * Look up a header on a Gmail message by case-insensitive name.
 * Returns the first matching header's value, or "" when the message has no
 * such header (or no header list at all).
 */
function getHeader(msg, name) {
  for (const candidate of msg.payload?.headers ?? []) {
    if (candidate.name.toLowerCase() === name.toLowerCase()) {
      return candidate.value;
    }
  }
  return "";
}
/**
 * Decode a base64 `body.data` value to UTF-8 text.
 * Returns null (rather than throwing) when the value cannot be decoded.
 */
function decodeBodyData(data) {
  try {
    return Buffer.from(data, "base64").toString("utf-8");
  } catch {
    return null;
  }
}

/**
 * Depth-first search of a MIME part list for the first decodable
 * `text/plain` part, at any nesting depth. Returns null when none exists.
 */
function findPlainTextBody(parts) {
  for (const part of parts) {
    if (part.mimeType === "text/plain" && part.body?.data) {
      const text = decodeBodyData(part.body.data);
      if (text !== null) return text;
      continue; // Undecodable part: try the next sibling
    }
    if (part.parts) {
      const nested = findPlainTextBody(part.parts);
      if (nested !== null) return nested;
    }
  }
  return null;
}

/**
 * Extract plain text body from Gmail message payload.
 *
 * A payload that carries its own `body.data` is decoded directly. Otherwise
 * the part tree is searched for the first `text/plain` part, however deeply
 * it is nested (e.g. mixed > related > alternative > text/plain).
 *
 * @param {object|null|undefined} payload - The message `payload` object.
 * @returns {string} Decoded text, or "" when no plain-text body is available.
 */
function extractBodyText(payload) {
  if (!payload) return "";
  if (payload.body?.data) return decodeBodyData(payload.body.data) ?? "";
  if (payload.parts) return findPlainTextBody(payload.parts) ?? "";
  return "";
}
// Main pipeline: every collection, analysis and output step below runs inside
// this try block; its catch handler logs the failure message and rethrows.
try {
// Phase 1: list inbox and sent message IDs concurrently.
console.log("\nPhase 1: Listing message IDs...");
const inboxQuery =
  "newer_than:90d -in:sent -category:promotions -category:updates -category:social -category:forums";
const sentQuery = "in:sent newer_than:180d"; // 6 months for writing samples
const [inboxIds, sentIds] = await Promise.all([
  fetchMessageIds(inboxQuery, INBOX_MAX_MESSAGES),
  fetchMessageIds(sentQuery, SENT_MAX_MESSAGES),
]);
console.log(` Inbox: ${inboxIds.length} messages`);
console.log(` Sent: ${sentIds.length} messages`);

// Nothing to analyze: report a machine-readable failure on stdout and stop.
const nothingFound = inboxIds.length + sentIds.length === 0;
if (nothingFound) {
  console.error("\n✗ No messages found in the last 90 days.");
  const failure = { success: false, error: "no_messages_found" };
  console.log(JSON.stringify(failure));
  process.exit(1);
}

// Phase 2: download details for both mailboxes concurrently. Inbox messages
// only need headers; sent messages need the full body for writing samples.
console.log("\nPhase 2: Fetching message details...");
const [inboxDetails, sentDetails] = await Promise.all([
  fetchMessages(inboxIds, "metadata"),
  fetchMessages(sentIds, "full"),
]);
console.log(` Inbox: ${inboxDetails.length} fetched`);
console.log(` Sent: ${sentDetails.length} fetched`);
// Phase 3: Discovery searches
console.log("\nPhase 3: Running discovery searches...");

// Reduce one metadata message to the compact record stored per discovery hit.
const summarizeDiscoveryHit = (message) => ({
  id: message.id,
  threadId: message.threadId,
  from: getHeader(message, "From"),
  to: getHeader(message, "To"),
  subject: getHeader(message, "Subject"),
  snippet: message.snippet,
  timeAgo: formatTimeAgo(new Date(getHeader(message, "Date"))),
});

// Run a single discovery query. A failure is logged and recorded on the
// result instead of aborting the other searches.
const runDiscoveryQuery = async ({ tier, category, query }) => {
  const base = { tier, category, query };
  try {
    const ids = await fetchMessageIds(query, DISCOVERY_MAX_PER_QUERY);
    if (ids.length === 0) {
      return { ...base, count: 0, emails: [] };
    }
    const messages = await fetchMessages(ids, "metadata");
    return {
      ...base,
      count: messages.length,
      emails: messages.map((message) => summarizeDiscoveryHit(message)),
    };
  } catch (err) {
    console.log(` Warning: ${category} search failed: ${err.message}`);
    return { ...base, count: 0, emails: [], error: err.message };
  }
};

const discoveryResults = await Promise.all(
  DISCOVERY_QUERIES.map((entry) => runDiscoveryQuery(entry))
);
const discoveryWithResults = discoveryResults.filter(({ count }) => count > 0);
console.log(
  ` Discovery: ${discoveryWithResults.length}/${DISCOVERY_QUERIES.length} categories found`
);
discoveryWithResults.forEach(({ category, count }) => {
  console.log(` - ${category}: ${count} emails`);
});
// Process inbox messages: per-sender stats, label counts, daily volume, and a
// flat list of message summaries.
const inboxMessages = [];
const contactsReceived = new Map(); // email -> { name, count, dates, threadIds }
const contactsSent = new Map(); // email -> { name, count, dates, threadIds }
const labelCounts = {};
const dateVolume = {};
for (const msg of inboxDetails) {
  const [from, to, date, subject] = ["From", "To", "Date", "Subject"].map(
    (headerName) => getHeader(msg, headerName)
  );
  const senderEmail = extractEmail(from);
  const receivedAt = date ? new Date(date) : null;
  const hasValidDate = receivedAt !== null && !isNaN(receivedAt.getTime());

  // Per-sender record, created the first time the address is seen.
  let senderStats = contactsReceived.get(senderEmail);
  if (senderStats === undefined) {
    senderStats = {
      name: extractName(from),
      count: 0,
      dates: [],
      threadIds: new Set(),
    };
    contactsReceived.set(senderEmail, senderStats);
  }
  senderStats.count += 1;
  if (hasValidDate) senderStats.dates.push(receivedAt);
  if (msg.threadId) senderStats.threadIds.add(msg.threadId);

  // Label histogram.
  const labelIds = msg.labelIds || [];
  for (const label of labelIds) {
    labelCounts[label] = (labelCounts[label] || 0) + 1;
  }

  // Daily volume, keyed by the ISO (UTC) calendar date.
  if (hasValidDate) {
    const dayKey = receivedAt.toISOString().split("T")[0];
    dateVolume[dayKey] = (dateVolume[dayKey] || 0) + 1;
  }

  inboxMessages.push({
    id: msg.id,
    threadId: msg.threadId,
    from,
    to,
    subject,
    date,
    snippet: msg.snippet,
    labelIds,
  });
}
// Process sent messages for contacts and writing samples

// Split an address-list header (To/Cc) into individual mailboxes. Only commas
// outside double quotes separate mailboxes, so a display name such as
// "Doe, John" <john@example.com> stays in one piece.
const splitAddressList = (headerValue) => {
  const mailboxes = [];
  let current = "";
  let inQuotes = false;
  for (let i = 0; i < headerValue.length; i++) {
    const ch = headerValue[i];
    if (inQuotes && ch === "\\" && i + 1 < headerValue.length) {
      // Backslash-escaped character inside a quoted string: copy both verbatim.
      current += ch + headerValue[i + 1];
      i++;
      continue;
    }
    if (ch === '"') inQuotes = !inQuotes;
    if (ch === "," && !inQuotes) {
      mailboxes.push(current);
      current = "";
    } else {
      current += ch;
    }
  }
  mailboxes.push(current);
  return mailboxes.map((mailbox) => mailbox.trim()).filter(Boolean);
};

const writingSamples = [];
for (const msg of sentDetails) {
  const to = getHeader(msg, "To");
  const cc = getHeader(msg, "Cc");
  const date = getHeader(msg, "Date");
  const subject = getHeader(msg, "Subject");
  const msgDate = date ? new Date(date) : null;
  const hasValidDate = msgDate !== null && !isNaN(msgDate.getTime());

  // Track contacts the user emails with enhanced data
  const recipients = [to, cc]
    .filter(Boolean)
    .flatMap((headerValue) => splitAddressList(headerValue));
  for (const recipient of recipients) {
    const email = extractEmail(recipient);
    const name = extractName(recipient);
    if (!contactsSent.has(email)) {
      contactsSent.set(email, {
        name,
        count: 0,
        dates: [],
        threadIds: new Set(),
      });
    }
    const sentContact = contactsSent.get(email);
    sentContact.count++;
    if (hasValidDate) {
      sentContact.dates.push(msgDate);
    }
    if (msg.threadId) {
      sentContact.threadIds.add(msg.threadId);
    }
  }

  // Extract body for writing samples; bodies under 50 characters (after
  // trimming) are too short to be useful and are skipped.
  const bodyText = extractBodyText(msg.payload);
  if (bodyText && bodyText.trim().length >= 50) {
    writingSamples.push({
      text: bodyText,
      metadata: {
        id: msg.id,
        // Falls back to the collection time when the message has no Date header.
        date: date || new Date().toISOString(),
        subject: subject || "",
      },
    });
  }
}
// Merge the received-from and sent-to tallies into one record per address.
const allContacts = new Map();

// Return the merged record for an address, creating an empty one on first sight.
const ensureContact = (email, name) => {
  let record = allContacts.get(email);
  if (!record) {
    record = {
      email,
      name,
      receivedFrom: 0,
      sentTo: 0,
      dates: [],
      threadIds: new Set(),
    };
    allContacts.set(email, record);
  }
  return record;
};

contactsReceived.forEach((data, email) => {
  const contact = ensureContact(email, data.name);
  contact.receivedFrom = data.count;
  contact.dates.push(...data.dates);
  for (const threadId of data.threadIds) contact.threadIds.add(threadId);
});

contactsSent.forEach((data, email) => {
  const contact = ensureContact(email, data.name);
  contact.sentTo = data.count;
  contact.dates.push(...data.dates);
  for (const threadId of data.threadIds) contact.threadIds.add(threadId);
  // Prefer name from sent (more likely to be accurate)
  const hasUsableName = Boolean(data.name) && data.name !== "Unknown";
  if (hasUsableName) {
    contact.name = data.name;
  }
});
/**
 * Enhanced contact scoring with time span, recency, and conversation depth.
 *
 * Components (best case 30 + 20 + 15 + 20 + 15 = 100):
 *   volume        3 points per interaction, capped at 30
 *   bidirectional 20 when mail flows in both directions
 *   timeSpan      +15 for a sustained relationship, -15 for a short burst
 *   recency       20, minus 1 for every 5 days since the last contact, floor 0
 *   depth         +10 when avg messages/thread > 3, +5 when longest thread > 5
 *
 * Addresses that look automated (noreply, mailer-daemon, ...) receive a
 * penalty larger than the best attainable score, so their score is always
 * negative. The penalty is not part of `breakdown`.
 *
 * @param {object} contact - Metrics for one contact: `email`,
 *   `totalInteractions`, `bidirectional`, `spanDays`, `daysSinceLastContact`,
 *   `avgMessagesPerThread`, `longestThread`.
 * @returns {{score: number, confidence: string, breakdown: object}}
 *   `confidence` is "high", "medium" or "low".
 */
const scoreContact = (contact) => {
  // Must exceed the maximum positive score (100). A penalty of exactly 100
  // would let a perfect-scoring automated address land on 0 and survive a
  // "score >= 0" filter.
  const NOISE_PENALTY = 1000;
  const NOISE_PATTERNS = [
    "noreply", "no-reply", "notifications", "mailer-daemon",
    "postmaster", "donotreply", "automated", "newsletter",
  ];

  const breakdown = {
    volume: 0,
    bidirectional: 0,
    timeSpan: 0,
    recency: 0,
    depth: 0,
  };

  // 1. Volume score (max 30 points)
  breakdown.volume = Math.min(30, contact.totalInteractions * 3);

  // 2. Bidirectional bonus (20 points)
  if (contact.bidirectional) {
    breakdown.bidirectional = 20;
  }

  // 3. Time span factor (+15 sustained, -15 burst)
  if (contact.spanDays > 30 && contact.totalInteractions >= 3) {
    breakdown.timeSpan = 15; // Sustained relationship
  } else if (contact.spanDays < 7 && contact.totalInteractions <= 4) {
    breakdown.timeSpan = -15; // Recent burst - likely one-off
  }

  // 4. Recency decay (max 20 points)
  breakdown.recency = Math.max(0, 20 - Math.floor(contact.daysSinceLastContact / 5));

  // 5. Conversation depth (max 15 points)
  if (contact.avgMessagesPerThread > 3) {
    breakdown.depth += 10;
  }
  if (contact.longestThread > 5) {
    breakdown.depth += 5;
  }

  let score =
    breakdown.volume +
    breakdown.bidirectional +
    breakdown.timeSpan +
    breakdown.recency +
    breakdown.depth;

  // Penalize obvious noise addresses
  const email = contact.email.toLowerCase();
  if (NOISE_PATTERNS.some((pattern) => email.includes(pattern))) {
    score -= NOISE_PENALTY;
  }

  // Determine confidence tier
  let confidence = "low";
  if (contact.totalInteractions >= 6 && contact.spanDays > 14) {
    confidence = "high";
  } else if (contact.totalInteractions >= 3) {
    confidence = "medium";
  }

  return { score, confidence, breakdown };
};
// Build contacts with enhanced metrics and scores, drop entries whose score is
// negative, and order the rest by score (ties broken by interaction count).
const contacts = [...allContacts.values()]
  .map((c) => {
    const MS_PER_DAY = 1000 * 60 * 60 * 24;
    const totalInteractions = c.receivedFrom + c.sentTo;
    const bidirectional = c.receivedFrom > 0 && c.sentTo > 0;

    // Calculate time-based metrics (sort a copy so the source record is not mutated)
    const sortedDates = [...c.dates].sort((a, b) => a.getTime() - b.getTime());
    const firstContactDate = sortedDates.length > 0 ? sortedDates[0] : null;
    const lastContactDate = sortedDates.length > 0 ? sortedDates[sortedDates.length - 1] : null;
    const spanDays = firstContactDate && lastContactDate
      ? Math.floor((lastContactDate.getTime() - firstContactDate.getTime()) / MS_PER_DAY)
      : 0;
    // 999 is a sentinel for "no dated message", large enough to zero the recency score.
    const daysSinceLastContact = lastContactDate
      ? Math.floor((Date.now() - lastContactDate.getTime()) / MS_PER_DAY)
      : 999;

    // Calculate thread depth metrics
    const uniqueThreads = c.threadIds.size;
    const avgMessagesPerThread = uniqueThreads > 0 ? totalInteractions / uniqueThreads : totalInteractions;
    // Per-thread message counts are not tracked, so the longest thread is
    // approximated by the rounded-up average.
    const longestThread = Math.ceil(avgMessagesPerThread);

    return {
      email: c.email,
      name: c.name,
      receivedFrom: c.receivedFrom,
      sentTo: c.sentTo,
      totalInteractions,
      bidirectional,
      // Time data
      firstContactDate: firstContactDate ? firstContactDate.toISOString() : null,
      lastContactDate: lastContactDate ? lastContactDate.toISOString() : null,
      spanDays,
      daysSinceLastContact,
      // Thread depth
      uniqueThreads,
      avgMessagesPerThread: Math.round(avgMessagesPerThread * 10) / 10,
      longestThread,
    };
  })
  .map((c) => {
    const { score, confidence, breakdown } = scoreContact(c);
    return { ...c, signalScore: score, confidence, breakdown };
  })
  .filter((c) => c.signalScore >= 0) // Remove obvious noise
  .sort((a, b) => b.signalScore - a.signalScore || b.totalInteractions - a.totalInteractions);
// Oldest and newest inbox days seen (ISO date keys sort chronologically).
const dates = Object.keys(dateVolume);
dates.sort();
const dateRange =
  dates.length === 0
    ? null
    : { oldest: dates[0], newest: dates[dates.length - 1] };

// Make sure the directory for each output file exists. A bare filename
// (dirname ".") needs no directory to be created.
const outputDirs = [
  path.dirname(profileOutputPath),
  path.dirname(writingSamplesOutputPath),
];
for (const dir of outputDirs) {
  if (dir && dir !== ".") fs.mkdirSync(dir, { recursive: true });
}
// Aggregate figures over the scored contacts, used for the "patterns" section.
const highConfidenceContacts = contacts.filter(({ confidence }) => confidence === "high");
const multiTurnContacts = contacts.filter(({ avgMessagesPerThread }) => avgMessagesPerThread > 3);
const singleExchangeContacts = contacts.filter(
  ({ uniqueThreads, totalInteractions }) => uniqueThreads === 1 && totalInteractions <= 2
);

// First contact with the strictly largest positive value of `field`, or null
// when every contact has 0 there.
const pickLargest = (field) => {
  let best = null;
  for (const candidate of contacts) {
    const bestValue = best ? best[field] || 0 : 0;
    if (candidate[field] > bestValue) best = candidate;
  }
  return best;
};
const longestRelationship = pickLargest("spanDays");
const deepestConversation = pickLargest("longestThread");

// Contact whose first dated message is the most recent; contacts without a
// first-contact date are ignored.
let newestContact = null;
let newestFirstSeen = new Date(0).getTime();
for (const candidate of contacts) {
  if (!candidate.firstContactDate) continue;
  const firstSeen = new Date(candidate.firstContactDate).getTime();
  if (firstSeen > newestFirstSeen) {
    newestContact = candidate;
    newestFirstSeen = firstSeen;
  }
}
// Assemble the profile document that is written to the profile output file.
const profileData = {
  // Label for the 90-day inbox window, ending today.
  period: `${formatDate(ninetyDaysAgo)} - ${formatDate(new Date())}`,
  summary: {
    inboxMessages: inboxMessages.length,
    sentMessages: sentDetails.length,
    uniqueContacts: contacts.length,
    bidirectionalContacts: contacts.filter((c) => c.bidirectional).length,
    highConfidenceContacts: highConfidenceContacts.length,
    discoveryCategories: discoveryWithResults.length,
  },
  // Quantitative patterns for wow moments
  patterns: {
    // Conversation depth
    multiTurnContacts: multiTurnContacts.length,
    singleExchangeContacts: singleExchangeContacts.length,
    deepestConversation: deepestConversation
      ? {
          contact: deepestConversation.name || deepestConversation.email,
          messageCount: deepestConversation.longestThread,
          avgPerThread: deepestConversation.avgMessagesPerThread,
        }
      : null,
    // Time investment
    longestRelationship: longestRelationship
      ? {
          contact: longestRelationship.name || longestRelationship.email,
          spanDays: longestRelationship.spanDays,
          firstContact: longestRelationship.firstContactDate,
        }
      : null,
    newestContact: newestContact
      ? {
          contact: newestContact.name || newestContact.email,
          firstContact: newestContact.firstContactDate,
          // Whole days elapsed since the first message. This is not `spanDays`,
          // which measures first-to-last contact and is 0 for a single email.
          daysSinceFirst: Math.floor(
            (Date.now() - new Date(newestContact.firstContactDate).getTime()) /
              (1000 * 60 * 60 * 24)
          ),
        }
      : null,
    // Relationship quality
    sustainedRelationships: contacts.filter((c) => c.spanDays > 30 && c.totalInteractions >= 3).length,
    recentBursts: contacts.filter((c) => c.spanDays < 7 && c.totalInteractions >= 2).length,
  },
  contacts: contacts.slice(0, 50), // Top 50 contacts
  // Labels by descending message count
  labels: Object.entries(labelCounts)
    .sort((a, b) => b[1] - a[1])
    .map(([label, count]) => ({ label, count })),
  // Daily inbox volume in chronological order
  volumeByDate: Object.entries(dateVolume)
    .sort((a, b) => a[0].localeCompare(b[0]))
    .map(([date, count]) => ({ date, count })),
  recentThreads: inboxMessages
    .slice(0, 20)
    .map((m) => ({ id: m.threadId, subject: m.subject, from: m.from })),
  // Discovery results for targeted profile extraction (with confidence tiers)
  discovery: Object.fromEntries(
    discoveryWithResults.map((r) => [
      r.category,
      { tier: r.tier, query: r.query, count: r.count, emails: r.emails },
    ])
  ),
};
// Write `json` to `outputPath`, flush it to disk, and verify the on-disk size
// matches the encoded length. Exits the process with code 1 when verification
// fails; otherwise returns the file size in bytes.
const writeVerifiedJson = (outputPath, json) => {
  fs.writeFileSync(outputPath, json);

  // Force flush to disk (important for large files). The descriptor is opened
  // read-write because fsync on a read-only descriptor is rejected on some
  // platforms (notably Windows), and it is closed even if fsync throws.
  const fd = fs.openSync(outputPath, "r+");
  try {
    fs.fsyncSync(fd);
  } finally {
    fs.closeSync(fd);
  }

  // Verify write succeeded
  if (!fs.existsSync(outputPath)) {
    console.error(`✗ File write verification failed: ${outputPath} not found after write`);
    process.exit(1);
  }
  const { size } = fs.statSync(outputPath);
  const expectedBytes = Buffer.byteLength(json, "utf8");
  if (size !== expectedBytes) {
    console.error(`✗ File size mismatch: expected ${expectedBytes} bytes, got ${size}`);
    process.exit(1);
  }
  return size;
};

const profileJson = JSON.stringify(profileData, null, 2);
const profileBytes = writeVerifiedJson(profileOutputPath, profileJson);
console.log(`\n✓ Profile data written to: ${profileOutputPath} (${(profileBytes / 1024).toFixed(1)}KB)`);

// Write writing samples
const writingSamplesData = {
  source: "gmail",
  analyzedAt: new Date().toISOString(),
  context: {
    timePeriod: "180d",
    sampleCount: writingSamples.length,
    minLength: 50,
  },
  samples: writingSamples,
};
const samplesJson = JSON.stringify(writingSamplesData, null, 2);
const samplesBytes = writeVerifiedJson(writingSamplesOutputPath, samplesJson);
console.log(`✓ Writing samples written to: ${writingSamplesOutputPath} (${(samplesBytes / 1024).toFixed(1)}KB)`);
console.log(` ${writingSamples.length} sent emails with analyzable content`);
// Human-readable summary, followed by a one-line JSON result on stdout.
const summaryLines = [
  `\n✓ Gmail data collection complete`,
  ` Period: ${profileData.period}`,
  ` Inbox: ${inboxMessages.length} messages`,
  ` Sent: ${sentDetails.length} messages`,
  ` Contacts: ${contacts.length} (${profileData.summary.bidirectionalContacts} bidirectional)`,
  ` Discovery: ${discoveryWithResults.length} categories with matches`,
];
summaryLines.forEach((line) => {
  console.log(line);
});

if (contacts.length > 0) {
  console.log(`\n Top contacts:`);
  for (const c of contacts.slice(0, 5)) {
    // ↔ two-way, ← only received from, → only sent to
    let direction = "→";
    if (c.bidirectional) {
      direction = "↔";
    } else if (c.receivedFrom > 0) {
      direction = "←";
    }
    console.log(` ${direction} ${c.name || c.email}: ${c.totalInteractions} emails`);
  }
}

const resultPayload = {
  success: true,
  profileOutputPath,
  writingSamplesOutputPath,
  inboxCount: inboxMessages.length,
  sentCount: sentDetails.length,
  contactCount: contacts.length,
  writingSampleCount: writingSamples.length,
  discoveryCategories: discoveryWithResults.length,
  discoveryHits: Object.keys(profileData.discovery),
};
console.log(JSON.stringify(resultPayload));
} catch (error) {
// Print a short message on stderr, then rethrow the same error object so the
// failure (with its original stack) still propagates out of the script.
console.error("Failed:", error.message);
throw error;
}