code icon Code

Connect to Gmail

Collect inbox emails (90 days), sent emails (6 months), and run targeted discovery searches for profile analysis

Source Code

import fs from "fs";
import path from "path";

// Both output destinations are mandatory positional CLI arguments
// (argv[0] is the node binary and argv[1] is the script path).
const profileOutputPath = process.argv[2];
const writingSamplesOutputPath = process.argv[3];

if (!(profileOutputPath && writingSamplesOutputPath)) {
  const usageLines = [
    "Usage: gmail.inbox.connect <profileOutputPath> <writingSamplesOutputPath>",
    "Both output paths are required.",
  ];
  usageLines.forEach((line) => console.error(line));
  process.exit(1);
}

// Upper bounds on how many message IDs are requested per collection phase.
const INBOX_MAX_MESSAGES = 75;
const SENT_MAX_MESSAGES = 50;
const DISCOVERY_MAX_PER_QUERY = 15;

// Timestamp exactly 90 days before the moment the script starts.
const ninetyDaysAgo = new Date(Date.now() - 90 * 24 * 3600 * 1000);

/**
 * Render a Date as a short US-English label: abbreviated month plus day number.
 */
const formatDate = (d) => {
  const labelOptions = { month: "short", day: "numeric" };
  return d.toLocaleDateString("en-US", labelOptions);
};

/**
 * Describe how long ago a Date was, as a compact relative label.
 *
 * Returns "unknown" for a missing or invalid date, "just now" for anything
 * less than a minute old (including dates in the future), and otherwise the
 * largest fitting unit: minutes (m), hours (h), days (d), weeks (w),
 * 30-day months (mo) or 365-day years (y).
 */
function formatTimeAgo(date) {
  const timestamp = date ? date.getTime() : NaN;
  if (Number.isNaN(timestamp)) return "unknown";

  // Each unit is derived from the floored value of the previous one.
  const elapsedSeconds = Math.floor((Date.now() - timestamp) / 1000);
  const elapsedMinutes = Math.floor(elapsedSeconds / 60);
  const elapsedHours = Math.floor(elapsedMinutes / 60);
  const elapsedDays = Math.floor(elapsedHours / 24);

  if (elapsedSeconds < 60) return "just now";
  if (elapsedMinutes < 60) return `${elapsedMinutes}m ago`;
  if (elapsedHours < 24) return `${elapsedHours}h ago`;
  if (elapsedDays < 7) return `${elapsedDays}d ago`;
  if (elapsedDays < 30) return `${Math.floor(elapsedDays / 7)}w ago`;
  if (elapsedDays < 365) return `${Math.floor(elapsedDays / 30)}mo ago`;
  return `${Math.floor(elapsedDays / 365)}y ago`;
}

// Targeted discovery searches organized by confidence tier.
// Each entry is { tier, category, query }: `query` is a Gmail search string and
// `category` is the key the results are stored under in the profile output.
// Tier 1: High-confidence personal facts (user wrote about themselves or verification emails)
// Tier 2: Tool usage signals (high signal for understanding user's workflow)
// Tier 3: Professional interests (conferences, newsletters, certifications)
// Tier 4: Infrastructure signals (for tech users)
// Tier 5: Lifestyle signals (commerce, finance, travel, health, entertainment)
const DISCOVERY_QUERIES = [
  // === TIER 1: High-confidence personal facts ===
  // User writes about themselves in sent mail
  { tier: 1, category: "children", query: 'in:sent ("my son" OR "my daughter" OR "my kids" OR "the kids")' },
  { tier: 1, category: "partner", query: 'in:sent ("my husband" OR "my wife" OR "my partner" OR "my boyfriend" OR "my girlfriend") -"business partner" -"design partner" -"co-founder"' },
  { tier: 1, category: "pets", query: 'in:sent ("my dog" OR "my cat" OR "our dog" OR "our cat" OR "our pet")' },
  { tier: 1, category: "phone_numbers", query: 'in:sent ("my number is" OR "my cell is" OR "call me at" OR "text me at" OR "reach me at")' },
  { tier: 1, category: "birthday", query: 'to:me subject:"happy birthday"' },
  // Location - require actual shipping services, not promos
  { tier: 1, category: "location", query: 'from:(amazon OR fedex OR ups OR usps OR dhl) ("delivered to" OR "shipping address" OR "shipped to")' },
  // Communication - verification emails prove account ownership
  { tier: 1, category: "whatsapp", query: 'from:whatsapp ("verification code" OR "your code" OR "new device")' },
  { tier: 1, category: "signal_app", query: 'from:signal ("verification" OR "code")' },
  { tier: 1, category: "telegram", query: 'from:telegram ("code" OR "verification")' },
  { tier: 1, category: "slack", query: 'from:slack ("verification code" OR "sign in code" OR "confirmation code")' },

  // === TIER 2: Tool usage (HIGH SIGNAL for workflow understanding) ===
  // SaaS trials show active tool exploration
  { tier: 2, category: "saas_trials", query: 'subject:("trial started" OR "trial ending" OR "days left" OR "trial expires")' },
  // Receipts show what user actually pays for
  { tier: 2, category: "receipts", query: 'from:(stripe OR paypal) subject:("receipt" OR "payment" OR "invoice")' },
  // Project management tools show active usage
  { tier: 2, category: "project_mgmt", query: 'from:(linear OR jira OR asana OR notion OR monday) subject:("assigned" OR "mentioned" OR "commented")' },
  // Password manager shows security posture
  { tier: 2, category: "password_mgr", query: 'from:(1password OR lastpass OR bitwarden OR dashlane) subject:("security" OR "new device" OR "sign in")' },
  // Video platform preference from user's sent calendly/scheduling links
  { tier: 2, category: "video_platform", query: 'in:sent ("calendly.com" OR "cal.com") ("zoom" OR "meet.google" OR "teams")' },
  // Subscription confirmations and welcome emails show services the user signed up for
  { tier: 2, category: "subscriptions", query: 'subject:("subscription confirmed" OR "you subscribed" OR "thanks for subscribing" OR "welcome to")' },

  // === TIER 3: Professional interests ===
  // Conference registrations show professional interests
  { tier: 3, category: "conferences", query: 'subject:("ticket confirmation" OR "registration confirmed" OR "event registration") -meeting -calendar' },
  // Newsletter subscriptions show content interests
  { tier: 3, category: "newsletters", query: 'from:substack OR subject:("you subscribed" "newsletter")' },
  // Certifications show skills and learning focus
  { tier: 3, category: "certifications", query: 'subject:("certificate" OR "certification" OR "completed course" OR "course complete")' },
  // Social media - account existence signals
  { tier: 3, category: "instagram", query: "from:instagram" },
  { tier: 3, category: "linkedin", query: "from:linkedin" },
  { tier: 3, category: "twitter", query: "from:twitter OR from:x.com" },
  { tier: 3, category: "github", query: "from:github" },

  // === TIER 4: Infrastructure signals (for tech users) ===
  // Hosting/deployment shows side projects
  { tier: 4, category: "hosting", query: 'from:(vercel OR netlify OR heroku OR railway OR render) subject:("deployed" OR "deployment" OR "build")' },
  // Domain ownership shows side projects/businesses
  { tier: 4, category: "domains", query: 'from:(namecheap OR godaddy OR cloudflare OR porkbun) subject:("renewal" OR "registered" OR "expiring")' },
  // Cloud storage patterns
  { tier: 4, category: "cloud_storage", query: 'in:sent ("dropbox.com/s/" OR "drive.google.com/file" OR "notion.so/")' },

  // === TIER 5: Lifestyle signals ===
  // Commerce
  { tier: 5, category: "amazon", query: 'from:amazon "your order"' },
  // Financial (institution names only, never account numbers)
  { tier: 5, category: "banking", query: '(from:chase OR from:wellsfargo OR from:bankofamerica OR from:citi OR from:capitalone) subject:statement' },
  { tier: 5, category: "investments", query: "from:fidelity OR from:schwab OR from:vanguard OR from:robinhood" },
  // Travel
  { tier: 5, category: "travel", query: 'subject:("booking confirmed" OR "reservation confirmed" OR "flight confirmation" OR "itinerary") (from:airline OR from:hotel OR from:airbnb OR from:expedia OR from:booking)' },
  // Health
  { tier: 5, category: "health", query: 'subject:("appointment confirmed" OR "appointment reminder" OR "your visit") (from:doctor OR from:health OR from:medical OR from:pharmacy)' },
  // Entertainment
  { tier: 5, category: "spotify", query: "from:spotify" },
  { tier: 5, category: "netflix", query: "from:netflix" },
  { tier: 5, category: "discord", query: 'from:discord subject:("joined" OR "invited")' },
];

// Announce the collection windows and per-mailbox caps before any API call is made.
console.log("Collecting Gmail data (inbox: 90 days, sent: 6 months)...");
console.log(`  Inbox: up to ${INBOX_MAX_MESSAGES} messages`);
console.log(`  Sent: up to ${SENT_MAX_MESSAGES} messages`);

/**
 * List Gmail message IDs that match a search query.
 *
 * Pages through the messages.list endpoint (at most 100 IDs per request)
 * until `maxResults` IDs have been gathered, a page comes back empty, or the
 * API stops returning a next-page token.
 *
 * @param {string} query - Gmail search string; omitted from the request when falsy.
 * @param {number} maxResults - Maximum number of IDs to return.
 * @returns {Promise<string[]>} Message IDs in the order the API returned them.
 * @throws {Error} When the API responds with a non-2xx status or a body that is not JSON.
 */
async function fetchMessageIds(query, maxResults) {
  const collected = [];
  let nextToken = null;

  while (collected.length < maxResults) {
    const stillNeeded = maxResults - collected.length;

    const endpoint = new URL(
      "https://gmail.googleapis.com/gmail/v1/users/me/messages"
    );
    endpoint.searchParams.set("maxResults", Math.min(stillNeeded, 100).toString());
    if (query) endpoint.searchParams.set("q", query);
    if (nextToken) endpoint.searchParams.set("pageToken", nextToken);

    const response = await fetch(endpoint.toString(), {
      headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
    });

    // Read the body as text first so it can be quoted in either error message.
    const rawBody = await response.text();
    if (!response.ok) {
      throw new Error(`Gmail API failed: ${response.status} - ${rawBody}`);
    }

    let page;
    try {
      page = JSON.parse(rawBody);
    } catch (e) {
      throw new Error(`Gmail API returned invalid JSON: ${rawBody.slice(0, 200)}`);
    }

    const pageMessages = page.messages;
    if (!pageMessages || pageMessages.length === 0) break;

    // Never keep more than was asked for, even if the page is larger.
    const pageIds = pageMessages.map((message) => message.id);
    collected.push(...pageIds.slice(0, stillNeeded));

    nextToken = page.nextPageToken;
    if (!nextToken) break;
  }

  return collected;
}

/**
 * Fetch message resources for a list of message IDs, 25 requests at a time.
 *
 * Fetching is best-effort: a message is skipped (not fatal) when its request
 * is rejected (e.g. a network error), the API answers with a non-2xx status,
 * or the response body is not valid JSON. The number of skipped messages is
 * logged once at the end so dropped data is visible in the run output.
 *
 * @param {string[]} messageIds - Gmail message IDs to fetch.
 * @param {string} [format="metadata"] - Value for the API's `format` parameter.
 *   With "metadata", only the Subject, From, To, Date and Cc headers are requested.
 * @returns {Promise<object[]>} Parsed message resources, in input order, without the skipped ones.
 */
async function fetchMessages(messageIds, format = "metadata") {
  const CONCURRENCY = 25;
  const results = [];
  let skippedCount = 0;

  // Resolves to the parsed message, or null when this one message cannot be retrieved.
  const fetchOne = async (id) => {
    let url = `https://gmail.googleapis.com/gmail/v1/users/me/messages/${id}?format=${format}`;
    if (format === "metadata") {
      url +=
        "&metadataHeaders=Subject&metadataHeaders=From&metadataHeaders=To&metadataHeaders=Date&metadataHeaders=Cc";
    }
    try {
      const res = await fetch(url, {
        headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
      });
      if (!res.ok) return null;
      return await res.json();
    } catch {
      // Network failure or invalid JSON: treat like an HTTP error and skip the
      // message instead of rejecting the whole batch.
      return null;
    }
  };

  for (let i = 0; i < messageIds.length; i += CONCURRENCY) {
    const batch = messageIds.slice(i, i + CONCURRENCY);
    const settled = await Promise.all(batch.map((id) => fetchOne(id)));
    const fetched = settled.filter(Boolean);

    skippedCount += batch.length - fetched.length;
    results.push(...fetched);

    console.log(
      `  Fetched ${Math.min(i + CONCURRENCY, messageIds.length)}/${
        messageIds.length
      }...`
    );
  }

  if (skippedCount > 0) {
    console.log(
      `  Warning: ${skippedCount}/${messageIds.length} messages could not be fetched and were skipped`
    );
  }

  return results;
}

/**
 * Pull the lowercased email address out of an address header value.
 *
 * When the value has an angle-bracketed part ("Name <addr>"), the bracket
 * contents are returned; otherwise the whole value is lowercased and trimmed.
 * A missing or empty header yields "unknown".
 */
function extractEmail(header) {
  if (!header) {
    return "unknown";
  }

  const bracketed = /<([^>]+)>/.exec(header);
  if (bracketed) {
    return bracketed[1].toLowerCase();
  }
  return header.toLowerCase().trim();
}

/**
 * Derive a human-readable name from an address header value.
 *
 * - "Display Name <addr@host>" returns the display name, trimmed, with double quotes removed.
 * - "<addr@host>", or a value whose display name is empty, returns the local
 *   part of the bracketed address (the text before "@").
 * - A bare "addr@host" returns its local part.
 * - A missing or empty header returns "Unknown".
 *
 * @param {string} header - Raw header value such as a From/To/Cc entry.
 * @returns {string} The display name or address local part.
 */
function extractName(header) {
  if (!header) return "Unknown";

  const displayMatch = header.match(/^([^<]+)</);
  const displayName = displayMatch
    ? displayMatch[1].trim().replace(/"/g, "")
    : "";
  if (displayName) return displayName;

  // No usable display name: fall back to the address itself, taking it from
  // inside the angle brackets when present so the result never contains "<".
  const bracketed = header.match(/<([^>]+)>/);
  const address = bracketed ? bracketed[1] : header;
  return address.split("@")[0];
}

/**
 * Look up a header on a Gmail message by case-insensitive name.
 *
 * Returns the value of the first matching entry in `msg.payload.headers`, or
 * an empty string when the message has no headers or none match.
 */
function getHeader(msg, name) {
  const headers = msg.payload?.headers;
  if (headers == null) return "";

  for (const entry of headers) {
    if (entry.name.toLowerCase() === name.toLowerCase()) {
      return entry.value;
    }
  }
  return "";
}

/**
 * Extract the plain-text body from a Gmail message payload.
 *
 * Resolution order:
 *  1. If the payload itself carries body data, that data is decoded and
 *     returned (whatever its MIME type).
 *  2. Otherwise the MIME tree under `payload.parts` is searched depth-first,
 *     at any nesting depth, for the first `text/plain` part that decodes to
 *     non-empty text.
 *
 * @param {object|null|undefined} payload - The `payload` of a message fetched with format "full".
 * @returns {string} Decoded UTF-8 text, or "" when no text could be extracted.
 */
function extractBodyText(payload) {
  if (!payload) return "";

  // Node's "base64" decoder also accepts the URL-safe alphabet, so one decoder
  // covers both variants. Undecodable data counts as "no text".
  const decode = (data) => {
    try {
      return Buffer.from(data, "base64").toString("utf-8");
    } catch {
      return "";
    }
  };

  if (payload.body?.data) {
    return decode(payload.body.data);
  }

  // Each part is checked before its own children, so a shallow text/plain
  // part wins over a deeper one that appears later in the tree.
  const findPlainText = (parts) => {
    if (!Array.isArray(parts)) return "";
    for (const part of parts) {
      if (part.mimeType === "text/plain" && part.body?.data) {
        const text = decode(part.body.data);
        if (text) return text;
      }
      const nestedText = findPlainText(part.parts);
      if (nestedText) return nestedText;
    }
    return "";
  };

  return findPlainText(payload.parts);
}

/**
 * Split an address-list header value (To/Cc) into individual addresses.
 *
 * Commas inside a double-quoted display name (e.g. `"Doe, John" <j@x.com>`)
 * do not separate addresses; only commas outside quotes do.
 *
 * @param {string} headerValue - Raw header value.
 * @returns {string[]} Trimmed, non-empty address entries.
 */
function splitAddressList(headerValue) {
  const addresses = [];
  let current = "";
  let inQuotes = false;

  for (let i = 0; i < headerValue.length; i++) {
    const ch = headerValue[i];
    if (inQuotes && ch === "\\" && i + 1 < headerValue.length) {
      // Keep an escaped character (such as \") together with its backslash so
      // it cannot be mistaken for the closing quote.
      current += ch + headerValue[i + 1];
      i++;
      continue;
    }
    if (ch === '"') {
      inQuotes = !inQuotes;
    }
    if (ch === "," && !inQuotes) {
      addresses.push(current);
      current = "";
      continue;
    }
    current += ch;
  }
  addresses.push(current);

  return addresses.map((entry) => entry.trim()).filter(Boolean);
}

/**
 * Parse a Date header value.
 *
 * @param {string} value - Raw header value (may be empty).
 * @returns {Date|null} The parsed date, or null when the value is empty or unparseable.
 */
function parseValidDate(value) {
  if (!value) return null;
  const parsed = new Date(value);
  return Number.isNaN(parsed.getTime()) ? null : parsed;
}

/**
 * Record one message exchanged with `email` in a per-direction contact map.
 *
 * @param {Map<string, object>} contactMap - email -> { name, count, dates, threadIds }.
 * @param {string} email - Contact email (map key).
 * @param {string} name - Name stored when the contact is first seen.
 * @param {Date|null} msgDate - Message date, or null when unknown.
 * @param {string|undefined} threadId - Gmail thread ID of the message.
 */
function recordContact(contactMap, email, name, msgDate, threadId) {
  let entry = contactMap.get(email);
  if (!entry) {
    entry = { name, count: 0, dates: [], threadIds: new Set() };
    contactMap.set(email, entry);
  }
  entry.count++;
  if (msgDate) entry.dates.push(msgDate);
  if (threadId) entry.threadIds.add(threadId);
}

/**
 * Write `json` to `filePath`, flush it to disk, and verify the on-disk size.
 *
 * The data is written and fsync'ed through one writable descriptor: fsync on a
 * read-only descriptor is rejected on some platforms (e.g. Windows), and the
 * try/finally guarantees the descriptor is closed even if the flush fails.
 * Exits the process with status 1 when the file is missing or its size does
 * not match the byte length of `json`.
 *
 * @param {string} filePath - Destination path.
 * @param {string} json - Serialized content to write.
 * @returns {number} Size of the written file in bytes.
 */
function writeJsonVerified(filePath, json) {
  const fd = fs.openSync(filePath, "w");
  try {
    fs.writeFileSync(fd, json);
    // Force flush to disk (important for large files)
    fs.fsyncSync(fd);
  } finally {
    fs.closeSync(fd);
  }

  // Verify write succeeded
  if (!fs.existsSync(filePath)) {
    console.error(`✗ File write verification failed: ${filePath} not found after write`);
    process.exit(1);
  }
  const { size } = fs.statSync(filePath);
  const expectedBytes = Buffer.byteLength(json, "utf8");
  if (size !== expectedBytes) {
    console.error(`✗ File size mismatch: expected ${expectedBytes} bytes, got ${size}`);
    process.exit(1);
  }
  return size;
}

// Main pipeline:
//   Phase 1 - list inbox (90 days) and sent (180 days) message IDs
//   Phase 2 - fetch inbox metadata and full sent messages
//   Phase 3 - run every discovery query and fetch metadata for its hits
//   then aggregate contacts, score them, and write the profile and
//   writing-samples JSON files. The last stdout line is a JSON status object.
try {
  const MS_PER_DAY = 1000 * 60 * 60 * 24;

  // Fetch inbox and sent message IDs in parallel
  console.log("\nPhase 1: Listing message IDs...");
  const [inboxIds, sentIds] = await Promise.all([
    fetchMessageIds("newer_than:90d -in:sent -category:promotions -category:updates -category:social -category:forums", INBOX_MAX_MESSAGES),
    fetchMessageIds("in:sent newer_than:180d", SENT_MAX_MESSAGES), // 6 months for writing samples
  ]);

  console.log(`  Inbox: ${inboxIds.length} messages`);
  console.log(`  Sent: ${sentIds.length} messages`);

  if (inboxIds.length === 0 && sentIds.length === 0) {
    console.error("\n✗ No messages found in the last 90 days.");
    console.log(
      JSON.stringify({
        success: false,
        error: "no_messages_found",
      })
    );
    process.exit(1);
  }

  // Fetch message details in parallel
  console.log("\nPhase 2: Fetching message details...");
  const [inboxDetails, sentDetails] = await Promise.all([
    fetchMessages(inboxIds, "metadata"),
    fetchMessages(sentIds, "full"), // Full for writing samples
  ]);

  console.log(`  Inbox: ${inboxDetails.length} fetched`);
  console.log(`  Sent: ${sentDetails.length} fetched`);

  // Phase 3: Discovery searches
  // A failing query is reported as a warning and recorded with count 0 so one
  // bad search cannot abort the whole run.
  console.log("\nPhase 3: Running discovery searches...");
  const discoveryResults = await Promise.all(
    DISCOVERY_QUERIES.map(async ({ tier, category, query }) => {
      try {
        const ids = await fetchMessageIds(query, DISCOVERY_MAX_PER_QUERY);
        if (ids.length === 0) {
          return { tier, category, query, count: 0, emails: [] };
        }
        const messages = await fetchMessages(ids, "metadata");
        return {
          tier,
          category,
          query,
          count: messages.length,
          emails: messages.map((m) => {
            const dateStr = getHeader(m, "Date");
            return {
              id: m.id,
              threadId: m.threadId,
              from: getHeader(m, "From"),
              to: getHeader(m, "To"),
              subject: getHeader(m, "Subject"),
              snippet: m.snippet,
              timeAgo: formatTimeAgo(new Date(dateStr)),
            };
          }),
        };
      } catch (err) {
        console.log(`  Warning: ${category} search failed: ${err.message}`);
        return { tier, category, query, count: 0, emails: [], error: err.message };
      }
    })
  );

  const discoveryWithResults = discoveryResults.filter((r) => r.count > 0);
  console.log(
    `  Discovery: ${discoveryWithResults.length}/${DISCOVERY_QUERIES.length} categories found`
  );
  for (const r of discoveryWithResults) {
    console.log(`    - ${r.category}: ${r.count} emails`);
  }

  // Process inbox messages - track enhanced contact data
  const inboxMessages = [];
  const contactsReceived = new Map(); // email -> { name, count, dates, threadIds }
  const contactsSent = new Map(); // email -> { name, count, dates, threadIds }
  const labelCounts = {};
  const dateVolume = {};

  for (const msg of inboxDetails) {
    const from = getHeader(msg, "From");
    const to = getHeader(msg, "To");
    const date = getHeader(msg, "Date");
    const subject = getHeader(msg, "Subject");
    const msgDate = parseValidDate(date);

    // Track contacts who email the user with enhanced data
    recordContact(
      contactsReceived,
      extractEmail(from),
      extractName(from),
      msgDate,
      msg.threadId
    );

    // Track labels
    for (const label of msg.labelIds || []) {
      labelCounts[label] = (labelCounts[label] || 0) + 1;
    }

    // Track volume by date (UTC calendar day)
    if (msgDate) {
      const dateKey = msgDate.toISOString().split("T")[0];
      dateVolume[dateKey] = (dateVolume[dateKey] || 0) + 1;
    }

    inboxMessages.push({
      id: msg.id,
      threadId: msg.threadId,
      from,
      to,
      subject,
      date,
      snippet: msg.snippet,
      labelIds: msg.labelIds || [],
    });
  }

  // Process sent messages for contacts and writing samples
  const writingSamples = [];

  for (const msg of sentDetails) {
    const to = getHeader(msg, "To");
    const cc = getHeader(msg, "Cc");
    const date = getHeader(msg, "Date");
    const subject = getHeader(msg, "Subject");
    const msgDate = parseValidDate(date);

    // Track contacts the user emails with enhanced data.
    // Split each header on commas outside quotes so a display name such as
    // "Doe, John" stays attached to its address.
    const recipients = [to, cc]
      .filter(Boolean)
      .flatMap((value) => splitAddressList(value));

    for (const recipient of recipients) {
      recordContact(
        contactsSent,
        extractEmail(recipient),
        extractName(recipient),
        msgDate,
        msg.threadId
      );
    }

    // Extract body for writing samples (bodies under 50 characters are skipped)
    const bodyText = extractBodyText(msg.payload);
    if (bodyText && bodyText.trim().length >= 50) {
      writingSamples.push({
        text: bodyText,
        metadata: {
          id: msg.id,
          date: date || new Date().toISOString(),
          subject: subject || "",
        },
      });
    }
  }

  // Build contacts with bidirectional signals and enhanced metrics
  const allContacts = new Map();

  const ensureContact = (email, name) => {
    if (!allContacts.has(email)) {
      allContacts.set(email, {
        email,
        name,
        receivedFrom: 0,
        sentTo: 0,
        dates: [],
        threadIds: new Set(),
      });
    }
    return allContacts.get(email);
  };

  for (const [email, data] of contactsReceived) {
    const contact = ensureContact(email, data.name);
    contact.receivedFrom = data.count;
    contact.dates.push(...data.dates);
    data.threadIds.forEach((t) => contact.threadIds.add(t));
  }

  for (const [email, data] of contactsSent) {
    const contact = ensureContact(email, data.name);
    contact.sentTo = data.count;
    contact.dates.push(...data.dates);
    data.threadIds.forEach((t) => contact.threadIds.add(t));
    // Prefer name from sent (more likely to be accurate)
    if (data.name && data.name !== "Unknown") {
      contact.name = data.name;
    }
  }

  /**
   * Enhanced contact scoring with time span, recency, and conversation depth.
   * Returns { score, confidence, breakdown }; a contact whose address matches
   * a noise pattern gets a 100-point penalty, which pushes it below the
   * `signalScore >= 0` filter applied afterwards.
   */
  const scoreContact = (contact) => {
    let score = 0;
    const breakdown = {
      volume: 0,
      bidirectional: 0,
      timeSpan: 0,
      recency: 0,
      depth: 0,
    };

    // 1. Volume score (max 30 points)
    breakdown.volume = Math.min(30, contact.totalInteractions * 3);
    score += breakdown.volume;

    // 2. Bidirectional bonus (20 points)
    if (contact.bidirectional) {
      breakdown.bidirectional = 20;
      score += 20;
    }

    // 3. Time span factor (+15 sustained, -15 burst)
    if (contact.spanDays > 30 && contact.totalInteractions >= 3) {
      breakdown.timeSpan = 15; // Sustained relationship
      score += 15;
    } else if (contact.spanDays < 7 && contact.totalInteractions <= 4) {
      breakdown.timeSpan = -15; // Recent burst - likely one-off
      score -= 15;
    }

    // 4. Recency decay (max 20 points)
    breakdown.recency = Math.max(0, 20 - Math.floor(contact.daysSinceLastContact / 5));
    score += breakdown.recency;

    // 5. Conversation depth (max 15 points)
    if (contact.avgMessagesPerThread > 3) {
      breakdown.depth += 10;
      score += 10;
    }
    if (contact.longestThread > 5) {
      breakdown.depth += 5;
      score += 5;
    }

    // Penalize obvious noise addresses
    const email = contact.email.toLowerCase();
    const noisePatterns = [
      "noreply", "no-reply", "notifications", "mailer-daemon",
      "postmaster", "donotreply", "automated", "newsletter"
    ];
    if (noisePatterns.some((p) => email.includes(p))) {
      score -= 100;
    }

    // Determine confidence tier
    let confidence = "low";
    if (contact.totalInteractions >= 6 && contact.spanDays > 14) {
      confidence = "high";
    } else if (contact.totalInteractions >= 3) {
      confidence = "medium";
    }

    return { score, confidence, breakdown };
  };

  // Build contacts with enhanced metrics and scores
  const contacts = [...allContacts.values()]
    .map((c) => {
      const totalInteractions = c.receivedFrom + c.sentTo;
      const bidirectional = c.receivedFrom > 0 && c.sentTo > 0;

      // Calculate time-based metrics (sort a copy; the aggregate is left untouched)
      const sortedDates = [...c.dates].sort((a, b) => a.getTime() - b.getTime());
      const firstContactDate = sortedDates.length > 0 ? sortedDates[0] : null;
      const lastContactDate = sortedDates.length > 0 ? sortedDates[sortedDates.length - 1] : null;

      const spanDays = firstContactDate && lastContactDate
        ? Math.floor((lastContactDate.getTime() - firstContactDate.getTime()) / MS_PER_DAY)
        : 0;
      // 999 marks "no dated message" so the recency score decays to zero
      const daysSinceLastContact = lastContactDate
        ? Math.floor((Date.now() - lastContactDate.getTime()) / MS_PER_DAY)
        : 999;

      // Calculate thread depth metrics
      const uniqueThreads = c.threadIds.size;
      const avgMessagesPerThread = uniqueThreads > 0 ? totalInteractions / uniqueThreads : totalInteractions;

      // Note: We track thread IDs but not per-message counts, so use avgMessagesPerThread as approximation
      const longestThread = Math.ceil(avgMessagesPerThread);

      return {
        email: c.email,
        name: c.name,
        receivedFrom: c.receivedFrom,
        sentTo: c.sentTo,
        totalInteractions,
        bidirectional,
        // Time data
        firstContactDate: firstContactDate ? firstContactDate.toISOString() : null,
        lastContactDate: lastContactDate ? lastContactDate.toISOString() : null,
        spanDays,
        daysSinceLastContact,
        // Thread depth
        uniqueThreads,
        avgMessagesPerThread: Math.round(avgMessagesPerThread * 10) / 10,
        longestThread,
      };
    })
    .map((c) => {
      const { score, confidence, breakdown } = scoreContact(c);
      return { ...c, signalScore: score, confidence, breakdown };
    })
    .filter((c) => c.signalScore >= 0) // Remove obvious noise
    .sort((a, b) => b.signalScore - a.signalScore || b.totalInteractions - a.totalInteractions);

  // Ensure output directories exist
  const profileDir = path.dirname(profileOutputPath);
  const samplesDir = path.dirname(writingSamplesOutputPath);
  if (profileDir && profileDir !== ".") fs.mkdirSync(profileDir, { recursive: true });
  if (samplesDir && samplesDir !== ".") fs.mkdirSync(samplesDir, { recursive: true });

  // Compute quantitative patterns for wow moments
  const highConfidenceContacts = contacts.filter((c) => c.confidence === "high");
  const multiTurnContacts = contacts.filter((c) => c.avgMessagesPerThread > 3);
  const singleExchangeContacts = contacts.filter((c) => c.uniqueThreads === 1 && c.totalInteractions <= 2);
  const longestRelationship = contacts.reduce(
    (max, c) => (c.spanDays > (max?.spanDays || 0) ? c : max),
    null
  );
  // Contact whose first dated message is the most recent; contacts without a
  // dated message are never selected, so a non-null result always has a date.
  const newestContact = contacts.reduce(
    (newest, c) => {
      if (!c.firstContactDate) return newest;
      const cDate = new Date(c.firstContactDate);
      const newestDate = newest?.firstContactDate ? new Date(newest.firstContactDate) : new Date(0);
      return cDate > newestDate ? c : newest;
    },
    null
  );
  const deepestConversation = contacts.reduce(
    (max, c) => (c.longestThread > (max?.longestThread || 0) ? c : max),
    null
  );

  // Write profile data
  const profileData = {
    period: `${formatDate(ninetyDaysAgo)} - ${formatDate(new Date())}`,
    summary: {
      inboxMessages: inboxMessages.length,
      sentMessages: sentDetails.length,
      uniqueContacts: contacts.length,
      bidirectionalContacts: contacts.filter((c) => c.bidirectional).length,
      highConfidenceContacts: highConfidenceContacts.length,
      discoveryCategories: discoveryWithResults.length,
    },
    // Quantitative patterns for wow moments
    patterns: {
      // Conversation depth
      multiTurnContacts: multiTurnContacts.length,
      singleExchangeContacts: singleExchangeContacts.length,
      deepestConversation: deepestConversation ? {
        contact: deepestConversation.name || deepestConversation.email,
        messageCount: deepestConversation.longestThread,
        avgPerThread: deepestConversation.avgMessagesPerThread,
      } : null,
      // Time investment
      longestRelationship: longestRelationship ? {
        contact: longestRelationship.name || longestRelationship.email,
        spanDays: longestRelationship.spanDays,
        firstContact: longestRelationship.firstContactDate,
      } : null,
      newestContact: newestContact ? {
        contact: newestContact.name || newestContact.email,
        firstContact: newestContact.firstContactDate,
        // Days elapsed since the first message, not the first-to-last span
        daysSinceFirst: Math.floor(
          (Date.now() - new Date(newestContact.firstContactDate).getTime()) / MS_PER_DAY
        ),
      } : null,
      // Relationship quality
      sustainedRelationships: contacts.filter((c) => c.spanDays > 30 && c.totalInteractions >= 3).length,
      recentBursts: contacts.filter((c) => c.spanDays < 7 && c.totalInteractions >= 2).length,
    },
    contacts: contacts.slice(0, 50), // Top 50 contacts
    labels: Object.entries(labelCounts)
      .sort((a, b) => b[1] - a[1])
      .map(([label, count]) => ({ label, count })),
    volumeByDate: Object.entries(dateVolume)
      .sort((a, b) => a[0].localeCompare(b[0]))
      .map(([date, count]) => ({ date, count })),
    recentThreads: inboxMessages
      .slice(0, 20)
      .map((m) => ({ id: m.threadId, subject: m.subject, from: m.from })),
    // Discovery results for targeted profile extraction (with confidence tiers)
    discovery: Object.fromEntries(
      discoveryWithResults.map((r) => [
        r.category,
        { tier: r.tier, query: r.query, count: r.count, emails: r.emails },
      ])
    ),
  };

  const profileJson = JSON.stringify(profileData, null, 2);
  const profileBytes = writeJsonVerified(profileOutputPath, profileJson);
  console.log(`\n✓ Profile data written to: ${profileOutputPath} (${(profileBytes / 1024).toFixed(1)}KB)`);

  // Write writing samples
  const writingSamplesData = {
    source: "gmail",
    analyzedAt: new Date().toISOString(),
    context: {
      timePeriod: "180d",
      sampleCount: writingSamples.length,
      minLength: 50,
    },
    samples: writingSamples,
  };

  const samplesJson = JSON.stringify(writingSamplesData, null, 2);
  const samplesBytes = writeJsonVerified(writingSamplesOutputPath, samplesJson);
  console.log(`✓ Writing samples written to: ${writingSamplesOutputPath} (${(samplesBytes / 1024).toFixed(1)}KB)`);
  console.log(`  ${writingSamples.length} sent emails with analyzable content`);

  // Summary
  console.log(`\n✓ Gmail data collection complete`);
  console.log(`  Period: ${profileData.period}`);
  console.log(`  Inbox: ${inboxMessages.length} messages`);
  console.log(`  Sent: ${sentDetails.length} messages`);
  console.log(`  Contacts: ${contacts.length} (${profileData.summary.bidirectionalContacts} bidirectional)`);
  console.log(`  Discovery: ${discoveryWithResults.length} categories with matches`);

  if (contacts.length > 0) {
    console.log(`\n  Top contacts:`);
    contacts.slice(0, 5).forEach((c) => {
      const direction =
        c.bidirectional ? "↔" : c.receivedFrom > 0 ? "←" : "→";
      console.log(
        `    ${direction} ${c.name || c.email}: ${c.totalInteractions} emails`
      );
    });
  }

  // Machine-readable status object, printed last
  console.log(
    JSON.stringify({
      success: true,
      profileOutputPath,
      writingSamplesOutputPath,
      inboxCount: inboxMessages.length,
      sentCount: sentDetails.length,
      contactCount: contacts.length,
      writingSampleCount: writingSamples.length,
      discoveryCategories: discoveryWithResults.length,
      discoveryHits: Object.keys(profileData.discovery),
    })
  );
} catch (error) {
  // Log a short message, then rethrow so the process still fails with the full error.
  console.error("Failed:", error.message);
  throw error;
}