code icon Code

List Flight Email Candidates

Search Gmail for potential flight emails using multiple search passes and return only IDs and snippets (lightweight).

Source Code

import fs from "fs";
import path from "path";

/**
 * Parse the "years back" CLI argument into a positive whole number of years.
 *
 * @param {string} raw - Raw argument text.
 * @param {number} [fallback=3] - Value used when `raw` is not a positive integer.
 * @returns {number} The parsed year count, or `fallback`.
 */
function parseYearsBack(raw, fallback = 3) {
  // Explicit radix so input such as "0x10" is never read as hexadecimal.
  const parsed = Number.parseInt(raw, 10);
  // Zero, negative and non-numeric input would move the search window to
  // today or into the future, so the default is used instead.
  return parsed > 0 ? parsed : fallback;
}

// CLI arguments: [yearsBack] [outputPath]; both are optional.
const [yearsBack = "3", outputPath = "session/flight-candidates.json"] =
  process.argv.slice(2);
const yearsNum = parseYearsBack(yearsBack);

// Calculate the date X years ago as YYYY/MM/DD, the form interpolated into
// each query's `after:` clause. toISOString() reports the date in UTC.
const startDate = new Date();
startDate.setFullYear(startDate.getFullYear() - yearsNum);
const afterDate = startDate.toISOString().split("T")[0].replace(/-/g, "/");

// Clause builders for the search passes below. Each pass is a list of terms
// OR-ed together inside parentheses and limited to mail after `afterDate`.
const dateBounded = (clauses) => `(${clauses.join(" OR ")}) after:${afterDate}`;
const quoted = (phrase) => `"${phrase}"`;
const subjectIs = (phrase) => `subject:"${phrase}"`;
const sentBy = (sender) => `from:${sender}`;

// Search queries - several independent passes so that a message missed by
// one pattern can still be picked up by another.
const queries = [
  // Pass 1: phrases expected in the subject line (most reliable)
  {
    name: "subject-patterns",
    query: dateBounded(
      [
        "flight confirmation",
        "your flight",
        "flight details",
        "boarding pass",
        "e-ticket",
        "flight itinerary",
        "trip confirmation",
        "booking confirmation",
        "travel itinerary",
        "flight reservation",
        "your trip",
        "your booking",
        "check-in",
        "ready to fly",
        "itinerary receipt",
        "e-receipt",
        "travel confirmation",
      ].map(subjectIs)
    ),
  },
  // Pass 2: phrases matched without a subject: prefix (catches generic subjects)
  {
    name: "body-patterns",
    query: dateBounded(
      [
        "confirmation number",
        "booking reference",
        "passenger name",
        "departure terminal",
        "seat assignment",
        "boarding time",
        "flight number",
        "check-in opens",
      ].map(quoted)
    ),
  },
  // Pass 3: airline/ticketing terminology
  {
    name: "airline-terms",
    query: dateBounded(
      [
        "PNR",
        "record locator",
        "e-ticket number",
        "ticket number",
        "itinerary number",
      ].map(quoted)
    ),
  },
  // Pass 4: senders matching major airline names
  {
    name: "airline-names",
    query: dateBounded(
      [
        "united",
        "delta",
        "american",
        "southwest",
        "jetblue",
        "british",
        "lufthansa",
        "airfrance",
        "emirates",
        "singapore",
        "ryanair",
        "easyjet",
        "virgin",
        "qantas",
        "cathay",
        "klm",
        "iberia",
        "finnair",
        "norwegian",
        "spirit",
        "frontier",
      ].map(sentBy)
    ),
  },
  // Pass 5: senders matching booking services and OTAs
  {
    name: "booking-services",
    query: dateBounded(
      [
        "expedia",
        "kayak",
        "booking",
        "skyscanner",
        "tripadvisor",
        "priceline",
        "orbitz",
        "travelocity",
        "cheapflights",
        "momondo",
        "google",
        "hopper",
        "kiwi",
      ].map(sentBy)
    ),
  },
  // Pass 6: codes of major hub airports
  {
    name: "airport-codes",
    query: dateBounded(
      [
        "JFK",
        "LAX",
        "LHR",
        "CDG",
        "SFO",
        "ORD",
        "DXB",
        "SIN",
        "HND",
        "FRA",
        "AMS",
        "HKG",
        "DFW",
        "DEN",
        "SEA",
        "ATL",
        "BOS",
        "MIA",
        "EWR",
        "IAH",
      ].map(quoted)
    ),
  },
];

// Startup banner: the search window and the number of passes about to run.
// The trailing "\n" on the second line leaves a blank line before pass output.
const bannerLines = [
  `Listing flight email candidates from the last ${yearsNum} years...`,
  `Running ${queries.length} search passes for maximum coverage...\n`,
];
for (const bannerLine of bannerLines) {
  console.log(bannerLine);
}

/**
 * Execute one search pass against the Gmail messages list endpoint and
 * gather the IDs of the matching messages, following page tokens until
 * `maxResults` IDs are held or no further page is offered.
 *
 * A non-OK response is logged and ends the pass early; whatever IDs were
 * collected up to that point are still returned.
 *
 * @param {{name: string, query: string}} queryObj - Pass label (used in log
 *   lines) and the search string sent as the `q` parameter.
 * @param {number} [maxResults=200] - Upper bound on the number of IDs collected.
 * @returns {Promise<Set<string>>} Unique message IDs found by this pass.
 */
async function runQuery(queryObj, maxResults = 200) {
  const { name: label, query: searchText } = queryObj;
  const found = new Set();
  let cursor = null;
  let exhausted = false;

  console.log(`  [${label}] Searching...`);

  while (!exhausted && found.size < maxResults) {
    const endpoint = new URL(
      "https://gmail.googleapis.com/gmail/v1/users/me/messages"
    );
    const params = endpoint.searchParams;
    // Request only what is still needed, at most 100 per page.
    params.set("maxResults", String(Math.min(maxResults - found.size, 100)));
    params.set("q", searchText);
    if (cursor) {
      params.set("pageToken", cursor);
    }

    const response = await fetch(endpoint.toString(), {
      headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
    });

    if (!response.ok) {
      console.log(`  [${label}] API error: ${response.status}`);
      break;
    }

    const { messages, nextPageToken } = await response.json();
    if (!messages || messages.length === 0) {
      break;
    }

    for (const { id } of messages) {
      found.add(id);
    }

    // No continuation token means the last page has been read.
    cursor = nextPageToken;
    exhausted = !cursor;
  }

  console.log(`  [${label}] Found ${found.size} messages`);
  return found;
}

/**
 * Fetch lightweight metadata (Subject, From and Date headers plus the
 * snippet) for each message ID, issuing requests in batches of CONCURRENCY.
 *
 * Messages whose request returns a non-OK status are left out of the result.
 * They are counted, and one summary line with the HTTP statuses seen is
 * logged at the end, so an incomplete candidate list is visible in the output.
 *
 * @param {Iterable<string>} messageIds - Message IDs to look up.
 * @returns {Promise<Array<{id: string, threadId: string, subject: string,
 *   from: string, date: string, snippet: string}>>} One entry per message
 *   that was fetched successfully, in request order.
 */
async function fetchMetadata(messageIds) {
  const CONCURRENCY = 25;
  const idArray = Array.from(messageIds);
  const candidates = [];
  const failedStatuses = [];

  // Case-insensitive header lookup; yields "" when the header is absent.
  const getHeader = (msg, name) => {
    const wanted = name.toLowerCase();
    const header = msg.payload?.headers?.find(
      (h) => h.name.toLowerCase() === wanted
    );
    return header ? header.value : "";
  };

  for (let i = 0; i < idArray.length; i += CONCURRENCY) {
    const batch = idArray.slice(i, i + CONCURRENCY);
    const fetched = await Promise.all(
      batch.map(async (id) => {
        const url =
          `https://gmail.googleapis.com/gmail/v1/users/me/messages/${id}?format=metadata` +
          "&metadataHeaders=Subject&metadataHeaders=From&metadataHeaders=Date";
        const res = await fetch(url, {
          headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
        });
        if (!res.ok) {
          // Remember the failure so it can be reported instead of vanishing.
          failedStatuses.push(res.status);
          return null;
        }
        return res.json();
      })
    );

    for (const msg of fetched) {
      if (!msg) continue;
      candidates.push({
        id: msg.id,
        threadId: msg.threadId,
        subject: getHeader(msg, "Subject"),
        from: getHeader(msg, "From"),
        date: getHeader(msg, "Date"),
        snippet: msg.snippet || "",
      });
    }

    // Progress line every 100 IDs and once more after the final batch.
    if ((i + CONCURRENCY) % 100 === 0 || i + CONCURRENCY >= idArray.length) {
      console.log(
        `  Fetched metadata: ${Math.min(i + CONCURRENCY, idArray.length)}/${idArray.length}`
      );
    }
  }

  if (failedStatuses.length > 0) {
    const skipped = failedStatuses.length;
    const statuses = [...new Set(failedStatuses)].join(", ");
    console.log(
      `  Skipped ${skipped} message${skipped === 1 ? "" : "s"}: metadata request failed (HTTP ${statuses})`
    );
  }

  return candidates;
}

// Main flow: run every search pass, merge the IDs, fetch metadata, sort the
// candidates chronologically and write them to `outputPath` as JSON.
try {
  // Run all queries in parallel and collect unique IDs
  const results = await Promise.all(queries.map((q) => runQuery(q)));
  const allIds = new Set();
  for (const ids of results) {
    for (const id of ids) {
      allIds.add(id);
    }
  }

  console.log(`\nTotal unique messages found: ${allIds.size}`);
  console.log("Fetching metadata...\n");

  // Fetch metadata for all unique IDs
  const candidates = await fetchMetadata(allIds);

  // Sort by date, oldest first. A missing or unparseable Date header parses
  // to NaN; subtracting such values would make the comparator inconsistent,
  // so those candidates are given +Infinity and therefore sort last.
  const toTimestamp = (candidate) => {
    const ms = Date.parse(candidate.date);
    return Number.isNaN(ms) ? Number.POSITIVE_INFINITY : ms;
  };
  candidates.sort((a, b) => {
    const left = toTimestamp(a);
    const right = toTimestamp(b);
    if (left === right) return 0;
    return left < right ? -1 : 1;
  });

  // Write output, creating the target directory first when one is given
  const dir = path.dirname(outputPath);
  if (dir && dir !== ".") fs.mkdirSync(dir, { recursive: true });

  const output = {
    searchPasses: queries.map((q) => q.name),
    yearsBack: yearsNum,
    fetchedAt: new Date().toISOString(),
    count: candidates.length,
    candidates,
  };

  fs.writeFileSync(outputPath, JSON.stringify(output, null, 2));

  console.log(`✓ Found ${candidates.length} candidate emails`);
  console.log(`  Written to: ${outputPath}`);
  console.log(`\n  Sample subjects:`);
  candidates.slice(0, 5).forEach((c) => {
    console.log(`    - ${c.subject.slice(0, 60)}`);
  });

  // Final one-line JSON summary of the run.
  console.log(
    JSON.stringify({
      success: true,
      outputPath,
      count: candidates.length,
    })
  );
} catch (error) {
  // Report a short message, then rethrow the original error unchanged.
  console.error("Error listing emails:", error.message);
  throw error;
}