List Flight Email Candidates
Search Gmail for potential flight emails using multiple search passes and return only lightweight data: message IDs, thread IDs, Subject/From/Date headers, and snippets.
Source Code
import fs from "fs";
import path from "path";
/**
 * Parse the "years back" CLI argument.
 *
 * @param {string} raw - Raw argument text, e.g. "3".
 * @param {number} [fallback=3] - Value used when `raw` is not a positive integer.
 * @returns {number} A positive whole number of years.
 */
function parseYearsBack(raw, fallback = 3) {
  // Explicit radix: input such as "010" or "0x10" must be read as decimal.
  const parsed = Number.parseInt(raw, 10);
  // Zero or a negative value would place the search window at or after "now",
  // so those are treated like unparseable input.
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

/**
 * Format a Date as YYYY/MM/DD, taken from its ISO (UTC) representation.
 *
 * @param {Date} date - Date to format.
 * @returns {string} The date as "YYYY/MM/DD".
 */
function toGmailDate(date) {
  return date.toISOString().split("T")[0].replace(/-/g, "/");
}

// CLI: [yearsBack] [outputPath]
const [yearsBack = "3", outputPath = "session/flight-candidates.json"] =
  process.argv.slice(2);
const yearsNum = parseYearsBack(yearsBack);

// Calculate date for X years ago
const startDate = new Date();
startDate.setFullYear(startDate.getFullYear() - yearsNum);
const afterDate = toGmailDate(startDate);
/**
 * Build the list of Gmail search passes.
 *
 * Every pass OR-s together one family of terms and is limited by the same
 * `after:` date. Several overlapping passes are used so that a message missed
 * by one family of terms can still be caught by another.
 *
 * @param {string} since - Date placed after the `after:` operator.
 * @returns {Array<{name: string, query: string}>} One entry per search pass.
 */
function buildSearchPasses(since) {
  const anyOf = (clauses) => `(${clauses.join(" OR ")}) after:${since}`;
  const inSubject = (text) => `subject:"${text}"`;
  const quoted = (text) => `"${text}"`;
  const sentBy = (sender) => `from:${sender}`;

  return [
    // Pass 1: subject-line phrases (most reliable)
    {
      name: "subject-patterns",
      query: anyOf(
        [
          "flight confirmation",
          "your flight",
          "flight details",
          "boarding pass",
          "e-ticket",
          "flight itinerary",
          "trip confirmation",
          "booking confirmation",
          "travel itinerary",
          "flight reservation",
          "your trip",
          "your booking",
          "check-in",
          "ready to fly",
          "itinerary receipt",
          "e-receipt",
          "travel confirmation",
        ].map((text) => inSubject(text))
      ),
    },
    // Pass 2: phrases searched without a field operator (catches generic subjects)
    {
      name: "body-patterns",
      query: anyOf(
        [
          "confirmation number",
          "booking reference",
          "passenger name",
          "departure terminal",
          "seat assignment",
          "boarding time",
          "flight number",
          "check-in opens",
        ].map((text) => quoted(text))
      ),
    },
    // Pass 3: airline ticketing vocabulary
    {
      name: "airline-terms",
      query: anyOf(
        [
          "PNR",
          "record locator",
          "e-ticket number",
          "ticket number",
          "itinerary number",
        ].map((text) => quoted(text))
      ),
    },
    // Pass 4: major airline names as sender
    {
      name: "airline-names",
      query: anyOf(
        [
          "united",
          "delta",
          "american",
          "southwest",
          "jetblue",
          "british",
          "lufthansa",
          "airfrance",
          "emirates",
          "singapore",
          "ryanair",
          "easyjet",
          "virgin",
          "qantas",
          "cathay",
          "klm",
          "iberia",
          "finnair",
          "norwegian",
          "spirit",
          "frontier",
        ].map((sender) => sentBy(sender))
      ),
    },
    // Pass 5: booking services and OTAs as sender
    {
      name: "booking-services",
      query: anyOf(
        [
          "expedia",
          "kayak",
          "booking",
          "skyscanner",
          "tripadvisor",
          "priceline",
          "orbitz",
          "travelocity",
          "cheapflights",
          "momondo",
          "google",
          "hopper",
          "kiwi",
        ].map((sender) => sentBy(sender))
      ),
    },
    // Pass 6: airport codes of major hubs
    {
      name: "airport-codes",
      query: anyOf(
        [
          "JFK",
          "LAX",
          "LHR",
          "CDG",
          "SFO",
          "ORD",
          "DXB",
          "SIN",
          "HND",
          "FRA",
          "AMS",
          "HKG",
          "DFW",
          "DEN",
          "SEA",
          "ATL",
          "BOS",
          "MIA",
          "EWR",
          "IAH",
        ].map((code) => quoted(code))
      ),
    },
  ];
}

// Search queries - multiple passes to catch more emails
const queries = buildSearchPasses(afterDate);

console.log(`Listing flight email candidates from the last ${yearsNum} years...`);
console.log(`Running ${queries.length} search passes for maximum coverage...\n`);
/**
 * Run one search pass against the Gmail message list endpoint and collect
 * the IDs of matching messages.
 *
 * Pages of at most 100 results are requested until `maxResults` IDs have been
 * gathered, the API stops returning a next-page token, a page comes back
 * empty, or a request returns a non-OK status (which is logged and ends the
 * pass with whatever was collected so far).
 *
 * @param {{name: string, query: string}} queryObj - Pass label and search query.
 * @param {number} [maxResults=200] - Stop once this many IDs are collected.
 * @returns {Promise<Set<string>>} Unique message IDs found by this pass.
 */
async function runQuery(queryObj, maxResults = 200) {
  const PAGE_LIMIT = 100;
  const endpoint = "https://gmail.googleapis.com/gmail/v1/users/me/messages";
  const found = new Set();
  let nextToken = null;

  console.log(` [${queryObj.name}] Searching...`);

  let keepGoing = found.size < maxResults;
  while (keepGoing) {
    // Never ask for more than is still needed, nor more than one page allows.
    const params = new URLSearchParams({
      maxResults: String(Math.min(maxResults - found.size, PAGE_LIMIT)),
      q: queryObj.query,
    });
    if (nextToken) {
      params.set("pageToken", nextToken);
    }

    const response = await fetch(`${endpoint}?${params.toString()}`, {
      headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
    });
    if (!response.ok) {
      console.log(` [${queryObj.name}] API error: ${response.status}`);
      break;
    }

    const page = await response.json();
    const hasMessages = Boolean(page.messages) && page.messages.length !== 0;
    if (!hasMessages) {
      break;
    }
    for (const { id } of page.messages) {
      found.add(id);
    }

    nextToken = page.nextPageToken;
    keepGoing = Boolean(nextToken) && found.size < maxResults;
  }

  console.log(` [${queryObj.name}] Found ${found.size} messages`);
  return found;
}
/**
 * Fetch the Subject/From/Date headers and snippet for each message ID.
 *
 * Requests are sent in batches of CONCURRENCY, one batch at a time. A message
 * whose request returns a non-OK status is left out of the result; the number
 * of such messages and the HTTP statuses seen are logged once at the end, so
 * that dropped messages are visible instead of silently disappearing.
 *
 * @param {Iterable<string>} messageIds - Gmail message IDs to look up.
 * @returns {Promise<Array<{id: string, threadId: string, subject: string,
 *   from: string, date: string, snippet: string}>>} One entry per message that
 *   was fetched successfully, in input order. Missing headers or snippet are
 *   returned as empty strings.
 */
async function fetchMetadata(messageIds) {
  const CONCURRENCY = 25;
  const idArray = Array.from(messageIds);
  const candidates = [];
  // HTTP status -> number of messages that failed with it.
  const failuresByStatus = new Map();

  // Header names are compared case-insensitively.
  const headerValue = (msg, name) => {
    const wanted = name.toLowerCase();
    const header = msg.payload?.headers?.find(
      (entry) => entry.name.toLowerCase() === wanted
    );
    return header ? header.value : "";
  };

  for (let i = 0; i < idArray.length; i += CONCURRENCY) {
    const batch = idArray.slice(i, i + CONCURRENCY);
    const fetched = await Promise.all(
      batch.map(async (id) => {
        const url =
          `https://gmail.googleapis.com/gmail/v1/users/me/messages/${id}?format=metadata` +
          "&metadataHeaders=Subject&metadataHeaders=From&metadataHeaders=Date";
        const res = await fetch(url, {
          headers: { Authorization: "Bearer PLACEHOLDER_TOKEN" },
        });
        if (!res.ok) {
          failuresByStatus.set(
            res.status,
            (failuresByStatus.get(res.status) ?? 0) + 1
          );
          return null;
        }
        return res.json();
      })
    );

    for (const msg of fetched) {
      if (!msg) continue;
      candidates.push({
        id: msg.id,
        threadId: msg.threadId,
        subject: headerValue(msg, "Subject"),
        from: headerValue(msg, "From"),
        date: headerValue(msg, "Date"),
        snippet: msg.snippet || "",
      });
    }

    // Report progress every 100 messages and once more after the last batch.
    if ((i + CONCURRENCY) % 100 === 0 || i + CONCURRENCY >= idArray.length) {
      console.log(
        ` Fetched metadata: ${Math.min(i + CONCURRENCY, idArray.length)}/${idArray.length}`
      );
    }
  }

  if (failuresByStatus.size > 0) {
    let skipped = 0;
    const breakdown = [];
    for (const [status, count] of failuresByStatus) {
      skipped += count;
      breakdown.push(`${status} x${count}`);
    }
    console.log(
      ` Skipped ${skipped} message(s): metadata request failed (HTTP ${breakdown.join(", ")})`
    );
  }

  return candidates;
}
// Main flow: run every search pass, merge the IDs, fetch metadata for each
// unique message, and write the candidates to `outputPath` as JSON.
try {
  // Run all queries in parallel and collect unique IDs; the Set removes
  // messages matched by more than one pass.
  const allIds = new Set();
  const results = await Promise.all(queries.map((q) => runQuery(q)));
  for (const ids of results) {
    for (const id of ids) {
      allIds.add(id);
    }
  }
  console.log(`\nTotal unique messages found: ${allIds.size}`);
  console.log("Fetching metadata...\n");

  // Fetch metadata for all unique IDs
  const candidates = await fetchMetadata(allIds);

  // Sort by date, oldest first. A missing or unparseable Date header parses
  // to NaN, and a comparator that returns NaN gives an inconsistent ordering,
  // so such entries are mapped to Infinity and placed after all dated ones.
  const sortKey = (candidate) => {
    const time = Date.parse(candidate.date);
    return Number.isNaN(time) ? Infinity : time;
  };
  candidates.sort((a, b) => {
    const timeA = sortKey(a);
    const timeB = sortKey(b);
    if (timeA === timeB) return 0;
    return timeA < timeB ? -1 : 1;
  });

  // Write output, creating the target directory first when needed.
  const dir = path.dirname(outputPath);
  if (dir && dir !== ".") fs.mkdirSync(dir, { recursive: true });
  const output = {
    searchPasses: queries.map((q) => q.name),
    yearsBack: yearsNum,
    fetchedAt: new Date().toISOString(),
    count: candidates.length,
    candidates,
  };
  fs.writeFileSync(outputPath, JSON.stringify(output, null, 2));

  console.log(`✓ Found ${candidates.length} candidate emails`);
  console.log(` Written to: ${outputPath}`);
  console.log(`\n Sample subjects:`);
  candidates.slice(0, 5).forEach((c) => {
    console.log(` - ${c.subject.slice(0, 60)}`);
  });

  // Final line: one-line JSON summary of the run.
  console.log(
    JSON.stringify({
      success: true,
      outputPath,
      count: candidates.length,
    })
  );
} catch (error) {
  // Log a short message, then rethrow so the failure still propagates.
  console.error("Error listing emails:", error.message);
  throw error;
}