code icon Code

Analyze Notion Workspace

Analyzes Notion workspace activity from the last 90 days: documents, databases, collaborators, and topics.

Source Code

import fs from "fs";

// Output file locations come from the first two CLI arguments; each falls back
// to its default path when the argument is not supplied.
const [
  analysisOutputPath = "session/notion-profile-analysis.json",
  writingSamplesOutputPath = "session/writing-samples.json",
] = process.argv.slice(2);

// Base URL and request headers shared by every Notion API call in this script.
const NOTION_API = "https://api.notion.com/v1";
const headers = {
  // NOTE(review): presumably replaced with a real integration token before the
  // script runs — confirm how the token is injected.
  Authorization: "Bearer PLACEHOLDER_TOKEN",
  "Content-Type": "application/json",
  "Notion-Version": "2022-06-28",
};

// Analysis window: a Date object 90 days before now (not an ISO string) up to
// the current moment. `ninetyDaysAgo` is the cutoff compared against each
// page's last_edited_time; `startDate`/`endDate` are only used for reporting.
const ninetyDaysAgo = new Date(Date.now() - 90 * 24 * 60 * 60 * 1000);
const startDate = ninetyDaysAgo;
const endDate = new Date();
// Formats a Date as short month + day in en-US (e.g. "Jan 5"); the year is
// not included.
const formatDate = (d) =>
  d.toLocaleDateString("en-US", { month: "short", day: "numeric" });

/**
 * Concatenate the `plain_text` of every segment in a rich_text array.
 *
 * @param {Array<{plain_text?: string}>} richText - rich_text segments.
 * @returns {string} The joined text, or "" when the input is not an array.
 */
function extractText(richText) {
  if (!Array.isArray(richText)) {
    return "";
  }

  let combined = "";
  for (const segment of richText) {
    // Segments without plain_text contribute nothing.
    combined += segment.plain_text || "";
  }
  return combined;
}

/**
 * Collect the `text` of each block and of its nested `children`, depth-first,
 * and join the pieces with newlines.
 *
 * @param {Array<{text?: string, children?: Array}>} blocks - Blocks to walk.
 * @returns {string} Newline-joined text; "" when there is nothing to extract
 *   or `blocks` is not an array.
 */
function extractTextFromBlocks(blocks) {
  if (!Array.isArray(blocks)) return "";

  const texts = [];
  for (const block of blocks) {
    if (block.text) {
      texts.push(block.text);
    }
    if (block.children) {
      // The recursive call returns one already-joined string. Push it as a
      // single entry: spreading it would split it into individual characters
      // and put a newline between every one of them.
      const nestedText = extractTextFromBlocks(block.children);
      if (nestedText) {
        texts.push(nestedText);
      }
    }
  }
  return texts.join("\n");
}

/**
 * Fetch the child blocks of a block or page, attaching nested children
 * recursively up to `maxDepth` levels below the starting block.
 *
 * All result pages of the children listing are followed via
 * `has_more` / `next_cursor`, so blocks past the first 100 are not dropped.
 *
 * @param {string} blockId - ID of the block (or page) whose children to fetch.
 * @param {number} [depth=0] - Current recursion depth.
 * @param {number} [maxDepth=2] - Deepest level at which children are fetched.
 * @returns {Promise<Array<object>>} The child blocks; blocks whose children
 *   were fetched carry them in a `children` array.
 * @throws {Error} When a children request for `blockId` itself returns a
 *   non-OK status. Failures while fetching nested children are logged and
 *   replaced by an empty `children` array instead.
 */
async function fetchBlocksWithChildren(blockId, depth = 0, maxDepth = 2) {
  if (depth > maxDepth) return [];

  const blocks = [];
  let cursor;

  // Each response holds at most `page_size` blocks; keep requesting until the
  // API reports there are no more.
  do {
    const blocksUrl = new URL(`${NOTION_API}/blocks/${blockId}/children`);
    blocksUrl.searchParams.set("page_size", "100");
    if (cursor) blocksUrl.searchParams.set("start_cursor", cursor);

    const blocksRes = await fetch(blocksUrl.toString(), {
      method: "GET",
      headers,
    });

    if (!blocksRes.ok) {
      const errorText = await blocksRes.text();
      throw new Error(`API ${blocksRes.status}: ${errorText.slice(0, 100)}`);
    }

    const blocksData = await blocksRes.json();
    blocks.push(...(blocksData.results || []));
    cursor = blocksData.has_more ? blocksData.next_cursor : undefined;
  } while (cursor);

  // Fetch children for blocks that have them. All requests for one level are
  // issued in parallel; there is no concurrency cap.
  const blocksWithChildren = blocks.filter((b) => b.has_children);
  if (blocksWithChildren.length > 0 && depth < maxDepth) {
    const childPromises = blocksWithChildren.map(async (block) => {
      try {
        block.children = await fetchBlocksWithChildren(
          block.id,
          depth + 1,
          maxDepth
        );
      } catch (e) {
        // Log but continue - partial content is better than none
        console.log(`  Warning: Could not fetch children for block ${block.id}: ${e.message}`);
        block.children = [];
      }
    });
    await Promise.all(childPromises);
  }

  return blocks;
}

console.log("Fetching Notion data (last 90 days)...");

try {
  // 1. Get user info - for bot integrations, the owner is the actual user
  console.log("Fetching workspace data...");

  const meRes = await fetch(`${NOTION_API}/users/me`, { headers });
  if (!meRes.ok) throw new Error(`Failed to get user: ${meRes.status}`);

  const meData = await meRes.json();

  // If it's a bot, get the owner user; otherwise use the direct user
  let userName, userId;
  if (meData.type === "bot" && meData.bot?.owner?.user) {
    userName = meData.bot.owner.user.name || "Unknown User";
    userId = meData.bot.owner.user.id;
  } else {
    userName = meData.name || "Unknown User";
    userId = meData.id;
  }

  console.log(`✓ Connected as ${userName}`);

  // 2. Collect ALL documents user writes to (database entries + standalone pages)
  console.log("Finding all documents you've written to...");

  const allDocuments = [];
  const collaborators = new Map(); // userId -> { name, count }
  const topics = new Map(); // topic -> { count, dates: [docDates] }
  const contentTexts = []; // For writing style analysis
  let totalPagesSeen = 0;
  let pagesInDateRange = 0;

  // Search for all pages edited in last 90 days
  // Results are sorted by last_edited_time descending, so we can break early
  let cursor = undefined;
  let reachedCutoff = false;
  const maxPages = 150; // Max qualifying pages to collect

  // Page through search results (most recently edited first), collecting pages
  // the user created or last edited, until the 90-day cutoff is reached, the
  // results run out, or `maxPages` qualifying pages have been collected.
  while (!reachedCutoff && allDocuments.length < maxPages) {
    const searchRes = await fetch(`${NOTION_API}/search`, {
      method: "POST",
      headers,
      body: JSON.stringify({
        filter: { property: "object", value: "page" },
        sort: { direction: "descending", timestamp: "last_edited_time" },
        page_size: 100,
        ...(cursor && { start_cursor: cursor }),
      }),
    });

    if (!searchRes.ok) {
      // Best effort: keep what was collected so far, but make the truncated
      // scan visible instead of stopping silently.
      console.log(
        `  Warning: Search request failed with status ${searchRes.status}; continuing with ${allDocuments.length} documents collected so far`
      );
      break;
    }
    const searchData = await searchRes.json();

    for (const page of searchData.results || []) {
      totalPagesSeen++;
      const editedAt = new Date(page.last_edited_time);

      // Since results are sorted by last_edited_time descending,
      // once we hit a page older than 90 days, all subsequent pages will be older too
      if (editedAt < ninetyDaysAgo) {
        reachedCutoff = true;
        break;
      }
      pagesInDateRange++;

      // Include pages the user created OR last edited
      // This captures the user's workspace activity
      const userCreated = page.created_by?.id === userId;
      const userLastEdited = page.last_edited_by?.id === userId;

      if (!userCreated && !userLastEdited) continue;

      // Track collaborators only on pages the user actually worked on
      // This shows who the user collaborates with, not everyone in the workspace
      for (const collabId of [page.created_by?.id, page.last_edited_by?.id]) {
        if (!collabId || collabId === userId) continue;
        const existing = collaborators.get(collabId) || { count: 0 };
        existing.count++;
        collaborators.set(collabId, existing);
      }

      const titleProp = Object.values(page.properties || {}).find(
        (p) => p.type === "title"
      );
      // A title is a rich_text array that may hold several segments (mentions,
      // formatted runs); join them all rather than keeping only the first.
      const title = extractText(titleProp?.title) || "Untitled";

      // Emoji icons are kept as the emoji itself; external/file icons are kept
      // as their URL; pages without an icon get null.
      const icon =
        page.icon?.type === "emoji"
          ? page.icon.emoji
          : page.icon?.external?.url || page.icon?.file?.url || null;

      const doc = {
        id: page.id,
        title,
        icon,
        url: page.url,
        type:
          page.parent?.type === "database_id"
            ? "database_entry"
            : "standalone_page",
        parentId: page.parent?.database_id || page.parent?.page_id || null,
        createdTime: page.created_time,
        lastEditedTime: page.last_edited_time,
        createdBy: page.created_by?.id,
        lastEditedBy: page.last_edited_by?.id,
        userCreated,
      };

      allDocuments.push(doc);
      if (allDocuments.length >= maxPages) break;
    }

    if (
      !searchData.has_more ||
      reachedCutoff ||
      allDocuments.length >= maxPages
    )
      break;
    cursor = searchData.next_cursor;
  }

  console.log(
    `  Scanned ${totalPagesSeen} pages (${pagesInDateRange} in last 90 days), found ${allDocuments.length} you created/edited`
  );

  // Fetch names for collaborators (parallel, limited to top 20 by count)
  // We fetch more than needed because some will be filtered as bots
  const topCollabIds = Array.from(collaborators.entries())
    .sort((a, b) => b[1].count - a[1].count)
    .slice(0, 20)
    .map(([id]) => id);

  if (topCollabIds.length > 0) {
    console.log(
      `  Fetching info for ${topCollabIds.length} potential collaborators...`
    );
    const userPromises = topCollabIds.map(async (collabId) => {
      try {
        const userRes = await fetch(`${NOTION_API}/users/${collabId}`, {
          headers,
        });
        if (userRes.ok) {
          const userData = await userRes.json();
          // Filter out bots - only return real people
          if (userData.type === "bot") {
            return { id: collabId, name: null, isBot: true };
          }
          return {
            id: collabId,
            name: userData.name || "Unknown",
            isBot: false,
          };
        }
      } catch (e) {
        console.log(`  Warning: Could not fetch user ${collabId}: ${e.message}`);
      }
      return { id: collabId, name: null, isBot: false };
    });

    const userResults = await Promise.all(userPromises);
    let botsFiltered = 0;
    let namesAssigned = 0;
    for (const { id, name, isBot } of userResults) {
      if (isBot) {
        // Remove bots from collaborators entirely
        collaborators.delete(id);
        botsFiltered++;
      } else if (name && collaborators.has(id)) {
        collaborators.get(id).name = name;
        namesAssigned++;
      }
    }
    console.log(`  Collaborator lookup: ${namesAssigned} names found, ${botsFiltered} bots filtered`);
  }

  // 3. Read content from sample documents for topic and style analysis (parallelized)
  console.log("Analyzing content from substantial documents...");

  // Analyze pages user created (standalone pages + database entries with substantial content)
  const createdPages = allDocuments.filter((d) => d.userCreated);

  // Prioritize standalone pages, but also include database entries
  const standaloneForAnalysis = createdPages.filter(
    (d) => d.type === "standalone_page"
  );
  const dbEntriesForAnalysis = createdPages.filter(
    (d) => d.type === "database_entry"
  );

  // Read more pages (up to 30 total) to have a good pool, then filter by length
  const sampleDocs = [
    ...standaloneForAnalysis.slice(0, 20),
    ...dbEntriesForAnalysis.slice(0, 10),
  ].slice(0, 30);

  /**
   * Walk a block tree depth-first and gather the text of every block whose
   * type-specific payload has a `rich_text` array.
   *
   * @param {Array<object>} blocks - Blocks, optionally carrying `children`.
   * @returns {string[]} One string per rich_text-bearing block, parent text
   *   before the text of its children.
   */
  function extractAllText(blocks) {
    const collected = [];

    const walk = (nodes) => {
      for (const node of nodes) {
        // The payload lives under a key named after the block's type.
        const richText = node[node.type]?.rich_text;
        if (richText) {
          collected.push(extractText(richText));
        }
        if (node.children?.length > 0) {
          walk(node.children);
        }
      }
    };

    walk(blocks);
    return collected;
  }

  // Read all content in parallel (with child block support)
  const contentPromises = sampleDocs.map(async (doc) => {
    try {
      // Fetch blocks with children (up to depth 2)
      const blocks = await fetchBlocksWithChildren(doc.id, 0, 2);
      const texts = extractAllText(blocks);

      if (texts.length > 0) {
        const totalLength = texts.join(" ").length;
        return {
          docId: doc.id,
          title: doc.title,
          texts,
          totalLength,
          blocks,
          docType: doc.type,
        };
      } else {
        // Log when a page has blocks but no extractable text
        if (blocks.length > 0) {
          console.log(`  Note: "${doc.title}" has ${blocks.length} blocks but no extractable text`);
        }
      }
    } catch (e) {
      console.log(`  Warning: Could not read blocks for "${doc.title}": ${e.message}`);
      return null;
    }
    return null;
  });

  const contentResults = await Promise.all(contentPromises);

  // Filter to substantial content (at least 500 characters) and sort by length
  // This filters out short database entries that are just metadata
  const substantialContent = contentResults
    .filter((result) => result && result.totalLength >= 500)
    .sort((a, b) => b.totalLength - a.totalLength)
    .slice(0, 15); // Take top 15 longest pages

  console.log(
    `  Read ${contentResults.filter((r) => r).length} pages, analyzing ${
      substantialContent.length
    } substantial documents`
  );

  // Show near-misses when no substantial content found (helps diagnose threshold issues)
  if (substantialContent.length === 0) {
    const nearMisses = contentResults
      .filter((r) => r && r.totalLength >= 100 && r.totalLength < 500)
      .sort((a, b) => b.totalLength - a.totalLength)
      .slice(0, 5);

    if (nearMisses.length > 0) {
      console.log(
        `  Near threshold (100-500 chars): ${nearMisses
          .map((r) => `"${r.title}" (${r.totalLength})`)
          .join(", ")}`
      );
    }

    // Also show any pages with content below 100 chars
    const tinyPages = contentResults
      .filter((r) => r && r.totalLength > 0 && r.totalLength < 100)
      .slice(0, 3);

    if (tinyPages.length > 0) {
      console.log(
        `  Very short pages (<100 chars): ${tinyPages
          .map((r) => `"${r.title}" (${r.totalLength})`)
          .join(", ")}`
      );
    }
  }

  // Separate standalone pages from database entries for reporting
  const substantialStandalone = substantialContent.filter(
    (r) => r.docType === "standalone_page"
  );
  const substantialDbEntries = substantialContent.filter(
    (r) => r.docType === "database_entry"
  );

  // Build doc lookup map for O(1) access
  const docById = new Map(allDocuments.map((d) => [d.id, d]));

  // Process results and extract topics with date tracking
  for (const result of substantialContent) {
    if (!result) continue;

    contentTexts.push({
      docId: result.docId,
      title: result.title,
      texts: result.texts,
    });

    // Get document date for topic recurrence tracking
    const doc = docById.get(result.docId);
    const docDate = doc?.lastEditedTime || doc?.createdTime || null;

    // Extract topics (simple keyword extraction)
    const allText = result.texts.join(" ").toLowerCase();
    const words = allText.match(/\b[a-z]{4,}\b/g) || [];
    const wordCounts = {};
    for (const word of words) {
      if (word.length > 4) {
        wordCounts[word] = (wordCounts[word] || 0) + 1;
      }
    }
    const topWords = Object.entries(wordCounts)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 5)
      .map(([word, count]) => word);

    for (const topic of topWords) {
      const existing = topics.get(topic) || { count: 0, dates: [] };
      existing.count++;
      if (docDate) existing.dates.push(docDate);
      topics.set(topic, existing);
    }
  }

  console.log(
    `  Analyzed content from ${contentTexts.length} substantial documents`
  );

  // 4. Group documents by type and activity
  const databaseEntries = allDocuments.filter(
    (d) => d.type === "database_entry"
  );
  const standalonePages = allDocuments.filter(
    (d) => d.type === "standalone_page"
  );

  // Group database entries by parent database
  const databaseGroups = new Map();
  for (const entry of databaseEntries) {
    if (!entry.parentId) continue;
    const existing = databaseGroups.get(entry.parentId) || [];
    existing.push(entry);
    databaseGroups.set(entry.parentId, existing);
  }

  /**
   * Reduce a database `properties` object to a compact schema summary.
   *
   * Every property is recorded with its type; select, multi_select and status
   * properties also list their option names, and number properties keep their
   * format when one is set. Date, select and status property names are
   * additionally collected into separate lists.
   *
   * @param {object} [properties] - Map of property name to property config.
   * @returns {{properties: object, dateProperties: string[],
   *   selectProperties: string[], statusProperties: string[]}}
   */
  function extractPropertySchema(properties) {
    const summary = {
      properties: {},
      dateProperties: [],
      selectProperties: [],
      statusProperties: [],
    };

    Object.entries(properties || {}).forEach(([name, config]) => {
      const { type } = config;
      const entry = { type };

      switch (type) {
        case "select":
        case "multi_select":
        case "status": {
          // The option list sits under a key named after the property type.
          const choices = config[type]?.options;
          if (choices) {
            entry.options = choices.map((choice) => choice.name);
          }
          if (type === "select") summary.selectProperties.push(name);
          if (type === "status") summary.statusProperties.push(name);
          break;
        }
        case "date":
          summary.dateProperties.push(name);
          break;
        case "number":
          if (config.number?.format) {
            entry.format = config.number.format;
          }
          break;
        default:
          break;
      }

      summary.properties[name] = entry;
    });

    return summary;
  }

  // Get database info for groups (parallelized).
  // Each promise resolves to { dbId, info } on success, or to null when the
  // database could not be fetched (the failure is logged, not thrown).
  const databaseInfoPromises = Array.from(databaseGroups.entries()).map(
    async ([dbId, entries]) => {
      try {
        const dbRes = await fetch(`${NOTION_API}/databases/${dbId}`, {
          headers,
        });
        if (!dbRes.ok) {
          // Report non-OK responses the same way as network errors so a
          // missing database is not dropped without a trace.
          console.log(
            `  Warning: Could not fetch database ${dbId}: API ${dbRes.status}`
          );
          return null;
        }

        const dbData = await dbRes.json();
        // The title is a rich_text array; join every segment instead of
        // keeping only the first one.
        const dbTitle = extractText(dbData.title) || "Untitled Database";
        const dbIcon =
          dbData.icon?.type === "emoji"
            ? dbData.icon.emoji
            : dbData.icon?.external?.url || dbData.icon?.file?.url || null;

        // Extract property schema for view-critical property identification
        const schemaInfo = extractPropertySchema(dbData.properties);

        return {
          dbId,
          info: {
            title: dbTitle,
            icon: dbIcon,
            url: dbData.url,
            entryCount: entries.length,
            // Include property schema for calendar/board view support
            schema: schemaInfo,
          },
        };
      } catch (e) {
        console.log(`  Warning: Could not fetch database ${dbId}: ${e.message}`);
      }
      return null;
    }
  );

  const databaseInfoResults = await Promise.all(databaseInfoPromises);
  const databaseInfo = new Map();
  for (const result of databaseInfoResults) {
    if (result) {
      databaseInfo.set(result.dbId, result.info);
    }
  }

  // 5. Save analysis for agent to interpret
  // Ensure output directories exist
  const analysisDir = analysisOutputPath.substring(
    0,
    analysisOutputPath.lastIndexOf("/")
  );
  const writingSamplesDir = writingSamplesOutputPath.substring(
    0,
    writingSamplesOutputPath.lastIndexOf("/")
  );
  if (analysisDir) fs.mkdirSync(analysisDir, { recursive: true });
  if (writingSamplesDir) fs.mkdirSync(writingSamplesDir, { recursive: true });

  // Write writing samples for style analysis (with source field for shared task).
  // One sample per substantial document: its text blocks joined by blank lines,
  // plus size and authorship metadata looked up from the collected documents.
  const writingSamples = {
    source: "notion",
    samples: substantialContent.map((result) => {
      const doc = docById.get(result.docId);
      const text = result.texts.join("\n\n");
      return {
        id: result.docId,
        title: result.title,
        type: result.docType,
        text,
        // Drop empty tokens: empty blocks at the start or end of a page leave
        // leading/trailing whitespace, which a bare split() would count as
        // extra words.
        wordCount: text.split(/\s+/).filter(Boolean).length,
        characterCount: result.totalLength,
        createdBy: doc?.createdBy || null,
        lastEditedBy: doc?.lastEditedBy || null,
        userCreated: doc?.userCreated || false,
      };
    }),
  };
  fs.writeFileSync(
    writingSamplesOutputPath,
    JSON.stringify(writingSamples, null, 2)
  );
  console.log(
    `  Wrote ${writingSamples.samples.length} writing samples to ${writingSamplesOutputPath}`
  );

  const topCollaborators = Array.from(collaborators.entries())
    .map(([id, data]) => ({ id, ...data }))
    .sort((a, b) => b.count - a.count)
    .slice(0, 10);

  const topTopics = Array.from(topics.entries())
    .map(([topic, data]) => {
      const sortedDates = data.dates.sort();
      const earliest = sortedDates[0] || null;
      const latest = sortedDates[sortedDates.length - 1] || null;
      // Calculate date spread in days
      const spreadDays =
        earliest && latest
          ? Math.round(
              (new Date(latest) - new Date(earliest)) / (1000 * 60 * 60 * 24)
            )
          : 0;
      return {
        topic,
        count: data.count,
        earliest,
        latest,
        spreadDays,
      };
    })
    .sort((a, b) => b.count - a.count)
    .slice(0, 20);

  const analysisData = {
    user: userName,
    userId,
    period: `${formatDate(startDate)} - ${formatDate(endDate)}`,
    documents: {
      total: allDocuments.length,
      databaseEntries: databaseEntries.length,
      standalonePages: standalonePages.length,
      substantialStandalone: substantialStandalone.length,
      substantialDbEntries: substantialDbEntries.length,
    },
    databases: Array.from(databaseInfo.entries()).map(([id, info]) => ({
      id,
      ...info,
    })),
    collaborators: topCollaborators,
    topics: topTopics,
    sampleDocuments: allDocuments.slice(0, 30).map((d) => ({
      id: d.id,
      title: d.title,
      type: d.type,
      url: d.url,
      lastEdited: d.lastEditedTime,
    })),
  };

  fs.writeFileSync(analysisOutputPath, JSON.stringify(analysisData, null, 2));
  console.log(`  Saved analysis data to ${analysisOutputPath}`);

  // Diagnostic summary for debugging
  console.log(`\nDiagnostics:`);
  console.log(`  Pages searched: ${totalPagesSeen}`);
  console.log(`  In date range (90 days): ${pagesInDateRange}`);
  console.log(`  User created/edited: ${allDocuments.length}`);
  console.log(`  Content fetches attempted: ${sampleDocs.length}`);
  console.log(`  Content fetches succeeded: ${contentResults.filter((r) => r).length}`);
  console.log(`  Above threshold (≥500 chars): ${substantialContent.length}`);
  console.log(`  Collaborators found: ${topCollaborators.length} (with names: ${topCollaborators.filter((c) => c.name).length})`);
  console.log(`  Databases found: ${databaseInfo.size}`);

  // Schema summary for view-critical properties
  if (databaseInfo.size > 0) {
    console.log(`\nDatabase schemas (for calendar/board views):`);
    for (const [id, info] of databaseInfo.entries()) {
      if (info.schema) {
        const dateCount = info.schema.dateProperties?.length || 0;
        const statusCount = info.schema.statusProperties?.length || 0;
        const selectCount = info.schema.selectProperties?.length || 0;
        console.log(`  ${info.title}:`);
        if (dateCount > 0) {
          console.log(`    Date properties: ${info.schema.dateProperties.join(", ")} (for Calendar/Timeline)`);
        }
        if (statusCount > 0) {
          console.log(`    Status properties: ${info.schema.statusProperties.join(", ")} (for Board)`);
        } else if (selectCount > 0) {
          console.log(`    Select properties: ${info.schema.selectProperties.join(", ")} (for Board)`);
        }
      }
    }
  }

  console.log(`\n✓ Notion analysis complete`);
  console.log(
    JSON.stringify({
      success: true,
      analysisFile: analysisOutputPath,
      writingSamplesFile: writingSamplesOutputPath,
      user: userName,
      documentCount: allDocuments.length,
      databaseCount: databaseInfo.size,
      collaboratorCount: topCollaborators.length,
      topicsFound: topTopics.length,
      substantialStandaloneCount: substantialStandalone.length,
    })
  );
} catch (error) {
  console.error("Failed:", error.message);
  throw error;
}