Skip to main content

Three layers: a short note at the top, the key lines with our take in the middle, the full source at the bottom.

CI script

check-subprocessor-freshness.mjs

Fails CI when infrastructure or route code references a vendor that is not named in the sub-processor list, or when an active entry has not yet completed its 30-day notice period.

Repo path: scripts/check-subprocessor-freshness.mjs · Language: JavaScript

Short note — more on the way

What this is

Fails CI when infrastructure or route code references a vendor that is not named in the sub-processor list, or when an active entry has not yet completed its 30-day notice period.

What it proves

This file backs one or more of the privacy promises. It is a continuous-integration script that lives versioned in the repository. Read the promise →

What to look for in the source below

  • Comments and headers that name what each section does.
  • File edges: imports at the top, exports or run-blocks at the bottom.
  • Any list, configuration, or assertion that looks load-bearing.
Show the full file (244 lines)

244 lines

#!/usr/bin/env node
//
// Sub-processor list freshness gate (PR-6).
//
// docs/sub-processors.md is a customer commitment: every third party
// that touches Customer Data is named, with 30 days' advance notice
// of changes. This gate flags drift between the list and the actual
// infrastructure declared in infra/**, services/**/fly.toml and
// apps/**/wrangler.toml, plus vendor hostnames referenced from
// apps/api/src/routes/**/*.ts.
//
// Detection is conservative -- the gate looks for known vendor tokens
// in IaC files (e.g. "anthropic", "openai", "twilio", "datadog",
// "amplitude") that are NOT present in docs/sub-processors.md. New
// vendors must EITHER be added to the doc + email the announcement
// list OR be removed from the IaC. There is no middle ground.

import { readFileSync, existsSync } from "node:fs";
import { fileURLToPath } from "node:url";
import path from "node:path";
import { execSync } from "node:child_process";

// ES modules have no built-in __dirname, so derive it from this
// module's own URL.
const scriptFile = fileURLToPath(import.meta.url);
const __dirname = path.dirname(scriptFile);

// Repository root: the parent of the directory holding this script.
const REPO = path.resolve(__dirname, "..");

// Repo-relative path of the customer-facing sub-processor list.
const SUBPROCESSOR_DOC = "docs/sub-processors.md";

// Known third-party vendor tokens we look for. Add to this list
// when a new vendor lands in the wider ecosystem; the gate will
// flag the same token in IaC if it is not also in the doc.
//
// Each entry is interpolated verbatim into a RegExp by tokensIn()
// and parseSubprocessorRows(), and matched case-insensitively with
// any non-[a-z0-9] character (or string edge) as a boundary. Keep
// entries to plain letters/digits: regex metacharacters in a token
// would change what gets matched.
const KNOWN_VENDOR_TOKENS = [
  "aws",
  "cloudflare",
  "fly",
  "stripe",
  "resend", // email — current shipped provider
  "postmark", // tripwire: was named in v2 docs; must not silently re-appear
  "sentry",
  "anthropic", // tripwire: v4 removed; must not silently re-appear
  "openai",
  "instructor",
  "cohere",
  "google",
  "vertex",
  "twilio",
  "sendgrid",
  "mailgun",
  "datadog",
  "newrelic",
  "amplitude",
  "mixpanel",
  "segment",
  "intercom",
  "zendesk",
  "auth0",
  "okta",
  "workos",
  "hcaptcha",
  "recaptcha",
  "neon",
  "intuit", // QuickBooks Online — sub-processor when posting flag flips on
  "quickbooks",
  "xero",
  "supabase",
  "planetscale",
];

/**
 * List the tracked files the gate scans for vendor tokens.
 *
 * @returns {string[]} Repo-relative paths reported by `git ls-files`
 *   for the pathspecs below; an empty array if the git command fails.
 */
function listIaCFiles() {
  // git ls-files keeps us aligned with the tracked tree. There is no
  // filesystem fallback: if git is unavailable (or the command fails)
  // we return an empty list, which means NOTHING is scanned. That is
  // reported loudly on stderr so a vacuous pass is visible in CI logs.
  //
  // Audit batch 5 P-F1 widening: the gate now also scans
  // apps/api/src/routes/**/*.ts because integrations like
  // QuickBooks Online (apps/api/src/routes/integrations/
  // quickbooks.ts) talk to vendor hostnames (e.g.
  // appcenter.intuit.com, sandbox-quickbooks.api.intuit.com)
  // directly from the Worker. Hostnames in route handlers are as
  // load-bearing for sub-processor disclosure as IaC config; if the
  // route file references a vendor token, that vendor MUST appear
  // in docs/sub-processors.md.
  try {
    return execSync(
      "git ls-files -- 'infra/**' 'services/**/fly.toml' 'apps/**/wrangler.toml' 'apps/api/src/routes/**/*.ts'",
      { cwd: REPO, encoding: "utf8" },
    )
      .split("\n")
      .filter(Boolean);
  } catch (err) {
    // execSync error messages can span several lines (they embed the
    // child's stderr); keep only the first line for the log.
    const why = String(err?.message ?? err).split("\n")[0];
    process.stderr.write(
      `check-subprocessor-freshness: WARNING: could not list IaC files via git (${why}); 0 files will be scanned.\n`,
    );
    return [];
  }
}

/**
 * Collect every known vendor token mentioned in a piece of text.
 *
 * @param {string} text - Raw file or document contents.
 * @returns {Set<string>} The entries of KNOWN_VENDOR_TOKENS found in
 *   `text`, in list order.
 */
function tokensIn(text) {
  const haystack = text.toLowerCase();
  // A token only counts when it is delimited on both sides by a
  // non-alphanumeric character or a string edge, so `aws.kms` and
  // `fly.io` match while a token buried inside a longer word does not.
  const mentioned = KNOWN_VENDOR_TOKENS.filter((vendor) =>
    new RegExp(`(^|[^a-z0-9])${vendor}([^a-z0-9]|$)`, "i").test(haystack),
  );
  return new Set(mentioned);
}

/**
 * Parse the numbered rows of the sub-processor table in
 * docs/sub-processors.md. The doc table has the shape:
 *
 *   | # | Provider | Role | Region | DPA on file | Since |
 *   |--:|---|---|---|---:|---|
 *   | 1 | **AWS** | ... | ... | Yes | 2026-01-01 |
 *   | 7 | **Intuit (QuickBooks Online)** ... | ... | US | Pending | Planned ... |
 *
 * Rows whose DPA-on-file column is "Yes" are subject to the 30-day
 * freshness assertion. Rows marked "Pending" / "Planned" are not yet
 * active and skip the check.
 *
 * @param {string} docText - Full text of the sub-processor document.
 * @returns {Array<{token: string, provider: string, since: (Date|null),
 *   planned: boolean, active: boolean, rawSince: string}>}
 *   One entry per (table row, KNOWN_VENDOR_TOKEN found in its Provider
 *   cell) pair, so a row naming two known tokens yields two entries and
 *   a row naming none yields no entry. `since` is null when the Since
 *   cell holds no valid calendar date.
 */
function parseSubprocessorRows(docText) {
  const rows = [];
  for (const line of docText.split("\n")) {
    // Match table rows that start with `| <digit>` (the # column);
    // this skips the header and separator lines.
    const m = line.match(/^\|\s*\d+\s*\|(.+)\|\s*$/);
    if (!m) continue;
    // Split on `|` to get cells, then trim. With the # column already
    // consumed by the regex, five cells remain:
    // [provider, role, region, dpa_on_file, since].
    const cells = m[1].split("|").map((s) => s.trim());
    if (cells.length < 5) continue;
    const [provider, , , dpaOnFile, since] = cells;
    const lowerProvider = provider.toLowerCase();
    const tokensForRow = [];
    for (const token of KNOWN_VENDOR_TOKENS) {
      const re = new RegExp(`(^|[^a-z0-9])${token}([^a-z0-9]|$)`, "i");
      if (re.test(lowerProvider)) tokensForRow.push(token);
    }
    const dpaYes = /^yes$/i.test(dpaOnFile);
    const planned =
      /planned/i.test(since) ||
      /pending/i.test(dpaOnFile) ||
      /planned/i.test(provider);
    // First YYYY-MM-DD found anywhere in the Since cell, if present.
    const dateMatch = since.match(/(\d{4}-\d{2}-\d{2})/);
    let sinceDate = null;
    if (dateMatch) {
      const parsed = new Date(`${dateMatch[1]}T00:00:00Z`);
      // Reject date-shaped strings that are not real calendar dates
      // (e.g. 2026-13-45, or 2026-02-30 which some engines roll over
      // into March). An Invalid Date object is truthy and compares
      // false against every threshold, so it would otherwise slip past
      // the freshness check; report it as missing instead.
      const isRealDate =
        !Number.isNaN(parsed.getTime()) &&
        parsed.toISOString().slice(0, 10) === dateMatch[1];
      if (isRealDate) sinceDate = parsed;
    }
    for (const token of tokensForRow) {
      rows.push({
        token,
        provider,
        since: sinceDate,
        planned,
        active: dpaYes && !planned,
        rawSince: since,
      });
    }
  }
  return rows;
}

/**
 * Run the gate: read the sub-processor doc, scan the tracked IaC and
 * route files for vendor tokens missing from the doc, apply the 30-day
 * notice check to active table rows, print a report, and exit.
 *
 * Never returns: exits the process with 0 when both checks are clean
 * and with 1 when the doc is missing or any violation is found.
 */
function main() {
  const docPath = path.join(REPO, SUBPROCESSOR_DOC);
  if (!existsSync(docPath)) {
    process.stderr.write(`FAIL: ${SUBPROCESSOR_DOC} not found.\n`);
    process.exit(1);
  }

  const docText = readFileSync(docPath, "utf8");
  // A vendor counts as "named" if its token appears anywhere in the
  // document, not only inside the table.
  const docTokens = tokensIn(docText);
  const subprocessorRows = parseSubprocessorRows(docText);

  const iacFiles = listIaCFiles();
  const violations = [];

  for (const rel of iacFiles) {
    const abs = path.join(REPO, rel);
    // Tracked by git but absent from the working tree: nothing to scan.
    if (!existsSync(abs)) continue;
    const tokens = tokensIn(readFileSync(abs, "utf8"));
    for (const token of tokens) {
      if (!docTokens.has(token)) {
        violations.push({ file: rel, token });
      }
    }
  }

  // parseSubprocessorRows emits one entry per vendor token found in a
  // row, so a single table row such as "Intuit (QuickBooks Online)"
  // appears more than once. Collapse back to one entry per table row
  // so each row is checked, reported and counted exactly once.
  const activeRows = new Map();
  for (const row of subprocessorRows) {
    if (!row.active) continue;
    const key = `${row.provider}\u0000${row.rawSince}`;
    if (!activeRows.has(key)) activeRows.set(key, row);
  }

  // 30-day notice clause (DPA §6). For every row whose DPA-on-file is
  // "Yes" we require `(now - since) >= 30 days`. Rows marked Planned
  // / Pending are pre-disclosed and intentionally skip the check.
  const DAY_MS = 24 * 60 * 60 * 1000;
  const THIRTY_DAYS_MS = 30 * DAY_MS;
  const now = Date.now();
  const freshnessViolations = [];
  for (const row of activeRows.values()) {
    // Treat an Invalid Date like a missing one: its timestamp is NaN,
    // and NaN compares false against the threshold, which would
    // otherwise let the row pass silently.
    const sinceMs = row.since ? row.since.getTime() : Number.NaN;
    if (Number.isNaN(sinceMs)) {
      freshnessViolations.push({
        provider: row.provider,
        reason: `Since column missing or unparseable: "${row.rawSince}"`,
      });
      continue;
    }
    const elapsed = now - sinceMs;
    if (elapsed < THIRTY_DAYS_MS) {
      const days = Math.floor(elapsed / DAY_MS);
      freshnessViolations.push({
        provider: row.provider,
        reason: `vendor ${row.provider} added at ${row.rawSince}, less than 30 days ago (only ${days} days elapsed); the 30-day notice clause has not elapsed.`,
      });
    }
  }

  if (violations.length === 0 && freshnessViolations.length === 0) {
    process.stdout.write(
      `check-subprocessor-freshness: OK (${iacFiles.length} IaC files, ${docTokens.size} vendors named, ${activeRows.size} active rows passed the 30-day freshness check).\n`,
    );
    process.exit(0);
  }

  if (violations.length > 0) {
    process.stderr.write(
      `check-subprocessor-freshness: FAIL (${violations.length} undeclared vendors in IaC):\n`,
    );
    for (const v of violations) {
      process.stderr.write(
        `  ${v.file}: vendor token "${v.token}" not in ${SUBPROCESSOR_DOC}\n`,
      );
    }
    process.stderr.write(
      `\nFix: add the vendor to ${SUBPROCESSOR_DOC} (with 30 days notice on the announcement list)\n` +
        `     OR remove the IaC reference. The list is a customer commitment.\n`,
    );
  }
  if (freshnessViolations.length > 0) {
    process.stderr.write(
      `check-subprocessor-freshness: FAIL (${freshnessViolations.length} rows fail the 30-day notice clause):\n`,
    );
    for (const v of freshnessViolations) {
      process.stderr.write(`  FAIL: ${v.reason}\n`);
    }
    process.stderr.write(
      `\nFix: wait until 30 days after the Since date before flipping DPA-on-file to Yes,\n` +
        `     or move the row back to Planned/Pending until the clock has run.\n`,
    );
  }
  process.exit(1);
}

// Entry point: run the gate as soon as the script is executed. main()
// terminates the process itself via process.exit().
main();

This is the file as it lives at the moment of this build. The canonical history lives in git. If you want the full history or a specific commit, write to hello@muntin.digital.

check-subprocessor-freshness.mjs · Verify · Muntin Ledger · Muntin