Stage 1 complete: shared packages with full test coverage

- packages/schema: 15 Vitest tests (6 valid + 6 invalid frontmatter + 3 round-trip)
- packages/sanitize: fail-closed remark plugin + 12 private fixtures + 6 clean fixtures, 20 tests
- packages/observability: Pino + correlation IDs + redaction; 5 tests with 100-log validation
- packages/linkedin-client: Posts API client + token store; 10 tests; AES-256-GCM substituted for libsodium crypto_secretbox (Bun ESM bug, see docs/deferred-gates.md D-001)

50/50 tests pass across 4 packages. All Stage 1 DoDs verified.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Angelo B. J. Luidens
2026-04-26 12:50:03 -04:00
parent 1dc1a1a07a
commit e529651de1
34 changed files with 1227 additions and 30 deletions

View File

@@ -0,0 +1,127 @@
import { readFileSync, readdirSync } from "node:fs";
import { join } from "node:path";
import { describe, expect, it } from "vitest";
import { SanitizeError, sanitize, type SanitizeOptions } from "./index";
// Root of the fixture corpus: three directories above this file, then test/corpus.
const CORPUS_ROOT = join(__dirname, "..", "..", "..", "test", "corpus");
// Fixtures that sanitize() is expected to reject with a SanitizeError.
const PRIVATE_DIR = join(CORPUS_ROOT, "private");
// Fixtures that sanitize() is expected to accept.
const CLEAN_DIR = join(CORPUS_ROOT, "clean");
/** Frontmatter keys read from a corpus fixture file. */
interface FixtureMeta {
// Forwarded to sanitize() as `vaultPath`.
vault_path: string;
// Forwarded to sanitize() as `outlet`.
outlet: string;
// Error code the private-corpus test expects the thrown SanitizeError to carry.
expected_error_code?: string;
// Target length handed to inflateLengthBody (0 is used when absent).
length_target?: number;
// Forwarded as `embedStrategy`; buildOptions falls back to "strip" when absent.
embed_strategy?: "resolve" | "strip";
// Forwarded to sanitize() as the frontmatter `tags` list.
expected_frontmatter_tags?: string[];
}
/** A parsed fixture: its file name, frontmatter metadata and markdown body. */
interface Fixture {
name: string;
meta: FixtureMeta;
body: string;
}
/**
 * Splits a raw fixture file into its frontmatter metadata and markdown body.
 *
 * The file must open with a `---` fenced frontmatter block. LF and CRLF line
 * endings are both accepted, as is a leading UTF-8 BOM, so fixtures checked
 * out with line-ending conversion still load.
 *
 * @param name - Fixture file name; used in the error message and test titles.
 * @param raw - Complete file contents.
 * @returns The fixture name, its parsed metadata and everything after the closing fence.
 * @throws Error when the file does not start with a frontmatter block.
 */
const parseFixture = (name: string, raw: string): Fixture => {
  const match = raw.match(/^\uFEFF?---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/);
  if (!match) throw new Error(`Fixture ${name} missing frontmatter`);
  const fmRaw = match[1]!;
  const body = match[2]!;
  // The mini-parser returns an untyped record; the shape is asserted, not validated.
  const meta = parseSimpleYaml(fmRaw) as FixtureMeta;
  return { name, meta, body };
};
/**
 * Parses the small YAML subset used by fixture frontmatter.
 *
 * Supported forms:
 * - `key: value` scalars; an unquoted all-digit value becomes a number, and a
 *   value wrapped in matching single or double quotes is unquoted and kept as
 *   a string.
 * - `key:` followed by `- item` lines (any indentation), collected as a string list.
 *
 * A blank line ends the current list. Lines without a colon that are not list
 * items are ignored. LF and CRLF line endings are both accepted.
 *
 * @param text - Frontmatter text without the `---` fences.
 * @returns A plain record of the parsed keys.
 */
const parseSimpleYaml = (text: string): Record<string, unknown> => {
  // Removes one pair of matching surrounding quotes, if present.
  const unquote = (value: string): string => {
    const quoted = /^"(.*)"$/.exec(value) ?? /^'(.*)'$/.exec(value);
    return quoted ? quoted[1]! : value;
  };

  const out: Record<string, unknown> = {};
  let currentListKey: string | null = null;

  for (const line of text.split(/\r?\n/)) {
    if (!line.trim()) {
      currentListKey = null;
      continue;
    }

    // List items are only meaningful directly under a `key:` line.
    const item = currentListKey ? /^\s*-\s+(.*)$/.exec(line) : null;
    if (item && currentListKey) {
      const arr = (out[currentListKey] as string[]) ?? [];
      arr.push(unquote(item[1]!.trim()));
      out[currentListKey] = arr;
      continue;
    }

    const idx = line.indexOf(":");
    if (idx < 0) continue;
    const key = line.slice(0, idx).trim();
    const val = line.slice(idx + 1).trim();

    if (val === "") {
      // `key:` with no value opens a list.
      currentListKey = key;
      out[key] = [];
      continue;
    }

    currentListKey = null;
    const unquoted = unquote(val);
    if (unquoted !== val) out[key] = unquoted; // quoted scalars always stay strings
    else if (/^\d+$/.test(val)) out[key] = Number(val);
    else out[key] = val;
  }

  return out;
};
/**
 * Loads and parses every `.md` fixture in a directory.
 *
 * File names are sorted before parsing because directory enumeration order is
 * filesystem-dependent; sorting keeps test registration order stable.
 *
 * @param dir - Directory containing the fixture files.
 * @returns The parsed fixtures, ordered by file name.
 * @throws Error when the directory cannot be read or a fixture lacks frontmatter.
 */
const loadFixtures = (dir: string): Fixture[] =>
  readdirSync(dir)
    .filter((file) => file.endsWith(".md"))
    .sort()
    .map((file) => parseFixture(file, readFileSync(join(dir, file), "utf8")));
/**
 * Expands a placeholder fixture body into filler text longer than `target`.
 *
 * A body containing the `[GENERATED_BODY_` marker is replaced entirely by
 * repeated filler; any other body is returned untouched.
 *
 * @param body - Fixture body, possibly containing the placeholder marker.
 * @param target - Length the generated text must exceed.
 * @returns The original body, or generated filler text.
 */
const inflateLengthBody = (body: string, target: number): string => {
  const wantsGeneratedBody = body.includes("[GENERATED_BODY_");
  if (wantsGeneratedBody) {
    const filler = "lorem ipsum dolor sit amet ";
    // Five extra copies guarantee the result overshoots the target.
    const copies = Math.ceil(target / filler.length) + 5;
    return filler.repeat(copies);
  }
  return body;
};
/**
 * Translates fixture frontmatter into the options object sanitize() expects.
 *
 * @param meta - Parsed fixture metadata.
 * @returns Options with `embedStrategy` defaulting to "strip" and `tags`
 *          defaulting to an empty list.
 */
const buildOptions = (meta: FixtureMeta): SanitizeOptions => {
  const { vault_path, outlet, embed_strategy, expected_frontmatter_tags } = meta;
  return {
    vaultPath: vault_path,
    outlet,
    embedStrategy: embed_strategy ?? "strip",
    // Copy so the fixture's own array is never shared with the sanitizer.
    tags: expected_frontmatter_tags ? [...expected_frontmatter_tags] : [],
  };
};
// Every fixture in the private corpus must be rejected with the SanitizeError
// code declared in its frontmatter.
describe("Private corpus — 12 fixtures must FAIL closed", () => {
  const fixtures = loadFixtures(PRIVATE_DIR);

  it("loads exactly 12 private fixtures", () => {
    expect(fixtures).toHaveLength(12);
  });

  for (const fx of fixtures) {
    // Separator keeps the file name and the expected code readable in reports.
    it(`${fx.name} -> ${fx.meta.expected_error_code}`, () => {
      // A private fixture without an expected code is a broken fixture.
      expect(fx.meta.expected_error_code).toBeDefined();

      const body = inflateLengthBody(fx.body, fx.meta.length_target ?? 0);
      let caught: unknown = null;
      try {
        sanitize(body, buildOptions(fx.meta));
      } catch (e: unknown) {
        caught = e;
      }

      expect(caught).not.toBeNull();
      expect(caught).toBeInstanceOf(SanitizeError);
      expect((caught as SanitizeError).code).toBe(fx.meta.expected_error_code);
    });
  }
});
// Every fixture in the clean corpus must sanitize without throwing and leave
// no wikilink, callout header or dataview fence in the output.
describe("Clean corpus — 6 fixtures must round-trip without error", () => {
  const cleanFixtures = loadFixtures(CLEAN_DIR);

  it("loads exactly 6 clean fixtures", () => {
    expect(cleanFixtures).toHaveLength(6);
  });

  cleanFixtures.forEach(({ name, meta, body: source }) => {
    it(`${name} sanitizes cleanly`, () => {
      const { body, contentHash } = sanitize(source, buildOptions(meta));

      expect(body.length).toBeGreaterThan(0);
      expect(contentHash).toMatch(/^[0-9a-f]{64}$/);

      const forbidden = [/\[\[/, /^>\s*\[!/m, /```dataview/];
      for (const pattern of forbidden) {
        expect(body).not.toMatch(pattern);
      }
    });
  });
});

View File

@@ -0,0 +1,20 @@
/** Machine-readable reason a document was rejected by the sanitizer. */
export type SanitizeErrorCode =
  | "PRIVATE_PATH_BLOCKED"
  | "PRIVATE_TAG_BLOCKED"
  | "WIKILINK_TO_PRIVATE_PATH"
  | "OUTLET_LENGTH_EXCEEDED"
  | "FRONTMATTER_INVALID";

/**
 * Error raised when content must not be published.
 *
 * `code` identifies the rule that fired; `detail` optionally carries
 * structured context for the failure.
 */
export class SanitizeError extends Error {
  constructor(
    public readonly code: SanitizeErrorCode,
    message: string,
    public readonly detail?: Record<string, unknown>,
  ) {
    super(message);
    this.name = "SanitizeError";
  }
}

/**
 * Renders a SanitizeError as a single line: `[CODE] message`, followed by
 * ` :: <detail as JSON>` when detail is present.
 *
 * @param e - The error to render.
 * @returns The formatted line.
 */
export const formatSanitizeError = (e: SanitizeError): string => {
  const headline = `[${e.code}] ${e.message}`;
  if (!e.detail) {
    return headline;
  }
  return `${headline} :: ${JSON.stringify(e.detail)}`;
};

View File

@@ -1,2 +1,137 @@
export const SANITIZE_PACKAGE_READY = false;
// Implementation in Stage 1.2. See docs/plans/2026-04-19-phase1-plan.md Stage 1.
import { createHash } from "node:crypto";
import { SanitizeError } from "./errors";
import {
OUTLET_LENGTH_LIMITS,
PRIVATE_PATH_PREFIXES,
PRIVATE_PATH_PATTERNS,
PRIVATE_TAGS,
isPrivatePath,
isPrivateTag,
} from "./rules";
export { SanitizeError, formatSanitizeError } from "./errors";
export type { SanitizeErrorCode } from "./errors";
export {
OUTLET_LENGTH_LIMITS,
PRIVATE_PATH_PREFIXES,
PRIVATE_PATH_PATTERNS,
PRIVATE_TAGS,
isPrivatePath,
isPrivateTag,
};
/** Inputs that control how sanitize() treats a document. */
export interface SanitizeOptions {
// Path of the note; sanitize() rejects the document when isPrivatePath() matches it.
vaultPath: string;
// Outlet name used to look up a length limit in OUTLET_LENGTH_LIMITS; outlets without an entry are not length-checked.
outlet: keyof typeof OUTLET_LENGTH_LIMITS | string;
// "resolve" rewrites `![[target]]` embeds to `/_assets/` image links; any other value (including undefined) removes them.
embedStrategy?: "resolve" | "strip";
// Frontmatter tags, with or without a leading "#"; checked together with inline tags found in the body.
tags?: readonly string[];
}
/** Outcome of a successful sanitize() call. */
export interface SanitizeResult {
// Sanitized markdown.
body: string;
// Hex-encoded SHA-256 digest of `body`.
contentHash: string;
// sanitize() currently always returns an empty list here.
warnings: readonly string[];
}
// `[[target]]` or `[[target|alias]]`; group 1 is everything between the brackets.
const WIKILINK_RE = /\[\[([^\]]+)\]\]/g;
// `![[target]]` embeds; group 1 is everything between the brackets.
const EMBED_RE = /!\[\[([^\]]+)\]\]/g;
// A fenced block whose info string starts with `dataview`, up to the next closing fence.
const DATAVIEW_BLOCK_RE = /```dataview[\s\S]*?```/g;
// A whole callout header line such as `> [!note] Title`.
// NOTE(review): not referenced by the code visible here; stripCallouts uses its own inline pattern.
const CALLOUT_LINE_RE = /^>\s*\[![^\]]+\][^\n]*$/gm;
// `#tag` at the start of the text or after whitespace; group 2 is the tag name without "#".
const INLINE_TAG_RE = /(^|\s)#([\w/-]+)/g;

/**
 * Removes every dataview fenced block and then trims leading whitespace.
 *
 * @param md - Markdown source.
 * @returns Markdown without dataview blocks.
 */
const stripDataview = (md: string): string => {
  const withoutBlocks = md.replace(DATAVIEW_BLOCK_RE, "");
  return withoutBlocks.trimStart();
};

/**
 * Drops callout header lines (`> [!type] ...`). Other lines, including the
 * quoted lines that follow a header, are kept as they are.
 *
 * @param md - Markdown source.
 * @returns Markdown without callout header lines.
 */
const stripCallouts = (md: string): string => {
  const kept: string[] = [];
  for (const line of md.split("\n")) {
    const isCalloutHeader = /^>\s*\[![^\]]+\]/.test(line);
    if (!isCalloutHeader) {
      kept.push(line);
    }
  }
  return kept.join("\n");
};
/**
 * Rewrites or removes `![[target]]` embeds.
 *
 * With `embedStrategy: "resolve"` each embed becomes a markdown image pointing
 * at `/_assets/<target>`; the part after `|` is discarded. Because that
 * publishes a reference to the target, the target is first checked against
 * the private-path rules. With any other strategy the embed is removed, so
 * nothing about its target reaches the output.
 *
 * @param md - Markdown source.
 * @param opts - Sanitize options; only `embedStrategy` is read.
 * @returns Markdown with embeds resolved or stripped.
 * @throws SanitizeError with code `WIKILINK_TO_PRIVATE_PATH` when a resolved
 *         embed targets a private vault path.
 */
const replaceEmbeds = (md: string, opts: SanitizeOptions): string =>
  md.replace(EMBED_RE, (_full, target) => {
    if (opts.embedStrategy !== "resolve") {
      return "";
    }
    const trimmed = String(target).split("|")[0]!.trim();
    // Fail closed: a resolved embed must never link to private material.
    if (isPrivatePath(trimmed)) {
      throw new SanitizeError(
        "WIKILINK_TO_PRIVATE_PATH",
        `Embed targets a private vault path: ${trimmed}`,
        { target: trimmed },
      );
    }
    return `![${trimmed}](/_assets/${trimmed.replace(/^\/+/, "")})`;
  });
/**
 * Replaces each `[[target]]` / `[[target|alias]]` with plain display text.
 *
 * The display text is the alias when one is given, otherwise the last
 * `/`-separated segment of the target.
 *
 * @param md - Markdown source.
 * @returns Markdown with wikilinks flattened to text.
 * @throws SanitizeError with code `WIKILINK_TO_PRIVATE_PATH` when a link
 *         targets a private vault path.
 */
const replaceWikilinks = (md: string): string =>
  md.replace(WIKILINK_RE, (_full, inside) => {
    const parts = String(inside).split("|");
    const pathPart = parts[0]!;
    const alias = parts[1];

    if (isPrivatePath(pathPart.trim())) {
      throw new SanitizeError(
        "WIKILINK_TO_PRIVATE_PATH",
        `Wikilink targets a private vault path: ${pathPart}`,
        { target: pathPart },
      );
    }

    const label = alias ?? pathPart.split("/").pop() ?? pathPart;
    return label.trim();
  });
/**
 * Gathers every tag attached to a document: inline `#tags` found in the
 * markdown first, then the frontmatter tags. All are returned with a leading "#".
 *
 * @param md - Markdown source scanned for inline tags.
 * @param frontmatterTags - Tags declared in frontmatter, with or without "#".
 * @returns Inline tags followed by frontmatter tags.
 */
const collectTags = (md: string, frontmatterTags?: readonly string[]): string[] => {
  const inlineTags = Array.from(md.matchAll(INLINE_TAG_RE), (match) => match[2])
    .filter((tagName): tagName is string => Boolean(tagName))
    .map((tagName) => `#${tagName}`);

  const declaredTags = (frontmatterTags ?? []).map((declared) =>
    declared.startsWith("#") ? declared : `#${declared}`,
  );

  return inlineTags.concat(declaredTags);
};
/**
 * Rejects the document when any of its tags is private.
 *
 * @param tags - Tags to check, in order; the first private one is reported.
 * @throws SanitizeError with code `PRIVATE_TAG_BLOCKED`.
 */
const enforceTagFirewall = (tags: readonly string[]): void => {
  const offending = tags.find((candidate) => isPrivateTag(candidate));
  if (offending === undefined) {
    return;
  }
  throw new SanitizeError(
    "PRIVATE_TAG_BLOCKED",
    `Private tag detected: ${offending}`,
    { tag: offending },
  );
};
/**
 * Rejects a body longer than the limit configured for its outlet.
 * Outlets without an entry in OUTLET_LENGTH_LIMITS are not checked.
 *
 * @param body - Sanitized markdown; measured with `String.length`.
 * @param outlet - Outlet name used to look up the limit.
 * @throws SanitizeError with code `OUTLET_LENGTH_EXCEEDED`.
 */
const enforceLength = (body: string, outlet: string): void => {
  const limit = OUTLET_LENGTH_LIMITS[outlet];
  const actual = body.length;
  const overLimit = limit !== undefined && actual > limit;
  if (overLimit) {
    throw new SanitizeError(
      "OUTLET_LENGTH_EXCEEDED",
      `Sanitized body length ${actual} exceeds outlet limit ${limit} for ${outlet}`,
      { outlet, limit, actual },
    );
  }
};
/**
 * Computes the SHA-256 digest of the UTF-8 encoded body.
 *
 * @param body - Text to hash.
 * @returns The digest as a lowercase hex string.
 */
const computeHash = (body: string): string => {
  const hasher = createHash("sha256");
  hasher.update(body, "utf8");
  return hasher.digest("hex");
};
/**
 * Sanitizes a vault note for publication, throwing instead of publishing
 * whenever a privacy or length rule is violated.
 *
 * Order of operations:
 * 1. reject a private `vaultPath`;
 * 2. reject private tags (inline and frontmatter), checked on the raw markdown;
 * 3. strip dataview blocks, strip callout headers, handle embeds, flatten wikilinks;
 * 4. collapse runs of three or more newlines to two and trim;
 * 5. enforce the outlet's length limit on the result.
 *
 * @param markdown - Raw note body.
 * @param opts - Path, outlet, embed strategy and frontmatter tags.
 * @returns The sanitized body, its SHA-256 hex hash and an empty warnings list.
 * @throws SanitizeError when any rule is violated.
 */
export const sanitize = (markdown: string, opts: SanitizeOptions): SanitizeResult => {
  const { vaultPath, outlet, tags: frontmatterTags } = opts;

  if (isPrivatePath(vaultPath)) {
    throw new SanitizeError(
      "PRIVATE_PATH_BLOCKED",
      `Vault path is in private blocklist: ${vaultPath}`,
      { vaultPath },
    );
  }

  enforceTagFirewall(collectTags(markdown, frontmatterTags));

  // Embeds must be handled before wikilinks: `![[x]]` also contains `[[x]]`.
  const stages: ReadonlyArray<(md: string) => string> = [
    stripDataview,
    stripCallouts,
    (md) => replaceEmbeds(md, opts),
    (md) => replaceWikilinks(md),
  ];
  const transformed = stages.reduce((current, stage) => stage(current), markdown);
  const body = transformed.replace(/\n{3,}/g, "\n\n").trim();

  enforceLength(body, outlet);

  return {
    body,
    contentHash: computeHash(body),
    warnings: [],
  };
};

View File

@@ -0,0 +1,42 @@
/** Top-level vault folders whose contents must never be published. */
export const PRIVATE_PATH_PREFIXES: readonly string[] = [
  "Family Matters/",
  "Financial Matters/",
  "Journal/",
  "Day Planners/",
  "People/",
  "Clients/",
];

/**
 * Path patterns that mark private content at any depth: a `Clients/` subfolder
 * whose name contains `[NDA]`, and any `.private/` folder.
 */
export const PRIVATE_PATH_PATTERNS: readonly RegExp[] = [
  /(^|\/)Clients\/[^\/]*\[NDA\][^\/]*\//i,
  /(^|\/)\.private\//,
];

/** Tags that block publication. Matching is case-insensitive. */
export const PRIVATE_TAGS: readonly string[] = [
  "#private",
  "#heal-internal",
  "#confidential",
  "#ndA",
  "#nda",
  "#draft-only",
];

/** Maximum body length per outlet, keyed by outlet name. */
export const OUTLET_LENGTH_LIMITS: Record<string, number> = {
  "linkedin.member": 3000,
  "linkedin.org": 3000,
  "linkedin.article": 125_000,
  "twitter": 280,
  "stargue.com": 100_000,
  "stargue.net": 100_000,
};

/**
 * Reduces a vault path to a canonical relative form: backslashes become "/",
 * empty and "." segments are dropped, and ".." removes the preceding segment.
 * A trailing "/" is kept so folder paths still match folder prefixes.
 */
const canonicalizeVaultPath = (path: string): string => {
  const unified = path.trim().replace(/\\/g, "/");
  const resolved: string[] = [];
  for (const segment of unified.split("/")) {
    if (segment === "" || segment === ".") continue;
    if (segment === "..") resolved.pop();
    else resolved.push(segment);
  }
  const joined = resolved.join("/");
  return joined !== "" && unified.endsWith("/") ? `${joined}/` : joined;
};

/**
 * Reports whether a vault path points at private content.
 *
 * The path is checked both as given (leading slashes removed) and in
 * canonical form, so that `Blog/../Journal/x`, `./Journal/x` and `Journal\x`
 * cannot slip past the blocklist. Folder prefixes are compared
 * case-insensitively so that `journal/x` is treated like `Journal/x`.
 * Every path that matched the plain prefix/pattern check still matches.
 *
 * @param path - Vault path of a note or link target.
 * @returns `true` when the path matches a private prefix or pattern.
 */
export const isPrivatePath = (path: string): boolean => {
  const asGiven = path.replace(/^\/+/, "");
  const canonical = canonicalizeVaultPath(path);
  // The lower-cased form lets case-sensitive patterns (".private/") match too.
  const candidates = [asGiven, canonical, canonical.toLowerCase()];

  return candidates.some((candidate) => {
    const lowered = candidate.toLowerCase();
    if (PRIVATE_PATH_PREFIXES.some((prefix) => lowered.startsWith(prefix.toLowerCase()))) {
      return true;
    }
    return PRIVATE_PATH_PATTERNS.some((re) => re.test(candidate));
  });
};

/**
 * Reports whether a tag is private.
 *
 * The tag may be given with or without a leading "#"; comparison is
 * case-insensitive. Nested tags are private when their parent is, so
 * `#private/health` is blocked because `#private` is.
 *
 * @param tag - Tag to check.
 * @returns `true` when the tag or one of its ancestors is in PRIVATE_TAGS.
 */
export const isPrivateTag = (tag: string): boolean => {
  const trimmed = tag.trim();
  const normalized = (trimmed.startsWith("#") ? trimmed : `#${trimmed}`).toLowerCase();
  return PRIVATE_TAGS.some((candidate) => {
    const blocked = candidate.toLowerCase();
    return normalized === blocked || normalized.startsWith(`${blocked}/`);
  });
};