/* FIGURES VIEW — multimodal figure gallery over the real /figures index.
Each card crops the source page image to the figure's bbox; the lightbox
shows the full page with the bbox overlaid. */
// Render a caption with KaTeX. Captions carry relatex'd math in $...$ or \(...\)
// (the VLM emits either), so both delimiters are enabled. Caption-only by design.
/**
 * Caption text component whose math is typeset in place by KaTeX auto-render.
 *
 * Props (destructured): `text` is coerced to a string (falsy values become "");
 * `className` and `style` are not read by any code visible in this block.
 * NOTE(review): they are presumably applied to the element markup that is
 * missing from this view — confirm against the full file.
 */
function MathText({ text, className, style }) {
// Handle on the rendered DOM node; the effect below passes it to KaTeX.
const ref = useRef(null);
// The relatex VLM writes LaTeX-correct `\%` for a literal percent, but KaTeX
// only renders the math spans, so a prose `\%` shows its backslash. Unescape
// `\%` -> `%` outside `$...$` (inside math, `%` is a comment so leave it).
// The first alternative captures a whole `$...$` span and returns it unchanged;
// only a `\%` that falls outside such a span reaches the second alternative.
// NOTE(review): `$$...$$` is consumed as two empty `$$` pairs, so a `\%` inside
// display math IS rewritten, and `\(...\)` / `\[...\]` spans are not protected
// at all — confirm whether captions can contain `\%` in those forms.
const cleaned = String(text || "").replace(/(\$[^$]*\$)|\\%/g, (_m, math) => math || "%");
// Re-typeset whenever the cleaned caption string changes.
useEffect(() => {
const el = ref.current;
// No-op until the node exists and the auto-render global is available.
if (!el || typeof window.renderMathInElement !== "function") return;
try {
window.renderMathInElement(el, {
delimiters: [
{ left: "$$", right: "$$", display: true },
{ left: "\\[", right: "\\]", display: true },
{ left: "$", right: "$", display: false },
{ left: "\\(", right: "\\)", display: false },
],
// Asks KaTeX not to throw on a bad expression; the try/catch is a second guard.
throwOnError: false,
});
} catch (_) { /* leave the raw text on a KaTeX error */ }
}, [cleaned]);
// NOTE(review): the element markup that should wrap {cleaned} (and carry `ref`)
// is not visible in this view. As written, `return` followed by a newline ends
// the statement (automatic semicolon insertion), so nothing is returned.
return
{cleaned}
;
}
// Drop repeated paragraphs. Table chunks store the caption twice — once as the
// extracted caption, once embedded in Docling's table markdown — and after the
// relatex pass one copy carries LaTeX ($\Delta V$) while the other is still flat
// (∆ V). Normalise to bare alphanumerics so the two collapse to one key, and
// keep the LaTeX copy. Plain duplicate paragraphs (identical) dedupe too.
/**
 * Reduce a caption paragraph to a lowercase ASCII-alphanumeric comparison key.
 *
 * @param {*} p - Paragraph text; coerced with String().
 * @returns {string} Key with math delimiters, LaTeX commands and every
 *   non-alphanumeric character removed (may be empty).
 */
function normCaption(p) {
  // Applied strictly in this order: `$` is dropped first, so an escaped `\$`
  // followed by letters then reads as a LaTeX command and is removed whole.
  const strippers = [
    /\$/g, // math delimiters
    /\\[a-zA-Z]+/g, // LaTeX commands such as \Delta
    /[{}^_\\]/g, // grouping, sub/superscript markers, stray backslashes
    /[^a-zA-Z0-9]/g, // everything that is not an ASCII letter or digit
  ];
  let key = String(p);
  for (const pattern of strippers) {
    key = key.replace(pattern, "");
  }
  return key.toLowerCase();
}
/**
 * Remove repeated paragraphs from a caption, keeping first-seen order.
 *
 * Paragraphs are separated by one or more blank lines and compared by their
 * normCaption() key. When two paragraphs share a key, the copy that carries
 * math markup (`$`, `\(` or `\[`) replaces a copy that has none, so the
 * LaTeX version survives. Paragraphs whose key is empty (no ASCII
 * alphanumerics) are never treated as duplicates of one another.
 *
 * @param {*} text - Caption text; falsy values are treated as "".
 * @returns {string} Surviving paragraphs joined with a blank line.
 */
function dedupeParagraphs(text) {
  // Captions may use either `$...$` or `\(...\)` / `\[...\]` delimiters, so
  // all three count as "the LaTeX copy", not just `$`.
  const hasMath = (s) => /\$|\\\(|\\\[/.test(s);

  const kept = [];
  // Normalised key -> index into `kept`; constant-time lookup per paragraph.
  const slotByKey = new Map();

  const paragraphs = String(text || "")
    .split(/\n{2,}/)
    .map((s) => s.trim())
    .filter(Boolean);

  for (const p of paragraphs) {
    const k = normCaption(p);
    const idx = k ? slotByKey.get(k) : undefined;
    if (idx === undefined) {
      // Empty keys are deliberately not registered: they must never collide.
      if (k) slotByKey.set(k, kept.length);
      kept.push(p);
    } else if (hasMath(p) && !hasMath(kept[idx])) {
      kept[idx] = p;
    }
  }
  return kept.join("\n\n");
}
// A table chunk's text is "<caption>\n\n<table markdown>" (chunking.table_to_chunk).
// Split it into the descriptive name and the table markdown.
/**
 * Separate a chunk's text into its caption and its table markdown.
 *
 * The table is taken to start at the first line whose trimmed form begins
 * with "|"; everything before that line is the caption.
 *
 * @param {*} text - Chunk text; falsy values are treated as "".
 * @returns {{ name: string, data: string }} `name` is the de-duplicated
 *   caption; `data` is the trimmed table markdown, or "" when no table line
 *   exists.
 */
function splitCaptionData(text) {
  const rows = String(text || "").split("\n");
  const tableStart = rows.findIndex((row) => row.trim().startsWith("|"));

  // No table line: the whole text is the caption.
  if (tableStart < 0) {
    return { name: dedupeParagraphs(text), data: "" };
  }

  const captionPart = rows.slice(0, tableStart).join("\n");
  const tablePart = rows.slice(tableStart).join("\n").trim();
  return { name: dedupeParagraphs(captionPart), data: tablePart };
}
// Crop a 150-DPI page image to a figure's PDF-point bbox. Computes the crop
// transform from the image's natural size on load; falls back to the full page
// width until then (and when a chunk has no bbox).
/**
 * Page image cropped to a figure's bounding box.
 *
 * Props (destructured): `url`, `bbox` (used as [x0, y0, x1, y1]) and
 * `fallbackH` (default 150). Only `bbox` is read by the code visible here.
 * NOTE(review): `url`, `fallbackH`, `s` and `onLoad` are presumably consumed
 * by the JSX that is missing from this view — confirm against the full file.
 */
function FigCrop({ url, bbox, fallbackH = 150 }) {
// Crop transform; stays null until onLoad succeeds with a usable bbox.
const [s, setS] = useState(null);
const onLoad = (e) => {
const img = e.target;
const nW = img.naturalWidth, nH = img.naturalHeight;
// Leave `s` null when the image has no natural size or bbox is not a 4-element array.
if (!nW || !nH || !Array.isArray(bbox) || bbox.length !== 4) return;
// Pixels -> points at 72 pt/inch, using the fixed 150 DPI of the page render.
const DPI = 150, pageW = (nW * 72) / DPI, pageH = (nH * 72) / DPI;
const [x0, y0, x1, y1] = bbox;
// Box origin as a fraction of the page. y0 is measured from the top edge here —
// assumes a top-left-origin bbox; TODO confirm against the indexer.
const fx = x0 / pageW, fy = y0 / pageH;
// Box size as a fraction of the page, floored at 2% so a degenerate box
// cannot produce a huge or infinite scale below.
const fw = Math.max((x1 - x0) / pageW, 0.02), fh = Math.max((y1 - y0) / pageH, 0.02);
setS({
// Image width as a percentage of the crop box width (scales the box to 100%).
widthPct: 100 / fw,
// Negative offsets, in percent of the crop box, that bring the box origin to (0, 0).
leftPct: -(fx / fw) * 100,
topPct: -(fy / fh) * 100,
// Width / height of the cropped region in image pixels.
aspect: (fw * nW) / (fh * nH),
});
};
// NOTE(review): the JSX between `return (` and `);` is not visible in this view.
return (
);
}
/* Display category: docling table chunks carry kind="table" while their role
stays "figure" — surface them as tables so the filter can separate them. */
/**
 * Category label shown for an indexed chunk.
 *
 * @param {{ kind?: string, role?: string }} f - Figure record.
 * @returns {string} "table" when `f.kind` is "table"; otherwise `f.role`,
 *   or "figure" when the role is missing or empty.
 */
function figCategory(f) {
  if (f.kind === "table") {
    return "table";
  }
  return f.role || "figure";
}
/**
 * Gallery card for one figure record.
 *
 * Props (destructured): `f` (its `caption` is read here) and `onOpen`.
 * NOTE(review): `onOpen`, `name` and `hasCap` are presumably consumed by the
 * JSX that is missing from this view — confirm against the full file.
 */
function FigureCard({ f, onOpen }) {
// Only the caption part is needed here; any table markdown is ignored.
const { name } = splitCaptionData(f.caption);
// Truthy only for a non-empty caption that is not wholly one bracketed token
// such as "[...]". Note this is "" rather than false when `name` is empty.
const hasCap = name && !/^\[.+\]$/.test(name.trim());
// NOTE(review): the JSX between `return (` and `);` is not visible in this view.
return (
);
}
function FigureLightbox({ f, onClose }) {
const [ov, setOv] = useState(null);
const [jsonView, setJsonView] = useState(false);
const imgRef = useRef(null);
// Place the bbox overlay in pixels relative to .lb-img, derived from the
// image's own offset + rendered size. .lb-img has padding:22px and is a grid
// cell that stretches to the (taller) side column, so a %-based overlay
// measured the padded/stretched box, not the image — pixels off the image
// geometry are robust to both.
const place = useCallback(() => {
const img = imgRef.current;
if (!img || !img.naturalWidth || !Array.isArray(f?.bbox) || f.bbox.length !== 4) {
setOv(null);
return;
}
const DPI = 150, pW = (img.naturalWidth * 72) / DPI, pH = (img.naturalHeight * 72) / DPI;
const [x0, y0, x1, y1] = f.bbox;
setOv({
top: img.offsetTop + (y0 / pH) * img.clientHeight,
left: img.offsetLeft + (x0 / pW) * img.clientWidth,
width: ((x1 - x0) / pW) * img.clientWidth,
height: ((y1 - y0) / pH) * img.clientHeight,
});
}, [f]);
useEffect(() => { setOv(null); setJsonView(false); }, [f]);
useEffect(() => {
if (!f) return;
const onEsc = (e) => { if (e.key === "Escape") onClose(); };
const onResize = () => place();
document.addEventListener("keydown", onEsc);
window.addEventListener("resize", onResize);
return () => { document.removeEventListener("keydown", onEsc); window.removeEventListener("resize", onResize); };
}, [f, onClose, place]);
if (!f) return null;
const { name, data } = splitCaptionData(f.caption);
const hasCaption = name && !/^\[.+\]$/.test(name.trim());
const noCap =
Indexed as a {figCategory(f)} chunk; the box marks its region on the source page. {hasCaption ? (f.has_vlm_caption ? "Caption written by a VLM." : "Caption extracted from the document.") : "No caption was captured for this region."}