Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add KaTeX support for block and inline math rendering in Markdown #1643

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 119 additions & 53 deletions src/lib/components/chat/MarkdownRenderer.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,127 @@
import katex from "katex";
import DOMPurify from "isomorphic-dompurify";
import { Marked } from "marked";
import type { Tokens, TokenizerExtension, RendererExtension } from "marked";
import CodeBlock from "../CodeBlock.svelte";

export let content: string;
export let sources: WebSearchSource[] = [];

/**
 * Token shape emitted by the block-level KaTeX tokenizer
 * (`$$ ... $$` and `\[ ... \]` delimiters).
 */
interface katexBlockToken extends Tokens.Generic {
// Discriminator the renderer checks before handing the token to KaTeX.
type: "katexBlock";
// The full matched source text, delimiters included.
raw: string;
// The LaTeX between the delimiters, with surrounding whitespace trimmed.
text: string;
// Block math is always rendered in KaTeX display mode.
displayMode: true;
}

/**
 * Token shape emitted by the inline KaTeX tokenizer
 * (`$ ... $` and `\( ... \)` delimiters).
 */
interface katexInlineToken extends Tokens.Generic {
// Discriminator the renderer checks before handing the token to KaTeX.
type: "katexInline";
// The full matched source text, delimiters included.
raw: string;
// The LaTeX between the delimiters, with surrounding whitespace trimmed.
text: string;
// Inline math is never rendered in KaTeX display mode.
displayMode: false;
}

/**
 * Marked extension for display (block) math.
 *
 * Recognises two delimiter pairs at the start of the remaining source,
 * `$$ ... $$` and `\[ ... \]`, and renders the enclosed LaTeX with KaTeX
 * using the token's `displayMode` (always `true` for block tokens).
 */
export const katexBlockExtension: TokenizerExtension & RendererExtension = {
  name: "katexBlock",
  level: "block",

  /**
   * Reports where the next possible block-math opener (`$$` or `\[`) sits in
   * `src`, or -1 when there is none.
   */
  start(src: string): number | undefined {
    const opener = /(\${2}|\\\[)/.exec(src);
    if (opener === null) {
      return -1;
    }
    return opener.index;
  },

  /**
   * Attempts to consume a block-math span from the very start of `src`.
   * Returns the token on success, `undefined` when neither delimiter pair
   * matches.
   */
  tokenizer(src: string): katexBlockToken | undefined {
    // Tried in order: `$$ ... $$` first, then `\[ ... \]`.
    const delimiterRules = [/^\${2}([\s\S]+?)\${2}/, /^\\\[([\s\S]+?)\\\]/];

    for (const rule of delimiterRules) {
      const hit = rule.exec(src);
      if (hit) {
        const token: katexBlockToken = {
          type: "katexBlock",
          raw: hit[0],
          text: hit[1].trim(),
          displayMode: true,
        };
        return token;
      }
    }

    return undefined;
  },

  /**
   * Renders a `katexBlock` token to a KaTeX HTML string; any other token type
   * is declined by returning `undefined`.
   */
  renderer(token) {
    if (token.type !== "katexBlock") {
      return undefined;
    }

    // throwOnError: false is passed so KaTeX does not throw on invalid LaTeX.
    return katex.renderToString(token.text, {
      throwOnError: false,
      displayMode: token.displayMode,
    });
  },
};

/**
 * Marked extension for inline math.
 *
 * Recognises `$ ... $` and `\( ... \)` at the start of the remaining inline
 * source and renders the enclosed LaTeX with KaTeX using the token's
 * `displayMode` (always `false` for inline tokens).
 *
 * The single-dollar form follows the usual "TeX math dollars" convention so
 * that prose containing currency (e.g. "$5 and $10") is not mistaken for
 * math: the opening `$` must be followed by a non-whitespace character, the
 * closing `$` must be preceded by a non-whitespace character and must not be
 * followed by a digit.
 */
const katexInlineExtension: TokenizerExtension & RendererExtension = {
  name: "katexInline",
  level: "inline",

  /**
   * Reports where the next possible inline-math opener (`$` or `\(`) sits in
   * `src`, or -1 when there is none.
   */
  start(src: string): number | undefined {
    const match = src.match(/(\$|\\\()/);
    return match ? match.index : -1;
  },

  /**
   * Attempts to consume an inline-math span from the very start of `src`.
   * Returns the token on success, `undefined` when no rule matches.
   */
  tokenizer(src: string): katexInlineToken | undefined {
    // 1) $...$
    //    (?!\s)     - opening `$` must touch the formula
    //    [^$\s]\$   - closing `$` must touch the formula
    //    (?!\d)     - a `$` followed by a digit is treated as currency
    const rule1 = /^\$(?!\s)([^$]*?[^$\s])\$(?!\d)/;
    const match1 = rule1.exec(src);
    if (match1) {
      const token: katexInlineToken = {
        type: "katexInline",
        raw: match1[0],
        text: match1[1].trim(),
        displayMode: false,
      };
      return token;
    }

    // 2) \(...\)
    const rule2 = /^\\\(([\s\S]+?)\\\)/;
    const match2 = rule2.exec(src);
    if (match2) {
      const token: katexInlineToken = {
        type: "katexInline",
        raw: match2[0],
        text: match2[1].trim(),
        displayMode: false,
      };
      return token;
    }

    return undefined;
  },

  /**
   * Renders a `katexInline` token to a KaTeX HTML string; any other token
   * type is declined by returning `undefined`.
   */
  renderer(token) {
    if (token.type === "katexInline") {
      // throwOnError: false is passed so KaTeX does not throw on invalid LaTeX.
      return katex.renderToString(token.text, {
        throwOnError: false,
        displayMode: token.displayMode,
      });
    }
    return undefined;
  },
};

function addInlineCitations(md: string, webSearchSources: WebSearchSource[] = []): string {
const linkStyle =
"color: rgb(59, 130, 246); text-decoration: none; hover:text-decoration: underline;";
Expand All @@ -30,63 +146,13 @@
});
}

/**
 * Replaces the HTML-significant characters `<`, `>` and `&` in `content`
 * with their entity equivalents. Newlines are matched by the pattern but are
 * passed through unchanged.
 */
function escapeHTML(content: string) {
  const toEntity = (ch: string): string => {
    switch (ch) {
      case "<":
        return "&lt;";
      case ">":
        return "&gt;";
      case "&":
        return "&amp;";
      default:
        // "\n" lands here: it has no replacement and is emitted as-is.
        return ch;
    }
  };

  return content.replace(/[<>&\n]/g, toEntity);
}

/**
 * Scans an already-rendered HTML string for LaTeX delimited by `$$…$$`,
 * `$…$`, `( … )` or `[ … ]` and replaces each span with KaTeX output.
 *
 * Delimiter pairs are applied in the listed order, each over the result of
 * the previous pass. Display-mode results are wrapped in a centred `<p>`.
 * If KaTeX throws, the error is logged and the original text is kept.
 *
 * @param parsed HTML string to post-process.
 * @returns The string with recognised math spans replaced.
 */
function processLatex(parsed: string) {
  // Escape special regex characters so a delimiter can be embedded in a pattern.
  const escapeForRegex = (raw: string) => raw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  const delimiters = [
    { left: "$$", right: "$$", display: true },
    { left: "$", right: "$", display: false },
    { left: "( ", right: " )", display: false },
    { left: "[ ", right: " ]", display: true },
  ];

  return delimiters.reduce((html, { left, right, display }) => {
    const opening = escapeForRegex(left);
    const closing = escapeForRegex(right);

    // Lazily match anything between the delimiters; the look-arounds reject
    // delimiters that are glued to a word character on the outside.
    const pattern = new RegExp(`(?<!\\w)${opening}([^]*?)${closing}(?!\\w)`, "g");

    return html.replace(pattern, (match, latex) => {
      try {
        const rendered = katex.renderToString(latex.trim(), { displayMode: display });
        // Display math gets a full-width, centred paragraph wrapper.
        return display ? `<p style="width:100%;text-align:center;">${rendered}</p>` : rendered;
      } catch (error) {
        console.error("KaTeX error:", error);
        return match; // Return original on error
      }
    });
  }, parsed);
}

const marked = new Marked({
hooks: {
preprocess: (md) => addInlineCitations(escapeHTML(md), sources),
postprocess: (html) => {
return DOMPurify.sanitize(processLatex(html));
},
preprocess: (md) => addInlineCitations(md, sources),
postprocess: (html) => DOMPurify.sanitize(html),
},
extensions: [katexBlockExtension, katexInlineExtension],
renderer: {
codespan: (code) => `<code>${code.replaceAll("&amp;", "&")}</code>`,
link: (href, title, text) =>
`<a href="${href?.replace(/>$/, "")}" target="_blank" rel="noreferrer">${text}</a>`,
},
Expand Down
Loading