Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[lexical-markdown] Bug Fix: support link and inline code text formats #7004

Open
wants to merge 23 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
01ecf0e
fix(markdown): incorrect markdown import for links containing text fo…
AlessioGr Dec 30, 2024
3628b82
simpler test examples
AlessioGr Dec 30, 2024
eb0ca84
fix: markdown export for link nodes with text formats
AlessioGr Dec 30, 2024
957bc66
new text format & text match importing logic. This ensures that outer…
AlessioGr Dec 30, 2024
054c865
add tests
AlessioGr Dec 30, 2024
320a569
fix export
AlessioGr Dec 30, 2024
5c4e11b
remove console.log
AlessioGr Dec 30, 2024
81f0a13
Merge branch 'main' into fork/fix-md-link-text-formats
AlessioGr Dec 30, 2024
14cda52
fix lint errors
AlessioGr Dec 30, 2024
b66b8cc
fix build
AlessioGr Dec 30, 2024
4dbded1
fix build
AlessioGr Dec 30, 2024
db80558
fix: node tags opened before the link node were incorrectly closed wi…
AlessioGr Dec 30, 2024
fb1a91c
add comment explaining logic
AlessioGr Dec 30, 2024
d4dbf73
fix: consecutive text match was not processed
AlessioGr Dec 30, 2024
8f051ad
fix: formatted inline code block are not formatted in the correct order
AlessioGr Jan 7, 2025
8afcb0d
Merge branch 'main' into fork/fix-md-link-text-formats
AlessioGr Jan 7, 2025
7f04ef3
more reliable fix
AlessioGr Jan 7, 2025
113860c
Merge branch 'main' into fork/fix-md-link-text-formats
etrepum Jan 20, 2025
4fd26bb
perf: less array.include calls
AlessioGr Jan 21, 2025
7987e04
perf: shared canContainTransformableMarkdown function, avoid editor.u…
AlessioGr Jan 21, 2025
a820427
Merge branch 'main' into fork/fix-md-link-text-formats
AlessioGr Jan 21, 2025
a0898cd
sanitize link title
AlessioGr Jan 21, 2025
51bf22b
undo change
AlessioGr Jan 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 71 additions & 12 deletions packages/lexical-markdown/src/MarkdownExport.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,20 @@ export function createMarkdownExport(

// Export only uses text formats that are responsible for single format
// e.g. it will filter out *** (bold, italic) and instead use separate ** and *
const textFormatTransformers = byType.textFormat.filter(
(transformer) => transformer.format.length === 1,
);
const textFormatTransformers = byType.textFormat
.filter((transformer) => transformer.format.length === 1)
// Make sure all text transformers that contain 'code' in their format are at the end of the array. Otherwise, formatted code like
// <strong><code>code</code></strong> will be exported as `**Bold Code**`, as the code format will be applied first, and the bold format
// will be applied second and thus skipped entirely, as the code format will prevent any further formatting.
.sort((a, b) => {
if (a.format.includes('code') && !b.format.includes('code')) {
return 1;
} else if (!a.format.includes('code') && b.format.includes('code')) {
return -1;
} else {
return 0;
}
AlessioGr marked this conversation as resolved.
Show resolved Hide resolved
});

return (node) => {
const output = [];
Expand Down Expand Up @@ -105,11 +116,18 @@ function exportChildren(
node: ElementNode,
textTransformersIndex: Array<TextFormatTransformer>,
textMatchTransformers: Array<TextMatchTransformer>,
unclosedTags?: Array<{format: TextFormatType; tag: string}>,
unclosableTags?: Array<{format: TextFormatType; tag: string}>,
): string {
const output = [];
const children = node.getChildren();
// keep track of unclosed tags from the very beginning
const unclosedTags: {format: TextFormatType; tag: string}[] = [];
if (!unclosedTags) {
unclosedTags = [];
}
if (!unclosableTags) {
unclosableTags = [];
}

mainLoop: for (const child of children) {
for (const transformer of textMatchTransformers) {
Expand All @@ -124,13 +142,21 @@ function exportChildren(
parentNode,
textTransformersIndex,
textMatchTransformers,
unclosedTags,
// Add current unclosed tags to the list of unclosable tags - we don't want nested tags from
// textmatch transformers to close the outer ones, as that may result in invalid markdown.
// E.g. **text [text**](https://lexical.io)
// is invalid markdown, as the closing ** is inside the link.
//
[...unclosableTags, ...unclosedTags],
),
(textNode, textContent) =>
exportTextFormat(
textNode,
textContent,
textTransformersIndex,
unclosedTags,
unclosableTags,
),
);

Expand All @@ -149,12 +175,19 @@ function exportChildren(
child.getTextContent(),
textTransformersIndex,
unclosedTags,
unclosableTags,
),
);
} else if ($isElementNode(child)) {
// empty paragraph returns ""
output.push(
exportChildren(child, textTransformersIndex, textMatchTransformers),
exportChildren(
child,
textTransformersIndex,
textMatchTransformers,
unclosedTags,
unclosableTags,
),
);
} else if ($isDecoratorNode(child)) {
output.push(child.getTextContent());
Expand All @@ -170,6 +203,7 @@ function exportTextFormat(
textTransformers: Array<TextFormatTransformer>,
// unclosed tags include the markdown tags that haven't been closed yet, and their associated formats
unclosedTags: Array<{format: TextFormatType; tag: string}>,
unclosableTags?: Array<{format: TextFormatType; tag: string}>,
): string {
// This function handles the case of a string looking like this: " foo "
// Where it would be invalid markdown to generate: "** foo **"
Expand All @@ -180,7 +214,8 @@ function exportTextFormat(
// the opening tags to be added to the result
let openingTags = '';
// the closing tags to be added to the result
let closingTags = '';
let closingTagsBefore = '';
let closingTagsAfter = '';

const prevNode = getTextSibling(node, true);
const nextNode = getTextSibling(node, false);
Expand Down Expand Up @@ -210,23 +245,47 @@ function exportTextFormat(

// close any tags in the same order they were applied, if necessary
for (let i = 0; i < unclosedTags.length; i++) {
const nodeHasFormat = hasFormat(node, unclosedTags[i].format);
const nextNodeHasFormat = hasFormat(nextNode, unclosedTags[i].format);

// prevent adding closing tag if next sibling will do it
if (hasFormat(nextNode, unclosedTags[i].format)) {
if (nodeHasFormat && nextNodeHasFormat) {
continue;
}

while (unclosedTags.length > i) {
const unclosedTag = unclosedTags.pop();
const unhandledUnclosedTags = [...unclosedTags]; // Shallow copy to avoid modifying the original array

while (unhandledUnclosedTags.length > i) {
const unclosedTag = unhandledUnclosedTags.pop();

// If tag is unclosable, don't close it and leave it in the original array,
// So that it can be closed when it's no longer unclosable
if (
unclosableTags &&
unclosedTag &&
unclosableTags.find((element) => element.tag === unclosedTag.tag)
) {
continue;
}

if (unclosedTag && typeof unclosedTag.tag === 'string') {
closingTags += unclosedTag.tag;
if (!nodeHasFormat) {
// Handles cases where the tag has not been closed before, e.g. if the previous node
// was a text match transformer that did not account for closing tags of the next node (e.g. a link)
closingTagsBefore += unclosedTag.tag;
} else if (!nextNodeHasFormat) {
closingTagsAfter += unclosedTag.tag;
}
}
// Mutate the original array to remove the closed tag
unclosedTags.pop();
}
break;
}

output = openingTags + output + closingTags;
output = openingTags + output + closingTagsAfter;
// Replace trimmed version of textContent ensuring surrounding whitespace is not modified
return textContent.replace(frozenString, () => output);
return closingTagsBefore + textContent.replace(frozenString, () => output);
}

// Get next or previous text sibling of a text node, including cases
Expand Down
183 changes: 4 additions & 179 deletions packages/lexical-markdown/src/MarkdownImport.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import type {
TextMatchTransformer,
Transformer,
} from './MarkdownTransformers';
import type {TextNode} from 'lexical';

import {$isListItemNode, $isListNode, ListItemNode} from '@lexical/list';
import {$isQuoteNode} from '@lexical/rich-text';
Expand All @@ -29,13 +28,10 @@ import {
} from 'lexical';
import {IS_APPLE_WEBKIT, IS_IOS, IS_SAFARI} from 'shared/environment';

import {
isEmptyParagraph,
PUNCTUATION_OR_SPACE,
transformersByType,
} from './utils';
import {importTextTransformers} from './importTextTransformers';
import {isEmptyParagraph, transformersByType} from './utils';

type TextFormatTransformersIndex = Readonly<{
export type TextFormatTransformersIndex = Readonly<{
fullMatchRegExpByTag: Readonly<Record<string, RegExp>>;
openTagsRegExp: RegExp;
transformersByTag: Readonly<Record<string, TextFormatTransformer>>;
Expand Down Expand Up @@ -246,7 +242,7 @@ function $importBlocks(
}
}

importTextFormatTransformers(
importTextTransformers(
textNode,
textFormatTransformersIndex,
textMatchTransformers,
Expand Down Expand Up @@ -284,177 +280,6 @@ function $importBlocks(
}
}

// Processes text content and replaces text format tags.
// It takes the outermost tag match and its content, creates a text node with
// a format based on the tag, and is then recursively executed over the node's content.
//
// E.g. for the "*Hello **world**!*" string it will create a text node with
// "Hello **world**!" content and italic format, and run recursively over
// its content to transform the "**world**" part.
//
// The passed textNode is modified in place: it may be split with splitText,
// have its text content replaced, and have formats toggled on. When no format
// tag matches, the node is handed to importTextMatchTransformers instead.
function importTextFormatTransformers(
textNode: TextNode,
textFormatTransformersIndex: TextFormatTransformersIndex,
textMatchTransformers: Array<TextMatchTransformer>,
) {
const textContent = textNode.getTextContent();
const match = findOutermostMatch(textContent, textFormatTransformersIndex);

if (!match) {
// Once text format processing is done run text match transformers, as it
// only can span within single text node (unlike formats that can cover multiple nodes)
importTextMatchTransformers(textNode, textMatchTransformers);
return;
}

// currentNode: the node holding the matched "<tag>content<tag>" range.
// leadingNode / remainderNode: text before / after the match, if any.
let currentNode, remainderNode, leadingNode;

// If matching full content there's no need to run splitText and can reuse existing textNode
// to update its content and apply format. E.g. for **_Hello_** string after applying bold
// format (**) it will reuse the same text node to apply italic (_)
if (match[0] === textContent) {
currentNode = textNode;
} else {
const startIndex = match.index || 0;
const endIndex = startIndex + match[0].length;

if (startIndex === 0) {
// Match starts at the beginning: only a trailing remainder can exist.
[currentNode, remainderNode] = textNode.splitText(endIndex);
} else {
// Match is in the middle or at the end: split off the leading text as well.
[leadingNode, currentNode, remainderNode] = textNode.splitText(
startIndex,
endIndex,
);
}
}

// match[2] becomes the node's new text and match[1] is the lookup key into
// transformersByTag (presumably the content and tag capture groups of the
// full-match RegExp, which is built outside this view -- TODO confirm).
currentNode.setTextContent(match[2]);
const transformer = textFormatTransformersIndex.transformersByTag[match[1]];

if (transformer) {
// Only toggle formats that are not set yet, so an existing format is never switched off.
for (const format of transformer.format) {
if (!currentNode.hasFormat(format)) {
currentNode.toggleFormat(format);
}
}
}

// Recursively run over inner text if it's not inline code
if (!currentNode.hasFormat('code')) {
importTextFormatTransformers(
currentNode,
textFormatTransformersIndex,
textMatchTransformers,
);
}

// Run over leading/remaining text if any
if (leadingNode) {
importTextFormatTransformers(
leadingNode,
textFormatTransformersIndex,
textMatchTransformers,
);
}

if (remainderNode) {
importTextFormatTransformers(
remainderNode,
textFormatTransformersIndex,
textMatchTransformers,
);
}
}

// Runs the text match transformers over a text node.
//
// Each transformer that defines both `replace` and `importRegExp` is tried in
// array order. On the first match, the matched range is split into its own
// node and passed to transformer.replace together with the match; the scan
// then restarts from the first transformer on whatever `textNode` refers to
// at that point. The loop ends when no transformer matches or when
// `textNode` becomes falsy.
function importTextMatchTransformers(
textNode_: TextNode,
textMatchTransformers: Array<TextMatchTransformer>,
) {
// Local, reassignable handle on the node currently being scanned.
let textNode = textNode_;

mainLoop: while (textNode) {
for (const transformer of textMatchTransformers) {
// Transformers without a replace callback or an import RegExp cannot be used here.
if (!transformer.replace || !transformer.importRegExp) {
continue;
}
const match = textNode.getTextContent().match(transformer.importRegExp);

if (!match) {
continue;
}

const startIndex = match.index || 0;
// A transformer may supply its own end index; otherwise the match length is used.
const endIndex = transformer.getEndIndex
? transformer.getEndIndex(textNode, match)
: startIndex + match[0].length;

// getEndIndex returning false means this transformer is skipped for this match.
if (endIndex === false) {
continue;
}

let replaceNode, newTextNode;

if (startIndex === 0) {
// Match at the start: scanning continues on the second node returned by
// splitText; if there is none, textNode becomes undefined and the loop ends.
[replaceNode, textNode] = textNode.splitText(endIndex);
} else {
// Match further in: the first split result is discarded and textNode is
// not reassigned (presumably it still refers to the leading text after
// the split -- TODO confirm against TextNode.splitText).
[, replaceNode, newTextNode] = textNode.splitText(startIndex, endIndex);
}

// Text after the match is processed recursively before the replacement is applied.
if (newTextNode) {
importTextMatchTransformers(newTextNode, textMatchTransformers);
}
transformer.replace(replaceNode, match);
// Restart from the first transformer on the current textNode.
continue mainLoop;
}

// No transformer matched the current node: nothing left to do.
break;
}
}

// Returns the first full "<tag>content<tag>" match in textContent that is
// acceptable for its transformer, or null when there is none.
//
// Candidate tags come from openTagsRegExp; each candidate is looked up in
// fullMatchRegExpByTag and transformersByTag. Transformers whose `intraword`
// is explicitly false are accepted only when the match is bounded on both
// sides by the start/end of the text or by a PUNCTUATION_OR_SPACE character.
function findOutermostMatch(
  textContent: string,
  textTransformersIndex: TextFormatTransformersIndex,
): RegExpMatchArray | null {
  const openTags = textContent.match(textTransformersIndex.openTagsRegExp);

  if (openTags == null) {
    return null;
  }

  for (let i = 0; i < openTags.length; i++) {
    // The open-tags RegExp may capture one leading whitespace character;
    // drop it before using the tag as a lookup key.
    const tag = openTags[i].replace(/^\s/, '');

    const tagRegExp = textTransformersIndex.fullMatchRegExpByTag[tag];
    if (tagRegExp == null) {
      continue;
    }

    const candidate = textContent.match(tagRegExp);
    const transformer = textTransformersIndex.transformersByTag[tag];
    if (candidate == null || transformer == null) {
      continue;
    }

    // Anything other than an explicit `false` allows matches inside a word.
    if (transformer.intraword !== false) {
      return candidate;
    }

    // Non-intraword transformer: require a boundary on both sides of the match.
    const start = candidate.index === undefined ? 0 : candidate.index;
    const charBefore = textContent[start - 1];
    const charAfter = textContent[start + candidate[0].length];

    const boundedBefore = !charBefore || PUNCTUATION_OR_SPACE.test(charBefore);
    if (!boundedBefore) {
      continue;
    }

    const boundedAfter = !charAfter || PUNCTUATION_OR_SPACE.test(charAfter);
    if (boundedAfter) {
      return candidate;
    }
  }

  return null;
}

function createTextFormatTransformersIndex(
textTransformers: Array<TextFormatTransformer>,
): TextFormatTransformersIndex {
Expand Down
Loading
Loading