facebook · AlessioGr · Dec 30, 2024 · Dec 30, 2024 · Dec 30, 2024 · Dec 30, 2024
@@ -38,9 +38,20 @@ export function createMarkdownExport(
 
   // Export only uses text formats that are responsible for single format
   // e.g. it will filter out *** (bold, italic) and instead use separate ** and *
-  const textFormatTransformers = byType.textFormat.filter(
-    (transformer) => transformer.format.length === 1,
-  );
+  const textFormatTransformers = byType.textFormat
+    .filter((transformer) => transformer.format.length === 1)
+    // Make sure all text transformers that contain 'code' in their format are at the end of the array. Otherwise, formatted code like
+    // <strong><code>Bold Code</code></strong> will be exported as `**Bold Code**`: the code format will be applied first and the bold format
+    // second, so the bold tags end up inside the code span and are skipped entirely, as the code format prevents any further formatting.
+    .sort((a, b) => {
+      if (a.format.includes('code') && !b.format.includes('code')) {
+        return 1;
+      } else if (!a.format.includes('code') && b.format.includes('code')) {
+        return -1;
+      } else {
+        return 0;
+      }
+    });
 
   return (node) => {
     const output = [];
@@ -105,11 +116,18 @@ function exportChildren(
   node: ElementNode,
   textTransformersIndex: Array<TextFormatTransformer>,
   textMatchTransformers: Array<TextMatchTransformer>,
+  unclosedTags?: Array<{format: TextFormatType; tag: string}>,
+  unclosableTags?: Array<{format: TextFormatType; tag: string}>,
 ): string {
   const output = [];
   const children = node.getChildren();
   // keep track of unclosed tags from the very beginning
-  const unclosedTags: {format: TextFormatType; tag: string}[] = [];
+  if (!unclosedTags) {
+    unclosedTags = [];
+  }
+  if (!unclosableTags) {
+    unclosableTags = [];
+  }
 
   mainLoop: for (const child of children) {
     for (const transformer of textMatchTransformers) {
@@ -124,13 +142,21 @@ function exportChildren(
             parentNode,
             textTransformersIndex,
             textMatchTransformers,
+            unclosedTags,
+            // Add current unclosed tags to the list of unclosable tags - we don't want nested tags from
+            // textmatch transformers to close the outer ones, as that may result in invalid markdown.
+            // E.g. **text [text**](https://lexical.io)
+            // is invalid markdown, as the closing ** is inside the link.
+            //
+            [...unclosableTags, ...unclosedTags],
           ),
         (textNode, textContent) =>
           exportTextFormat(
             textNode,
             textContent,
             textTransformersIndex,
             unclosedTags,
+            unclosableTags,
           ),
       );
 
@@ -149,12 +175,19 @@ function exportChildren(
           child.getTextContent(),
           textTransformersIndex,
           unclosedTags,
+          unclosableTags,
         ),
       );
     } else if ($isElementNode(child)) {
       // empty paragraph returns ""
       output.push(
-        exportChildren(child, textTransformersIndex, textMatchTransformers),
+        exportChildren(
+          child,
+          textTransformersIndex,
+          textMatchTransformers,
+          unclosedTags,
+          unclosableTags,
+        ),
       );
     } else if ($isDecoratorNode(child)) {
       output.push(child.getTextContent());
@@ -170,6 +203,7 @@ function exportTextFormat(
   textTransformers: Array<TextFormatTransformer>,
   // unclosed tags include the markdown tags that haven't been closed yet, and their associated formats
   unclosedTags: Array<{format: TextFormatType; tag: string}>,
+  unclosableTags?: Array<{format: TextFormatType; tag: string}>,
 ): string {
   // This function handles the case of a string looking like this: "   foo   "
   // Where it would be invalid markdown to generate: "**   foo   **"
@@ -180,7 +214,8 @@ function exportTextFormat(
   // the opening tags to be added to the result
   let openingTags = '';
   // the closing tags to be added to the result
-  let closingTags = '';
+  let closingTagsBefore = '';
+  let closingTagsAfter = '';
 
   const prevNode = getTextSibling(node, true);
   const nextNode = getTextSibling(node, false);
@@ -210,23 +245,47 @@ function exportTextFormat(
 
   // close any tags in the same order they were applied, if necessary
   for (let i = 0; i < unclosedTags.length; i++) {
+    const nodeHasFormat = hasFormat(node, unclosedTags[i].format);
+    const nextNodeHasFormat = hasFormat(nextNode, unclosedTags[i].format);
+
     // prevent adding closing tag if next sibling will do it
-    if (hasFormat(nextNode, unclosedTags[i].format)) {
+    if (nodeHasFormat && nextNodeHasFormat) {
       continue;
     }
 
-    while (unclosedTags.length > i) {
-      const unclosedTag = unclosedTags.pop();
+    const unhandledUnclosedTags = [...unclosedTags]; // Shallow copy to avoid modifying the original array
+
+    while (unhandledUnclosedTags.length > i) {
+      const unclosedTag = unhandledUnclosedTags.pop();
+
+      // If the tag is unclosable, don't close it and leave it in the original array,
+      // so that it can be closed once it's no longer unclosable.
+      if (
+        unclosableTags &&
+        unclosedTag &&
+        unclosableTags.find((element) => element.tag === unclosedTag.tag)
+      ) {
+        continue;
+      }
+
       if (unclosedTag && typeof unclosedTag.tag === 'string') {
-        closingTags += unclosedTag.tag;
+        if (!nodeHasFormat) {
+          // Handles cases where the tag has not been closed before, e.g. if the previous node
+          // was a text match transformer that did not account for closing tags of the next node (e.g. a link)
+          closingTagsBefore += unclosedTag.tag;
+        } else if (!nextNodeHasFormat) {
+          closingTagsAfter += unclosedTag.tag;
+        }
       }
+      // Mutate the original array to remove the closed tag
+      unclosedTags.pop();
     }
     break;
   }
 
-  output = openingTags + output + closingTags;
+  output = openingTags + output + closingTagsAfter;
   // Replace trimmed version of textContent ensuring surrounding whitespace is not modified
-  return textContent.replace(frozenString, () => output);
+  return closingTagsBefore + textContent.replace(frozenString, () => output);
 }
 
 // Get next or previous text sibling a text node, including cases

@@ -13,7 +13,6 @@ import type {
   TextMatchTransformer,
   Transformer,
 } from './MarkdownTransformers';
-import type {TextNode} from 'lexical';
 
 import {$isListItemNode, $isListNode, ListItemNode} from '@lexical/list';
 import {$isQuoteNode} from '@lexical/rich-text';
@@ -29,13 +28,10 @@ import {
 } from 'lexical';
 import {IS_APPLE_WEBKIT, IS_IOS, IS_SAFARI} from 'shared/environment';
 
-import {
-  isEmptyParagraph,
-  PUNCTUATION_OR_SPACE,
-  transformersByType,
-} from './utils';
+import {importTextTransformers} from './importTextTransformers';
+import {isEmptyParagraph, transformersByType} from './utils';
 
-type TextFormatTransformersIndex = Readonly<{
+export type TextFormatTransformersIndex = Readonly<{
   fullMatchRegExpByTag: Readonly<Record<string, RegExp>>;
   openTagsRegExp: RegExp;
   transformersByTag: Readonly<Record<string, TextFormatTransformer>>;
@@ -246,7 +242,7 @@ function $importBlocks(
     }
   }
 
-  importTextFormatTransformers(
+  importTextTransformers(
     textNode,
     textFormatTransformersIndex,
     textMatchTransformers,
@@ -284,177 +280,6 @@ function $importBlocks(
   }
 }
 
-// Processing text content and replaces text format tags.
-// It takes outermost tag match and its content, creates text node with
-// format based on tag and then recursively executed over node's content
-//
-// E.g. for "*Hello **world**!*" string it will create text node with
-// "Hello **world**!" content and italic format and run recursively over
-// its content to transform "**world**" part
-function importTextFormatTransformers(
-  textNode: TextNode,
-  textFormatTransformersIndex: TextFormatTransformersIndex,
-  textMatchTransformers: Array<TextMatchTransformer>,
-) {
-  const textContent = textNode.getTextContent();
-  const match = findOutermostMatch(textContent, textFormatTransformersIndex);
-
-  if (!match) {
-    // Once text format processing is done run text match transformers, as it
-    // only can span within single text node (unline formats that can cover multiple nodes)
-    importTextMatchTransformers(textNode, textMatchTransformers);
-    return;
-  }
-
-  let currentNode, remainderNode, leadingNode;
-
-  // If matching full content there's no need to run splitText and can reuse existing textNode
-  // to update its content and apply format. E.g. for **_Hello_** string after applying bold
-  // format (**) it will reuse the same text node to apply italic (_)
-  if (match[0] === textContent) {
-    currentNode = textNode;
-  } else {
-    const startIndex = match.index || 0;
-    const endIndex = startIndex + match[0].length;
-
-    if (startIndex === 0) {
-      [currentNode, remainderNode] = textNode.splitText(endIndex);
-    } else {
-      [leadingNode, currentNode, remainderNode] = textNode.splitText(
-        startIndex,
-        endIndex,
-      );
-    }
-  }
-
-  currentNode.setTextContent(match[2]);
-  const transformer = textFormatTransformersIndex.transformersByTag[match[1]];
-
-  if (transformer) {
-    for (const format of transformer.format) {
-      if (!currentNode.hasFormat(format)) {
-        currentNode.toggleFormat(format);
-      }
-    }
-  }
-
-  // Recursively run over inner text if it's not inline code
-  if (!currentNode.hasFormat('code')) {
-    importTextFormatTransformers(
-      currentNode,
-      textFormatTransformersIndex,
-      textMatchTransformers,
-    );
-  }
-
-  // Run over leading/remaining text if any
-  if (leadingNode) {
-    importTextFormatTransformers(
-      leadingNode,
-      textFormatTransformersIndex,
-      textMatchTransformers,
-    );
-  }
-
-  if (remainderNode) {
-    importTextFormatTransformers(
-      remainderNode,
-      textFormatTransformersIndex,
-      textMatchTransformers,
-    );
-  }
-}
-
-function importTextMatchTransformers(
-  textNode_: TextNode,
-  textMatchTransformers: Array<TextMatchTransformer>,
-) {
-  let textNode = textNode_;
-
-  mainLoop: while (textNode) {
-    for (const transformer of textMatchTransformers) {
-      if (!transformer.replace || !transformer.importRegExp) {
-        continue;
-      }
-      const match = textNode.getTextContent().match(transformer.importRegExp);
-
-      if (!match) {
-        continue;
-      }
-
-      const startIndex = match.index || 0;
-      const endIndex = transformer.getEndIndex
-        ? transformer.getEndIndex(textNode, match)
-        : startIndex + match[0].length;
-
-      if (endIndex === false) {
-        continue;
-      }
-
-      let replaceNode, newTextNode;
-
-      if (startIndex === 0) {
-        [replaceNode, textNode] = textNode.splitText(endIndex);
-      } else {
-        [, replaceNode, newTextNode] = textNode.splitText(startIndex, endIndex);
-      }
-
-      if (newTextNode) {
-        importTextMatchTransformers(newTextNode, textMatchTransformers);
-      }
-      transformer.replace(replaceNode, match);
-      continue mainLoop;
-    }
-
-    break;
-  }
-}
-
-// Finds first "<tag>content<tag>" match that is not nested into another tag
-function findOutermostMatch(
-  textContent: string,
-  textTransformersIndex: TextFormatTransformersIndex,
-): RegExpMatchArray | null {
-  const openTagsMatch = textContent.match(textTransformersIndex.openTagsRegExp);
-
-  if (openTagsMatch == null) {
-    return null;
-  }
-
-  for (const match of openTagsMatch) {
-    // Open tags reg exp might capture leading space so removing it
-    // before using match to find transformer
-    const tag = match.replace(/^\s/, '');
-    const fullMatchRegExp = textTransformersIndex.fullMatchRegExpByTag[tag];
-    if (fullMatchRegExp == null) {
-      continue;
-    }
-
-    const fullMatch = textContent.match(fullMatchRegExp);
-    const transformer = textTransformersIndex.transformersByTag[tag];
-    if (fullMatch != null && transformer != null) {
-      if (transformer.intraword !== false) {
-        return fullMatch;
-      }
-
-      // For non-intraword transformers checking if it's within a word
-      // or surrounded with space/punctuation/newline
-      const {index = 0} = fullMatch;
-      const beforeChar = textContent[index - 1];
-      const afterChar = textContent[index + fullMatch[0].length];
-
-      if (
-        (!beforeChar || PUNCTUATION_OR_SPACE.test(beforeChar)) &&
-        (!afterChar || PUNCTUATION_OR_SPACE.test(afterChar))
-      ) {
-        return fullMatch;
-      }
-    }
-  }
-
-  return null;
-}
-
 function createTextFormatTransformersIndex(
   textTransformers: Array<TextFormatTransformer>,
 ): TextFormatTransformersIndex {