refactor: Remove gene order HTML template from code

beiko-lab · Oct 15, 2023 · b5fdfd7 · b5fdfd7
1 parent 8a3a40f
commit b5fdfd7
Show file tree

Hide file tree

Showing 7 changed files with 145 additions and 73 deletions.
diff --git a/bin/clustering.py b/bin/clustering.py
@@ -697,9 +697,7 @@ def cluster_neighborhoods(
         surrogates_json_data = clean_json_data(surrogates_json_data)
         with open(output_path + "/JSON/" + gene + "_surrogates.json", "w+") as outfile:
             json.dump(surrogates_json_data, outfile)
-        write_clustermap_JSON_HTML(
-            gene, output_path + "/index.html", output_path, rep_type="surrogates"
-        )
+        write_clustermap_JSON_HTML(gene, output_path, rep_type="surrogates")
 
     # Get neighborhoods dict for calculating similarity matrices (needed to compare contig ends)
     neighborhoods = get_neighborhoods_dict(fasta_path)

diff --git a/bin/extraction.py b/bin/extraction.py
@@ -69,13 +69,6 @@ def parse_args(args=None):
         "extracted neighborhood FASTA files will"
         " be saved.",
     )
-    parser.add_argument(
-        "-w",
-        dest="HTML_TEMPLATE",
-        metavar="html_template",
-        type=str,
-        help="Path to HTML template.",
-    )
     parser.add_argument(
         "-n",
         metavar="n",
@@ -887,7 +880,6 @@ def extract_neighborhoods(
     extract_path,
     gbk_path,
     output_path,
-    html_template,
     num_neighbors,
     cutoff_percent,
     label_cols=None,
@@ -1038,7 +1030,7 @@ def extract_neighborhoods(
             # Create JSON file
             write_neighborhood_JSON(neighborhood_JSON_dict, gene, output_path)
 
-        make_gene_HTML(neighborhoods.keys(), html_template, output_path)
+        make_gene_HTML(neighborhoods.keys(), output_path)
 
         with open(output_path + "/" + "neighborhood_indices.json", "w+") as outfile:
             outfile.write(json.dumps(neighborhood_indices, indent=4, sort_keys=True))
@@ -1116,7 +1108,7 @@ def extract_neighborhoods(
             # Create JSON file
             write_neighborhood_JSON(neighborhood_JSON_dict, gene, output_path)
 
-        make_gene_HTML(neighborhoods.keys(), html_template, output_path)
+        make_gene_HTML(neighborhoods.keys(), output_path)
 
         with open(output_path + "/" + "neighborhood_indices.txt", "w+") as outfile:
             outfile.write(str(neighborhood_indices))
@@ -1137,7 +1129,6 @@ def main(args=None):
         args.EXTRACT_PATH,
         args.GBK_PATH,
         args.OUTPUT_PATH,
-        args.HTML_TEMPLATE,
         args.n,
         args.p,
         args.c,

diff --git a/bin/json_utils.py b/bin/json_utils.py
@@ -511,14 +511,34 @@ def write_neighborhood_JSON(
         outfile.write("}\n")
 
 
-def write_clustermap_JSON_HTML(gene, sample_data_path, out_path, rep_type="standard"):
+def get_JSON_specific_configs(json_filename, json_path):
+    """
+    Determines values for json filepath and clustermap chart height to write to JSON HTML file.
+    Returns (json_filename, height_px); the caller places the name inside d3.json("...") itself.
+    """
+    with open(json_path, "r") as infile:
+        contents = infile.read()
+
+    # An empty JSON file means no clusters; previously json_data was left unbound in that case.
+    clusters = json.loads(contents)["clusters"] if contents.strip() else []
+    num_clusters = len(clusters)
+
+    # Calculate optimal canvas height as proportional to number of genomes
+    height = int(64.67 * num_clusters + 100)
+    # Charts shorter than 900px fill the viewport instead of using a fixed pixel height.
+    height_px = f"{height}px" if height >= 900 else "100vh"
+
+    # Return the bare filename: a full 'd3.json("...")' line here would be nested inside the
+    # template's own d3.json("{path_to_json}") call and produce invalid JavaScript.
+    return json_filename, height_px
+
+
+def write_clustermap_JSON_HTML(gene, out_path, rep_type="standard"):
     """
     Generates accompanying HTML file for clustermap compatible JSON representation of neighborhood.
     Creates standalone HTML file for each respective type of neighborhood representation (e.g. standard,
     surrogates, or with representative UPGMA cluster) in case user wants to load individual visualizations.
     """
-    json_filename = ""
-    html_filename = ""
 
     if rep_type == "upgma":
         json_filename = f"{gene}_upgma.json"
@@ -532,54 +552,123 @@ def write_clustermap_JSON_HTML(gene, sample_data_path, out_path, rep_type="stand
 
     json_path = f"{out_path}/JSON/{json_filename}"
     file_path = f"{out_path}/JSON/{html_filename}"
-    second_line = f'\t\td3.json("{json_filename}")\n'
-
-    # Make HTML index file with appropriate JSON
-    with open(file_path, "w") as html_outfile, open(sample_data_path) as template:
-        for line in template:
-            if "height: 100vh;" in line:
-                # Determine number of genomes present
-                with open(json_path, "r") as infile:
-                    if len(infile.readlines()) != 0:
-                        infile.seek(0)
-                        json_data = json.load(infile)
-                num_clusters = len(json_data["clusters"])
-
-                # Calculate optimal canvas height as proportional to number of genomes
-                height = int(64.67 * num_clusters + 100)
-                height_px = str(height) + "px"
-                if height < 900:
-                    height_px = "100vh"
-                html_outfile.write("\t\t\t\theight: {};\n".format(height_px))
-            else:
-                html_outfile.write(line)
-
-        html_outfile.write("\n")
-        html_outfile.write(second_line)
-        html_outfile.write("\t\t\t.then(data => {\n")
-        html_outfile.write('\t\t\t\tdiv.selectAll("div")\n')
-        html_outfile.write("\t\t\t\t\t.data([data])\n")
-        html_outfile.write('\t\t\t\t\t.join("div")\n')
-        html_outfile.write("\t\t\t\t\t.call(chart)\n\n")
-        html_outfile.write('\t\t\t\tlet svg = div.select("svg")\n')
-        html_outfile.write('\t\t\t\td3.select("#btn-save-svg")\n')
-        html_outfile.write('\t\t\t\t\t.on("click", () => {\n')
-        html_outfile.write("\t\t\t\t\t\tconst blob = serialise(svg)\n")
-        html_outfile.write('\t\t\t\t\t\tdownload(blob, "clinker.svg")\n')
-        html_outfile.write("\t\t\t\t\t})\n")
-        html_outfile.write("\t\t\t})\n")
-        html_outfile.write("\t</script>\n")
-        html_outfile.write("</html>")
-
-
-def make_gene_HTML(genes_list, sample_data_path, out_path):
+
+    json_file_path, height_px = get_JSON_specific_configs(json_filename, json_path)
+
+    # Write HTML contents to file to represent clustermap chart for the gene
+    html_content = """\
+    <!DOCTYPE html>
+    <html>
+        <head>
+            <meta charset="utf-8">
+            <title>cmap</title>
+            <script src="../dist/d3.min.js"></script>
+            <style>
+                body {{ margin: 0; padding: 0; }}
+                div {{
+                    width: 100vw;
+                    height: {height_value};
+                    margin: 0;
+                    padding: 0;
+                }}
+            </style>
+        </head>
+        <body>
+            <main>
+                <button id="btn-save-svg">Save</button>
+                <div id="plot"></div>
+            </main>
+        </body>
+        <script type="module">
+            import clusterMap from "../src/clusterMap.js"
+            function serialise(svg) {{
+                /* Saves the figure to SVG in its current state.
+                 * Clones the provided SVG and sets the width/height of the clone to the
+                 * bounding box of the original SVG. Thus, downloaded figures will be sized
+                 * correctly.
+                 * This function returns a new Blob, which can then be downloaded.
+                */
+                let node = svg.node();
+                const xmlns = "http://www.w3.org/2000/xmlns/";
+                const xlinkns = "http://www.w3.org/1999/xlink";
+                const svgns = "http://www.w3.org/2000/node";
+                const bbox = svg.select("g").node().getBBox()
+                node = node.cloneNode(true);
+                node.setAttribute("width", bbox.width);
+                node.setAttribute("height", bbox.height);
+                node.setAttributeNS(xmlns, "xmlns", svgns);
+                node.setAttributeNS(xmlns, "xmlns:xlink", xlinkns);
+                // Adjust x/y of <g> to account for axis/title position.
+                // Replaces the transform attribute, so drag/zoom is ignored.
+                d3.select(node)
+                    .select("g")
+                    .attr("transform", `translate({{Math.abs(bbox.x)}}, {{Math.abs(bbox.y)}})`)
+                const serializer = new window.XMLSerializer;
+                const string = serializer.serializeToString(node);
+                return new Blob([string], {{type: "image/node+xml"}});
+            }}
+            function download(blob, filename) {{
+                /* Downloads a given blob to filename.
+                 * This function appends a new anchor to the document, which points to the
+                 * supplied blob. The anchor.click() method is called to trigger the download,
+                 * then the anchor is removed.
+                */
+                const link = document.createElement("a");
+                link.href = URL.createObjectURL(blob);
+                link.download = filename;
+                document.body.appendChild(link);
+                link.click();
+                document.body.removeChild(link);
+            }}
+            const div = d3.select("#plot")
+                .attr("width", "2400vw")
+                .attr("height", "{height_value}")
+            const chart = clusterMap()
+                .config({{
+                    cluster: {{
+                        alignLabels: true
+                    }},
+                    gene: {{
+                        label: {{
+                            show: false,
+                        }}
+                    }},
+                    link: {{
+                        threshold: 0.3,
+                        bestOnly: true,
+                    }}
+                }})
+            d3.json("{path_to_json}")
+                    .then(data => {{
+                        div.selectAll("div")
+                            .data([data])
+                            .join("div")
+                            .call(chart)
+                        let svg = div.select("svg")
+                        d3.select("#btn-save-svg")
+                            .on("click", () => {{
+                                const blob = serialise(svg)
+                                download(blob, "clinker.svg")
+                            }})
+                    }})
+        </script>
+    </html>
+    """
+
+    with open(file_path, "w") as html_outfile:
+        html_outfile.write(
+            html_content.format(height_value=height_px, path_to_json=json_file_path)
+        )
+
+
+def make_gene_HTML(genes_list, out_path):
     """
     For each AMR gene for which a JSON file was created, generates an accompanying HTML file for rendering its gene
     order visualization with clustermap. This is done for each gene individually.
     """
     for gene in genes_list:
         # Make HTML index file with appropriate JSON
-        write_clustermap_JSON_HTML(gene, sample_data_path, out_path)
+        write_clustermap_JSON_HTML(gene, out_path)
 
 
 def get_cluster_data_genes_uid_list(json_cluster_data, genome_ids):
@@ -1044,6 +1133,4 @@ def make_representative_UPGMA_cluster_JSON(
         json.dump(final_json_data, outfile)
 
     # Make respective HTML file for Coeus
-    write_clustermap_JSON_HTML(
-        gene, output_path + "/index.html", output_path, rep_type="upgma"
-    )
+    write_clustermap_JSON_HTML(gene, output_path, rep_type="upgma")
diff --git a/modules/local/gene_order/extraction.nf b/modules/local/gene_order/extraction.nf
@@ -11,7 +11,6 @@ process EXTRACTION {
       path input_file_path
       path extract_path
       path gbk_path
-      path html_template
       val num_neighbors
       val percent_cutoff
       val label_cols
@@ -30,7 +29,6 @@ process EXTRACTION {
         -x $extract_path \\
         -g $gbk_path \\
         -o . \\
-        -w $html_template \\
         -n $num_neighbors \\
         -p $percent_cutoff \\
         -c $label_cols

diff --git a/nextflow.config b/nextflow.config
@@ -49,9 +49,8 @@ params {
     // Gene Order
     run_gene_order             = false
     input_file_path            = "$projectDir/test/gene-order/rgi_input.txt"
-    gene_order_html_template   = "$projectDir/test/gene-order/index.html"
     gene_order_percent_cutoff  = 0.25
-    gene_order_label_cols                 = null
+    gene_order_label_cols      = null
     num_neighbors              = 10
 
     // Optional clustering module hyperparameter options

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -217,7 +217,7 @@
                 },
                 "accessory_similarity": {
                     "type": "number",
-                    "default": 99.0,
+                    "default": 99,
                     "fa_icon": "far fa-clone",
                     "description": "Similarity threshold for accessory genes"
                 }
@@ -233,11 +233,6 @@
                     "type": "boolean",
                     "description": "Whether to run the Gene Order subworkflow"
                 },
-                "gene_order_html_template": {
-                    "type": "string",
-                    "default": "/home/jvfe/dev/dalhousie/arete/test/gene-order/index.html",
-                    "hidden": true
-                },
                 "input_file_path": {
                     "type": "string",
                     "default": "/home/jvfe/dev/dalhousie/arete/test/gene-order/rgi_input.txt",
@@ -248,6 +243,11 @@
                     "default": 0.25,
                     "description": "Cutoff percentage of genomes a gene should be present within to be included in extraction and subsequent analysis. Should be a float between 0 and 1 (e.g., 0.25 means only genes present in a minimum of 25% of genomes are kept)."
                 },
+                "gene_order_label_cols": {
+                    "type": "string",
+                    "default": "None",
+                    "description": "If using annotation files predicting features, list of space separated column names to be added to the gene names"
+                },
                 "num_neighbors": {
                     "type": "integer",
                     "default": 10,

diff --git a/subworkflows/local/gene_order.nf b/subworkflows/local/gene_order.nf
@@ -30,7 +30,6 @@ workflow GENE_ORDER {
         file(params.input_file_path),
         rgiFiles,
         gbkFiles,
-        file(params.gene_order_html_template),
         num_neighbors,
         percent_cutoff,
         label_cols