Skip to content

Commit

Permalink
refactor: Remove gene order HTML template from code
Browse files Browse the repository at this point in the history
  • Loading branch information
jvfe committed Oct 15, 2023
1 parent 8a3a40f commit b5fdfd7
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 73 deletions.
4 changes: 1 addition & 3 deletions bin/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -697,9 +697,7 @@ def cluster_neighborhoods(
surrogates_json_data = clean_json_data(surrogates_json_data)
with open(output_path + "/JSON/" + gene + "_surrogates.json", "w+") as outfile:
json.dump(surrogates_json_data, outfile)
write_clustermap_JSON_HTML(
gene, output_path + "/index.html", output_path, rep_type="surrogates"
)
write_clustermap_JSON_HTML(gene, output_path, rep_type="surrogates")

# Get neighborhoods dict for calculating similarity matrices (needed to compare contig ends)
neighborhoods = get_neighborhoods_dict(fasta_path)
Expand Down
13 changes: 2 additions & 11 deletions bin/extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,6 @@ def parse_args(args=None):
"extracted neighborhood FASTA files will"
" be saved.",
)
parser.add_argument(
"-w",
dest="HTML_TEMPLATE",
metavar="html_template",
type=str,
help="Path to HTML template.",
)
parser.add_argument(
"-n",
metavar="n",
Expand Down Expand Up @@ -887,7 +880,6 @@ def extract_neighborhoods(
extract_path,
gbk_path,
output_path,
html_template,
num_neighbors,
cutoff_percent,
label_cols=None,
Expand Down Expand Up @@ -1038,7 +1030,7 @@ def extract_neighborhoods(
# Create JSON file
write_neighborhood_JSON(neighborhood_JSON_dict, gene, output_path)

make_gene_HTML(neighborhoods.keys(), html_template, output_path)
make_gene_HTML(neighborhoods.keys(), output_path)

with open(output_path + "/" + "neighborhood_indices.json", "w+") as outfile:
outfile.write(json.dumps(neighborhood_indices, indent=4, sort_keys=True))
Expand Down Expand Up @@ -1116,7 +1108,7 @@ def extract_neighborhoods(
# Create JSON file
write_neighborhood_JSON(neighborhood_JSON_dict, gene, output_path)

make_gene_HTML(neighborhoods.keys(), html_template, output_path)
make_gene_HTML(neighborhoods.keys(), output_path)

with open(output_path + "/" + "neighborhood_indices.txt", "w+") as outfile:
outfile.write(str(neighborhood_indices))
Expand All @@ -1137,7 +1129,6 @@ def main(args=None):
args.EXTRACT_PATH,
args.GBK_PATH,
args.OUTPUT_PATH,
args.HTML_TEMPLATE,
args.n,
args.p,
args.c,
Expand Down
183 changes: 135 additions & 48 deletions bin/json_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,14 +511,34 @@ def write_neighborhood_JSON(
outfile.write("}\n")


def write_clustermap_JSON_HTML(gene, sample_data_path, out_path, rep_type="standard"):
def get_JSON_specific_configs(json_filename, json_path):
    """
    Determine the JSON path and chart height to write into a clustermap HTML file.

    :param json_filename: Name of the JSON file as it should be referenced from
        the generated HTML page.
    :param json_path: Filesystem path of the JSON file; it is read to count the
        entries under its "clusters" key.
    :return: Tuple ``(json_file_path, height_px)``. ``json_file_path`` is the bare
        filename to place inside the page's ``d3.json("...")`` call and
        ``height_px`` is a CSS height: ``"100vh"`` when the computed height is
        below 900, otherwise ``"<height>px"``.
    """
    # The HTML template already wraps this value in d3.json("..."), so only the
    # bare filename is returned. Returning a pre-rendered 'd3.json("...")' line
    # would nest one call inside the other and produce invalid JavaScript.
    json_file_path = json_filename

    with open(json_path, "r") as infile:
        content = infile.read()

    # An empty JSON file is treated as containing no clusters instead of
    # failing later on an unbound variable.
    num_clusters = len(json.loads(content)["clusters"]) if content else 0

    # Calculate optimal canvas height as proportional to number of genomes
    height = int(64.67 * num_clusters + 100)
    height_px = "100vh" if height < 900 else f"{height}px"

    return json_file_path, height_px


def write_clustermap_JSON_HTML(gene, out_path, rep_type="standard"):
"""
Generates accompanying HTML file for clustermap compatible JSON representation of neighborhood.
Creates standalone HTML file for each respective type of neighborhood representation (e.g. standard,
surrogates, or with representative UPGMA cluster) in case user wants to load individual visualizations.
"""
json_filename = ""
html_filename = ""

if rep_type == "upgma":
json_filename = f"{gene}_upgma.json"
Expand All @@ -532,54 +552,123 @@ def write_clustermap_JSON_HTML(gene, sample_data_path, out_path, rep_type="stand

json_path = f"{out_path}/JSON/{json_filename}"
file_path = f"{out_path}/JSON/{html_filename}"
second_line = f'\t\td3.json("{json_filename}")\n'

# Make HTML index file with appropriate JSON
with open(file_path, "w") as html_outfile, open(sample_data_path) as template:
for line in template:
if "height: 100vh;" in line:
# Determine number of genomes present
with open(json_path, "r") as infile:
if len(infile.readlines()) != 0:
infile.seek(0)
json_data = json.load(infile)
num_clusters = len(json_data["clusters"])

# Calculate optimal canvas height as proportional to number of genomes
height = int(64.67 * num_clusters + 100)
height_px = str(height) + "px"
if height < 900:
height_px = "100vh"
html_outfile.write("\t\t\t\theight: {};\n".format(height_px))
else:
html_outfile.write(line)

html_outfile.write("\n")
html_outfile.write(second_line)
html_outfile.write("\t\t\t.then(data => {\n")
html_outfile.write('\t\t\t\tdiv.selectAll("div")\n')
html_outfile.write("\t\t\t\t\t.data([data])\n")
html_outfile.write('\t\t\t\t\t.join("div")\n')
html_outfile.write("\t\t\t\t\t.call(chart)\n\n")
html_outfile.write('\t\t\t\tlet svg = div.select("svg")\n')
html_outfile.write('\t\t\t\td3.select("#btn-save-svg")\n')
html_outfile.write('\t\t\t\t\t.on("click", () => {\n')
html_outfile.write("\t\t\t\t\t\tconst blob = serialise(svg)\n")
html_outfile.write('\t\t\t\t\t\tdownload(blob, "clinker.svg")\n')
html_outfile.write("\t\t\t\t\t})\n")
html_outfile.write("\t\t\t})\n")
html_outfile.write("\t</script>\n")
html_outfile.write("</html>")


def make_gene_HTML(genes_list, sample_data_path, out_path):

json_file_path, height_px = get_JSON_specific_configs(json_filename, json_path)

# Write HTML contents to file to represent clustermap chart for the gene
html_content = """\
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>cmap</title>
<script src="../dist/d3.min.js"></script>
<style>
body {{ margin: 0; padding: 0; }}
div {{
width: 100vw;
height: {height_value};
margin: 0;
padding: 0;
}}
</style>
</head>
<body>
<main>
<button id="btn-save-svg">Save</button>
<div id="plot"></div>
</main>
</body>
<script type="module">
import clusterMap from "../src/clusterMap.js"
function serialise(svg) {{
/* Saves the figure to SVG in its current state.
* Clones the provided SVG and sets the width/height of the clone to the
* bounding box of the original SVG. Thus, downloaded figures will be sized
* correctly.
* This function returns a new Blob, which can then be downloaded.
*/
let node = svg.node();
const xmlns = "http://www.w3.org/2000/xmlns/";
const xlinkns = "http://www.w3.org/1999/xlink";
const svgns = "http://www.w3.org/2000/node";
const bbox = svg.select("g").node().getBBox()
node = node.cloneNode(true);
node.setAttribute("width", bbox.width);
node.setAttribute("height", bbox.height);
node.setAttributeNS(xmlns, "xmlns", svgns);
node.setAttributeNS(xmlns, "xmlns:xlink", xlinkns);
// Adjust x/y of <g> to account for axis/title position.
// Replaces the transform attribute, so drag/zoom is ignored.
d3.select(node)
.select("g")
.attr("transform", `translate({{Math.abs(bbox.x)}}, {{Math.abs(bbox.y)}})`)
const serializer = new window.XMLSerializer;
const string = serializer.serializeToString(node);
return new Blob([string], {{type: "image/node+xml"}});
}}
function download(blob, filename) {{
/* Downloads a given blob to filename.
* This function appends a new anchor to the document, which points to the
* supplied blob. The anchor.click() method is called to trigger the download,
* then the anchor is removed.
*/
const link = document.createElement("a");
link.href = URL.createObjectURL(blob);
link.download = filename;
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
}}
const div = d3.select("#plot")
.attr("width", "2400vw")
.attr("height", "{height_value}")
const chart = clusterMap()
.config({{
cluster: {{
alignLabels: true
}},
gene: {{
label: {{
show: false,
}}
}},
link: {{
threshold: 0.3,
bestOnly: true,
}}
}})
d3.json("{path_to_json}")
.then(data => {{
div.selectAll("div")
.data([data])
.join("div")
.call(chart)
let svg = div.select("svg")
d3.select("#btn-save-svg")
.on("click", () => {{
const blob = serialise(svg)
download(blob, "clinker.svg")
}})
}})
</script>
</html>
"""

with open(file_path, "w") as html_outfile:
html_outfile.write(
html_content.format(height_value=height_px, path_to_json=json_file_path)
)


def make_gene_HTML(genes_list, out_path):
"""
For each AMR gene for which a JSON file was created, generates an accompanying HTML file for rendering its gene
order visualization with clustermap. This is done for each gene individually.
"""
for gene in genes_list:
# Make HTML index file with appropriate JSON
write_clustermap_JSON_HTML(gene, sample_data_path, out_path)
write_clustermap_JSON_HTML(gene, out_path)


def get_cluster_data_genes_uid_list(json_cluster_data, genome_ids):
Expand Down Expand Up @@ -1044,6 +1133,4 @@ def make_representative_UPGMA_cluster_JSON(
json.dump(final_json_data, outfile)

# Make respective HTML file for Coeus
write_clustermap_JSON_HTML(
gene, output_path + "/index.html", output_path, rep_type="upgma"
)
write_clustermap_JSON_HTML(gene, output_path, rep_type="upgma")
2 changes: 0 additions & 2 deletions modules/local/gene_order/extraction.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ process EXTRACTION {
path input_file_path
path extract_path
path gbk_path
path html_template
val num_neighbors
val percent_cutoff
val label_cols
Expand All @@ -30,7 +29,6 @@ process EXTRACTION {
-x $extract_path \\
-g $gbk_path \\
-o . \\
-w $html_template \\
-n $num_neighbors \\
-p $percent_cutoff \\
-c $label_cols
Expand Down
3 changes: 1 addition & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,8 @@ params {
// Gene Order
run_gene_order = false
input_file_path = "$projectDir/test/gene-order/rgi_input.txt"
gene_order_html_template = "$projectDir/test/gene-order/index.html"
gene_order_percent_cutoff = 0.25
gene_order_label_cols = null
gene_order_label_cols = null
num_neighbors = 10

// Optional clustering module hyperparameter options
Expand Down
12 changes: 6 additions & 6 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@
},
"accessory_similarity": {
"type": "number",
"default": 99.0,
"default": 99,
"fa_icon": "far fa-clone",
"description": "Similarity threshold for accessory genes"
}
Expand All @@ -233,11 +233,6 @@
"type": "boolean",
"description": "Whether to run the Gene Order subworkflow"
},
"gene_order_html_template": {
"type": "string",
"default": "/home/jvfe/dev/dalhousie/arete/test/gene-order/index.html",
"hidden": true
},
"input_file_path": {
"type": "string",
"default": "/home/jvfe/dev/dalhousie/arete/test/gene-order/rgi_input.txt",
Expand All @@ -248,6 +243,11 @@
"default": 0.25,
"description": "Cutoff percentage of genomes a gene should be present within to be included in extraction and subsequent analysis. Should be a float between 0 and 1 (e.g., 0.25 means only genes present in a minimum of 25% of genomes are kept)."
},
"gene_order_label_cols": {
"type": "string",
"default": "None",
"description": "If using annotation files predicting features, list of space-separated column names to be added to the gene names"
},
"num_neighbors": {
"type": "integer",
"default": 10,
Expand Down
1 change: 0 additions & 1 deletion subworkflows/local/gene_order.nf
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ workflow GENE_ORDER {
file(params.input_file_path),
rgiFiles,
gbkFiles,
file(params.gene_order_html_template),
num_neighbors,
percent_cutoff,
label_cols
Expand Down

0 comments on commit b5fdfd7

Please sign in to comment.