From 21e127a2b915485cbf7a59fa4c5ba9313294749d Mon Sep 17 00:00:00 2001 From: Josiah Seaman Date: Thu, 15 Nov 2018 14:52:58 +0000 Subject: [PATCH] #62 Breaking fasta file into chunks files and retrieving all sequences. Updating to DNASkittleUtils 1.0.11 --- DDV/TileLayout.py | 18 +- DDV/html_template/DDV_nucleotideNumber.js | 405 ---------------------- DDV/html_template/nucleotideNumber.js | 72 ++-- Requirements.txt | 2 +- 4 files changed, 56 insertions(+), 441 deletions(-) delete mode 100644 DDV/html_template/DDV_nucleotideNumber.js diff --git a/DDV/TileLayout.py b/DDV/TileLayout.py index 1d424c0..f96a63e 100644 --- a/DDV/TileLayout.py +++ b/DDV/TileLayout.py @@ -51,6 +51,7 @@ def level_layout_factory(modulos, padding=None): return levels + class TileLayout(object): def __init__(self, use_fat_headers=False, use_titles=True, sort_contigs=False, @@ -245,8 +246,10 @@ def draw_nucleotides(self): def output_fasta(self, output_folder, fasta, no_webpage, extract_contigs, sort_contigs): - fasta_destination = os.path.join(output_folder, os.path.basename(fasta)) + bare_file = os.path.basename(fasta) + fasta_destination = os.path.join(output_folder, bare_file) if extract_contigs or sort_contigs: + write_contigs_to_chunks_dir(output_folder, bare_file, self.contigs) length_sum = sum([len(c.seq) for c in self.contigs]) fasta_destination = '%s__%ibp.fa' % (os.path.splitext(fasta_destination)[0], length_sum) write_contigs_to_file(fasta_destination, self.contigs) # shortened fasta @@ -257,7 +260,7 @@ def output_fasta(self, output_folder, fasta, no_webpage, extract_contigs, sort_c except shutil.SameFileError: pass # not a problem - self.fasta_source.append(os.path.basename(fasta_destination)) + self.fasta_source.append(bare_file) print("Sequence saved in:", fasta_destination) return fasta_destination @@ -587,3 +590,14 @@ def get_packed_coordinates(self): def additional_html_content(self, html_content): return {} # override in children + +def write_contigs_to_chunks_dir(project_dir, fasta_name, contigs): + chunks_dir = os.path.join(project_dir, 'chunks', fasta_name) + try: + os.makedirs(chunks_dir, exist_ok=True) + except BaseException: + pass + for i, contig in enumerate(contigs): + filename = os.path.join(chunks_dir, '%i.fa' % i) + write_contigs_to_file(filename, [contig],verbose=False) + diff --git a/DDV/html_template/DDV_nucleotideNumber.js b/DDV/html_template/DDV_nucleotideNumber.js deleted file mode 100644 index abc4c55..0000000 --- a/DDV/html_template/DDV_nucleotideNumber.js +++ /dev/null @@ -1,405 +0,0 @@ -var PRECISION = 10; // number of decimal places -var viewer = null; -var pointerStatus = "-"; -var ColumnNumber = 0; -var ColumnRemainder = "-"; -var PositionInColumn = "-"; -var ColumnWidthNoPadding= iLineLength * pixelSize; -var iNucleotidesPerColumn = iLineLength * originalImageHeight / pixelSize; -var ColumnWidth = ColumnPadding + ColumnWidthNoPadding; -var originalAspectRatio = originalImageHeight/originalImageWidth; -var Nucleotide = "-"; -var NucleotideY = "-"; -var nucNumX = 0; -var nucNumY = 0; - -var mySequence; -var theSequenceSplit=""; -var theSequence=""; -var fragmentid=""; -var sequence_data_loaded=0; -var sequence_data_viewer_initialized=0; - -function init() { - viewer = OpenSeadragon({ - id: "container", - prefixUrl: "img/", - showNavigator: true, - tileSources: ["GeneratedImages/dzc_output.xml" ], - maxZoomPixelRatio: 6 - }); - viewer.scalebar({ - type: OpenSeadragon.ScalebarType.MAP, - pixelsPerMeter:2, - minWidth: "70px", - location: OpenSeadragon.ScalebarLocation.BOTTOM_LEFT, - xOffset: 5, - yOffset: 10, - stayInsideImage: false, - color: "rgb(30, 30, 30)", - fontColor: "rgb(10, 10, 10)", - backgroundColor: "rgba(255, 255, 255, 0.5)", - fontSize: "normal", - barThickness: 1, - sizeAndTextRenderer: OpenSeadragon.ScalebarSizeAndTextRenderer.BASEPAIR_LENGTH - }); - - OpenSeadragon.addEvent(viewer.element, "mousemove", showNucleotideNumber); - - //copy content of pointed at sequence fragment to result log - $('body').keyup(function (event) { - if (theSequence){ - if (event.keyCode == 88) { - $("#outfile").prepend("
"+fragmentid+"
"+theSequence+"
"); - } - } - }); - - $('#SequenceFragmentInstruction').hide(); - - - -} - -function showNucleotideNumber(event) { - - - // getMousePosition() returns position relative to page, - // while we want the position relative to the viewer - // element. so subtract the difference. - var pixel = OpenSeadragon.getMousePosition(event).minus - (OpenSeadragon.getElementPosition(viewer.element)); - - document.getElementById("mousePixels").innerHTML - = toString(pixel, true); - - if (!viewer.isOpen()) { - return; - } - - var point = viewer.viewport.pointFromPixel(pixel); - - document.getElementById("mousePoints").innerHTML - = toString(point, true); - - document.getElementById("nucleotideNumberX").innerHTML - = point.x; - document.getElementById("nucleotideNumberY").innerHTML - = point.y; - - - if ((point.x < 0) || (point.x > 1)) { - nucNumX="-"; - Nucleotide = "-"; - pointerStatus = "Outside of Image (X)"; - - } - else { - nucNumX=(point.x * originalImageWidth).toFixed(0); - } - - if ((point.y < 0) || (point.y > originalAspectRatio)){ - nucNumY="-"; - Nucleotide = "-"; - pointerStatus = "Outside of Image (Y)"; - } - else { - nucNumY=(point.y * originalImageWidth).toFixed(0); - } - - if ((nucNumX != "-")&&(nucNumY != "-")){ - ColumnNumber = Math.floor(nucNumX/ColumnWidth); - ColumnRemainder = nucNumX % ColumnWidth; - - PositionInColumn = Math.floor(ColumnRemainder / pixelSize) + 1; - NucleotideY = iLineLength * Math.floor(nucNumY/pixelSize); - - if ((ColumnRemainder <= ColumnWidth) && (ColumnRemainder >= ColumnWidthNoPadding )){ - ColumnNumber = "-"; - Nucleotide="-"; - PositionInColumn="-"; - pointerStatus = "Outside of Image (Inbetween Columns)"; - } - else { - Nucleotide = iNucleotidesPerColumn * ColumnNumber + NucleotideY + PositionInColumn; - if (Nucleotide > ipTotal) { - //End of Sequence - Nucleotide = "-"; - } - - } - } - - document.getElementById("Nucleotide").innerHTML = Nucleotide; - - //show sequence fragment - if (sequence_data_viewer_initialized){ - var lineNumber="-"; - if ($.isNumeric(Nucleotide)){ - lineNumber=Math.floor (Nucleotide /iLineLength); - remainder=Nucleotide % iLineLength; - if (lineNumber>0){ - theSequence=theSequenceSplit[lineNumber-1]+theSequenceSplit[lineNumber]+theSequenceSplit[lineNumber+1]; - tempTo=((lineNumber+2)*iLineLength); - if (ipTotal < tempTo){tempTo=ipTotal;} - fragmentid= "Sequence fragment at ["+Nucleotide+"], showing: ("+((lineNumber-1)*iLineLength+1)+" - "+tempTo+")"; - mySequence.setSequence(theSequence,fragmentid); - mySequence.setSelection(remainder+iLineLength, remainder+iLineLength); - } - else{ - theSequence=theSequenceSplit[lineNumber]+theSequenceSplit[lineNumber+1]; - fragmentid= "Sequence fragment at ["+Nucleotide+"], showing: "+((lineNumber)*iLineLength+1)+" - "+((lineNumber+2)*iLineLength)+")"; - mySequence.setSequence(theSequence,fragmentid); - mySequence.setSelection(remainder, remainder); - } - - $('#SequenceFragmentInstruction').show(); - - } - else{ - mySequence.clearSequence(""); - theSequence=""; - fragmentid=""; - $('#SequenceFragmentInstruction').hide(); - } - } - -} - -function toString(point, useParens) { - var x = point.x; - var y = point.y; - - if (x % 1 || y % 1) { // if not an integer, - x = x.toFixed(PRECISION); // then restrict number of - y = y.toFixed(PRECISION); // decimal places - } - - if (useParens) { - return "(" + x + ", " + y + ")"; - } else { - return x + " x " + y; - } -} - -function addLoadEvent(func) { - - var oldonload = window.onload; - if (typeof window.onload != 'function') { - window.onload = func; - } - else { - window.onload = function () { - if (oldonload) { - oldonload(); - } - func(); - } - - } - -} - - -function getSequence() { - - - $.ajax({xhr: function() - { - var xhr = new window.XMLHttpRequest(); - //Download progress - xhr.addEventListener("load", function (evt) { - $("#status").html("Sequence data loaded. Display of sequence fragments activated."); - $("#btnCallGCSkew").click(function (event) { - GenerateGCSkewChart(); - }); - $("#status").append("
Generate GC Skew activated."); - sequence_data_loaded = 1; - }, false); - xhr.addEventListener("progress", function (evt) { - if (evt.lengthComputable) { - var percentComplete = (evt.loaded / evt.total) * 100; - //Do something with download progress - if (percentComplete < 100) { - $("#status").html(" Loading sequence data: " + parseFloat(percentComplete).toFixed(2) + "% complete"); - } - } - else { - $("#status").html("Loading sequence data ... [ " + parseFloat(evt.loaded / 1048576).toFixed(2) + " MB loaded ]"); - } - }, false); - return xhr; - }, - type: "GET", - url: direct_data_file, - contentType: "text/html", - success: initSequence, - error: processInitSequenceError - }); -} - -function initSequence (theSequence) { - theSequenceSplit=theSequence.split("\n"); - theSequenceSplit.splice(0,1); - mySequence = new Biojs.Sequence({ - sequence : "", - target : "SequenceFragmentFASTA", - format : 'FASTA', - columns : {size:70,spacedEach:0} , - formatSelectorVisible: false, - fontSize: '11px', - }); - sequence_data_viewer_initialized=1; - mySequence.clearSequence(""); - $('#SequenceFragmentInstruction').hide(); - -} - -function processInitSequenceError() { - //do nothing -}; - -addLoadEvent(init); -addLoadEvent(getSequence); - -function outputTable (){ - outputTable1 = '
Nucleotide Number-
press "x" key using keyboard to copy this fragment to Result Log
'; - outputTable21 = ''; - outputTable22 = ''; - outputTable23 = ''; - outputTable24 = ''; - outputTable25 = ''; - outputTable26 = ''; - document.write(outputTable1); - document.write(outputTable21); - document.write(outputTable22); - document.write(outputTable23); - document.write(outputTable24); - document.write(outputTable25); - document.write(outputTable26); -} - -function GenerateGCSkewChart() { - - $("#status").html("Generating GC Skew Plot..."); - - $.getScript("../../d3.v3.js", function(){ - - - sbegin=$("#sbegin").val(); - send=$("#send").val(); - length = send - sbegin; - -//10 000 000 > 10 000 - //10 000 000 > 1 000 - //1 000 000 > 1 00 - //100 000 > 50 - -//set the default gc_skew_window to 10000 - var gc_skew_window = 10000; - if (length < 100000 ) {gc_skew_window = 50;} - else if (length < 1000000) {gc_skew_window = 100;} - else if (length < 10000000) {gc_skew_window = 1000;} - - var step_G=0; - var step_C=0; - var step_GC_skew=0; - - $("#outfile").prepend("
GC Skew chart [bp "+sbegin+" to "+send+" ]. GC skew window = "+gc_skew_window+"
"); - - - var lineData = jQuery.map( theSequenceSplit, function( item, index ) { - - step_G += (item.match(/G/g) || []).length; - step_C += (item.match(/C/g) || []).length; - - if (((index*iLineLength) > sbegin) && ((index*iLineLength) < send) && ((index*iLineLength) % gc_skew_window == 0)){ - - if ((step_G + step_C)==0){step_GC_skew=0;} - else {step_GC_skew = (step_G - step_C)/(step_G + step_C);} - step_G=0; - step_C=0; - return ({'x':(index*iLineLength),'y':step_GC_skew}); - } - else { - return null; - } - }); - - var vis = d3.select("#gcSkewChart"), - WIDTH = 800, - HEIGHT = 300, - MARGINS = { - top: 20, - right: 20, - bottom: 20, - left: 50 - }, - xRange = d3.scale.linear().range([MARGINS.left, WIDTH - MARGINS.right]).domain([d3.min(lineData, function (d) { - return d.x; - }), - d3.max(lineData, function (d) { - return d.x; - }) - ]), - - yRange = d3.scale.linear().range([HEIGHT - MARGINS.top, MARGINS.bottom]).domain([d3.min(lineData, function (d) { - return d.y; - }), - d3.max(lineData, function (d) { - return d.y; - }) - ]), - - xAxis = d3.svg.axis() - .scale(xRange) - .tickSize(5) - .tickSubdivide(true), - - yAxis = d3.svg.axis() - .scale(yRange) - .tickSize(5) - .orient("left") - .tickSubdivide(true); - - - vis.append("svg:g") - .attr("class", "x axis") - .attr("transform", "translate(0," + (HEIGHT - MARGINS.bottom) + ")") - .call(xAxis) - .append("text") - .attr("x", 116) - .attr("y", 40) - .style("text-anchor", "start") - .style("font-size","12px") - .text("Position in sequence "); - - vis.append("svg:g") - .attr("class", "y axis") - .attr("transform", "translate(" + (MARGINS.left) + ",0)") - .call(yAxis) - .append("text") - .attr("transform", "rotate(-90)") - .attr("y", 10) - .style("text-anchor", "end") - .style("font-size","12px") - .text("GC-Skew "); - - var lineFunc = d3.svg.line() - .x(function (d) { - return xRange(d.x); - }) - .y(function (d) { - return yRange(d.y); - }) - .interpolate('linear'); - - vis.append("svg:path") - .attr("d", lineFunc(lineData)) - .attr("stroke", "blue") - .attr("stroke-width", 2) - .attr("fill", "none"); - - $("#status").html("GC Skew Plot added to results."); - }); -} \ No newline at end of file diff --git a/DDV/html_template/nucleotideNumber.js b/DDV/html_template/nucleotideNumber.js index 8619781..b8c8d25 100644 --- a/DDV/html_template/nucleotideNumber.js +++ b/DDV/html_template/nucleotideNumber.js @@ -243,37 +243,44 @@ function addLoadEvent(func) { } } -function getSequence() { - - - $.ajax({xhr: function() - { - var xhr = new window.XMLHttpRequest(); - //Download progress - xhr.addEventListener("load", function (evt) { - $("#status").html("Sequence data loaded. Display of sequence fragments activated."); - // $("#btnCallGCSkew").click(function (event) { - // GenerateGCSkewChart(); - // }); - // $("#status").append("
Generate GC Skew activated."); - sequence_data_loaded = 1; - }, false); - xhr.addEventListener("progress", function (evt) { - if (evt.lengthComputable) { - var percentComplete = (evt.loaded / evt.total) * 100; - //Do something with download progress - if (percentComplete < 100) { - $("#status").html(" Loading sequence data: " + parseFloat(percentComplete).toFixed(2) + "% complete"); - } +function loading_function() +{ + var xhr = new window.XMLHttpRequest(); + //Download progress + xhr.addEventListener("load", function (evt) { + $("#status").html("Sequence data loaded. Display of sequence fragments activated."); + sequence_data_loaded = 1; + }, false); + xhr.addEventListener("progress", function (evt) { + if (evt.lengthComputable) { + var percentComplete = (evt.loaded / evt.total) * 100; + //Do something with download progress + if (percentComplete < 100) { + $("#status").html(" Loading sequence data: " + + parseFloat(percentComplete).toFixed(2) + "% complete"); } - else { - $("#status").html("Loading sequence data ... [ " + parseFloat(evt.loaded / 1048576).toFixed(2) + " MB loaded ]"); - } - }, false); - return xhr; - }, + } + else { + $("#status").html("Loading sequence data ... [ " + + parseFloat(evt.loaded / 1048576).toFixed(2) + " MB loaded ]"); + } + }, false); + return xhr; +} + +function get_all_sequences() { + var fasta_path = fasta_source[0]; + for(let [index, contig] of ContigSpacingJSON.entries()){ + getSequence(index); // dispatch one request for each contig + } +} + +function getSequence(contig_index) { + var fasta_path = "chunks/" + fasta_source[0] + "/" + contig_index + ".fa"; + + $.ajax({xhr: loading_function, type: "GET", - url: fasta_source[0], + url: fasta_path, contentType: "text/html", success: initSequence, error: processInitSequenceError @@ -283,7 +290,6 @@ function getSequence() { function read_contigs(sequence_received) { //read_contigs equiv in javascript theSequenceSplit = sequence_received.split(/^>|\n>/);// begin line, caret ">"); - var contigs = {} for (let contig_s of theSequenceSplit) { var lines = contig_s.split(/\r?\n/); var title = lines[0] @@ -293,7 +299,7 @@ function read_contigs(sequence_received) { return contigs } function initSequence (sequence_received) { - contigs = read_contigs(sequence_received); + read_contigs(sequence_received); // TODO: file specific contigs = visible_seq_obj = new Biojs.Sequence({ sequence : "", @@ -313,12 +319,12 @@ function processInitSequenceError() { }; addLoadEvent(init_all); -addLoadEvent(getSequence); +addLoadEvent(get_all_sequences); function outputTable() { document.write('
Nucleotide Number-
' + - '' + + '' + '
' + '
' + '
' + diff --git a/Requirements.txt b/Requirements.txt index 0892692..c218fb0 100644 --- a/Requirements.txt +++ b/Requirements.txt @@ -4,7 +4,7 @@ #wheel==0.24.0 #cx_Freeze==5.1.1 -git+https://github.com/josiahseaman/DNASkittleUtils.git@1.0.10 +git+https://github.com/josiahseaman/DNASkittleUtils.git@1.0.11 Pillow>=3.2.0 six==1.10.0 psutil==4.3.1