diff --git a/bin/gto b/bin/gto index b59ffcd..0d33bfe 100755 Binary files a/bin/gto and b/bin/gto differ diff --git a/bin/gto_fasta_merge_streams b/bin/gto_fasta_merge_streams new file mode 100755 index 0000000..4facff1 Binary files /dev/null and b/bin/gto_fasta_merge_streams differ diff --git a/bin/gto_fasta_split_streams b/bin/gto_fasta_split_streams index 4151d19..dbfe18e 100755 Binary files a/bin/gto_fasta_split_streams and b/bin/gto_fasta_split_streams differ diff --git a/conda/build.sh b/conda/build.sh index 0f5682e..442338d 100755 --- a/conda/build.sh +++ b/conda/build.sh @@ -110,3 +110,4 @@ cp bin/gto_segment $PREFIX/bin/ cp bin/gto_sum $PREFIX/bin/ cp bin/gto_upper_bound $PREFIX/bin/ cp bin/gto_word_search $PREFIX/bin/ +cp bin/gto_fasta_split_streams $PREFIX/bin/ diff --git a/conda/meta.yaml b/conda/meta.yaml index 208b2af..36c08f4 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -5,10 +5,10 @@ package: name: gto - version: '1.5.3' + version: '1.5.4' source: - git_rev: v1.5.3 + git_rev: v1.5.4 git_url: https://github.com/cobilab/gto.git requirements: diff --git a/manual/manual.pdf b/manual/manual.pdf index 1d06826..727bd7d 100644 Binary files a/manual/manual.pdf and b/manual/manual.pdf differ diff --git a/manual/manual.tex b/manual/manual.tex index 0c04bb1..0bcbc40 100644 --- a/manual/manual.tex +++ b/manual/manual.tex @@ -84,7 +84,7 @@ $^3$Department of Information and Communications Technologies, University of A Coru\~na, A Coru\~na, Spain\\ $^4$Department of Virology, University of Helsinki, Helsinki, Finland\\ ~\\ -Version 1.5.1 +Version 1.5.4 } \date{} \maketitle diff --git a/manual/sections/FASTA_tools/FASTA_tools.tex b/manual/sections/FASTA_tools/FASTA_tools.tex index 84c0808..0036a26 100644 --- a/manual/sections/FASTA_tools/FASTA_tools.tex +++ b/manual/sections/FASTA_tools/FASTA_tools.tex @@ -39,6 +39,8 @@ \chapter{FASTA tools} \item \texttt{gto\char`_fasta\char`_split\char`_streams}: it splits and writes a FASTA file into three channels of 
information: headers, extra and DNA. +\item \texttt{gto\char`_fasta\char`_merge\char`_streams}: it merges the three channels of information (headers, extra and DNA) and writes it into a FASTA file. + \end{enumerate} @@ -56,4 +58,5 @@ \chapter{FASTA tools} \input{\FASTAToolsPath/FastaExtractPatternCoords.tex} \input{\FASTAToolsPath/FastaComplement.tex} \input{\FASTAToolsPath/FastaReverse.tex} -\input{\FASTAToolsPath/FastaSpitStreams.tex} \ No newline at end of file +\input{\FASTAToolsPath/FastaSplitStreams.tex} +\input{\FASTAToolsPath/FastaMergeStreams.tex} \ No newline at end of file diff --git a/manual/sections/FASTA_tools/FastaMergeStreams.tex b/manual/sections/FASTA_tools/FastaMergeStreams.tex new file mode 100644 index 0000000..815cdab --- /dev/null +++ b/manual/sections/FASTA_tools/FastaMergeStreams.tex @@ -0,0 +1,39 @@ +\section{Program gto\char`_fasta\char`_merge\char`_streams} +The \texttt{gto\char`_fasta\char`_merge\char`_streams} merges the three channels of information (headers, extra and DNA) and writes it into a FASTA file. \\ +For help type: +\begin{lstlisting} +./gto_fasta_merge_streams -h +\end{lstlisting} +In the following subsections, we explain the input and output parameters. + +\subsection*{Input parameters} + +The \texttt{gto\char`_fasta\char`_merge\char`_streams} program needs the three files resulting from the execution of the \texttt{gto\char`_fasta\char`_split\char`_streams} tool, and the output standard stream for computation. The output stream is a FASTA or Multi-FASTA file.\\ +The attribution is given according to: +\begin{lstlisting} +Usage: ./gto_fasta_merge_streams [options] [[--] args] + or: ./gto_fasta_merge_streams [options] + +It merges the three channels of information (headers, extra and DNA) and writes it into a FASTA file. 
+ + -h, --help Show this help message and exit + +Basic options + -e, --extra= Output file for the extra information + -d, --dna= Output file for the DNA information + -H, --headers= Output file for the headers information + > output Output FASTA file format (stdout) + +Example: ./gto_fasta_merge_streams -e -d -H > output.fasta +\end{lstlisting} + +\subsection*{Output} + +The output of the \texttt{gto\char`_fasta\char`_merge\char`_streams} program is a FASTA or Multi-FASTA file.\\ +Using the three output files of the \texttt{gto\char`_fasta\char`_split\char`_streams} tool as input in this example, the output of this tool is the following: +\begin{lstlisting} +>AB000264 |acc=AB000264|descr=Homo sapiens mRNA +ACAAGACGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCCTGGAGGGTCCACCGCTGCCCTGCTGCCATTGTCCCCGGCCCCACCTAAGGAAAAGCAGCCTCCTGACTTTCCTCGCTTGGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAAGTGGTTTGAGTGGACCTCCGGGCCCCTCATAGGAGAGGAAGCTCGGGAGGTGGCCAGGCGGCAGGAAGCAGGCCAGTGCCGCGAATCCGCGCGCCGGGACAGAATCTCCTGCAAAGCCCTGCAGGAACTTCTTCTGGAAGACCTTCTCCACCCCCCCAGCTAAAACCTCACCCATGAATGCTCACGCAAGTTTAATTACAGACCTGAA +>AB000263 |acc=AB000263|descr=Homo sapiens mRNA +ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCACCGCTGCCCTGCCCCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAATAAGGAAAAGCAGCCTCCTGACTTTCCTCGCTTGGTGGTTTGAGTGGACCTCCCAGGCCAGTGCCGGGCCCCTCATAGGAGAGGAAGCTCGGGAGGTGGCCAGGCGGCAGGAAGGCGCACCCCCCCAGCAATCCGCGCGCCGGGACAGAATGCCCTGCAGGAACTTCTTCTGGAAGACCTTCTCCTCCTGCAAATAAAACCTCACCCATGAATGCTCACGCAAGTTTAATTACAGACCTGAA +\end{lstlisting} \ No newline at end of file diff --git a/manual/sections/FASTA_tools/FastaSpitStreams.tex b/manual/sections/FASTA_tools/FastaSplitStreams.tex similarity index 82% rename from manual/sections/FASTA_tools/FastaSpitStreams.tex rename to manual/sections/FASTA_tools/FastaSplitStreams.tex index 4305936..30de0c5 100644 --- a/manual/sections/FASTA_tools/FastaSpitStreams.tex +++ b/manual/sections/FASTA_tools/FastaSplitStreams.tex @@ -19,9 +19,12 @@ \subsection*{Input parameters} 
-h, --help Show this help message and exit Basic options + -e, --extra= Output file for the extra information + -d, --dna= Output file for the DNA information + -H, --headers= Output file for the headers information < input.fastq Input FASTA file format (stdin) -Example: ./gto_fasta_split_streams < input.fastq +Example: ./gto_fasta_split_streams -e -d -H < input.fasta \end{lstlisting} An example of such an input file is: \begin{lstlisting} @@ -41,4 +44,4 @@ \subsection*{Input parameters} \subsection*{Output} -The output of the \texttt{gto\char`_fasta\char`_split\char`_streams} program are three files containing the headers, extra information and DNA. +The output of the \texttt{gto\char`_fasta\char`_split\char`_streams} program are three files containing the headers, extra information and DNA. The names of those files can be passed through the tool's parameters. The default names are HEADERS.JV2, EXTRA.JV2 and DNA.JV2. diff --git a/src/FastaMergeStreams.c b/src/FastaMergeStreams.c new file mode 100644 index 0000000..507e9c3 --- /dev/null +++ b/src/FastaMergeStreams.c @@ -0,0 +1,122 @@ +#include +#include +#include +#include +#include "argparse.h" +#include + + +/* + * This application merges three channels of information into a FASTA file: + * - HEADERS; + * - EXTRA; + * - DNA. 
+ */ +int main(int argc, char *argv[]) +{ + + FILE *HEADERS, *EXTRA, *DNA; + int c, d = 0; + const char *output_headers = NULL; + const char *output_extra = NULL; + const char *output_dna = NULL; + + char *programName = argv[0]; + struct argparse_option options[] = { + OPT_HELP(), + OPT_GROUP("Basic options"), + OPT_STRING('e', "extra", &output_extra, "Output file for the extra information"), + OPT_STRING('d', "dna", &output_dna, "Output file for the DNA information"), + OPT_STRING('H', "headers", &output_headers, "Output file for the headers information"), + OPT_BUFF('>', "output", "Output FASTA file format (stdout)"), + OPT_END(), + }; + struct argparse argparse; + + char usage[250] = "\nExample: "; + strcat(usage, programName); + strcat(usage, " -e -d -H > output.fasta\n"); + + argparse_init(&argparse, options, NULL, programName, 0); + argparse_describe(&argparse, "\nIt merges the three channels of information (headers, extra and DNA) and writes it into a FASTA file.", usage); + argc = argparse_parse(&argparse, argc, argv); + + if(argc != 0) + argparse_help_cb(&argparse, options); + + if(output_headers == NULL) + output_headers = "HEADERS.JV2"; + + if((HEADERS = fopen (output_headers, "r")) == NULL) + { + fprintf(stderr, "Error: could not open file!"); + return 1; + } + + if(output_extra == NULL) + output_extra = "EXTRA.JV2"; + + if((EXTRA = fopen (output_extra, "r")) == NULL) + { + fprintf(stderr, "Error: could not open file!"); + return 1; + } + + if(output_dna == NULL) + output_dna = "DNA.JV2"; + + if((DNA = fopen (output_dna, "r")) == NULL) + { + fprintf(stderr, "Error: could not open file!"); + return 1; + } + + while((c = fgetc(EXTRA)) != EOF) + { + + if(c == '>') + { + fprintf(stdout, "%c", c); + while((c = fgetc(HEADERS)) != EOF) + { + if(c == EOF) goto x; + fprintf(stdout, "%c", c); + if(c == '\n') break; + } + continue; + } + + switch(c) + { + + case 0: + if((d = fgetc(DNA)) == EOF) + { + fprintf(stderr, "Error: invalid format!"); + return 1; + } + 
fprintf(stdout, "%c", d); + break; + + case 1: + if((d = fgetc(DNA)) == EOF) + { + fprintf(stderr, "Error: invalid format!"); + return 1; + } + fprintf(stdout, "%c", tolower(d)); + break; + + default: + fprintf(stdout, "%c", c); + break; + } + } + + x: + + if(!HEADERS) fclose(HEADERS); + if(!EXTRA) fclose(EXTRA); + if(!DNA) fclose(DNA); + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/src/FastaSplitStreams.c b/src/FastaSplitStreams.c index 273bc0a..19b5987 100644 --- a/src/FastaSplitStreams.c +++ b/src/FastaSplitStreams.c @@ -33,7 +33,7 @@ int main(int argc, char *argv[]) char usage[250] = "\nExample: "; strcat(usage, programName); - strcat(usage, " -e -d -H < input.fastq\n"); + strcat(usage, " -e -d -H < input.fasta\n"); argparse_init(&argparse, options, NULL, programName, 0); argparse_describe(&argparse, "\nIt splits and writes a FASTA file into three channels of information: headers, extra and DNA.", usage); diff --git a/src/GTO.c b/src/GTO.c index 25b7012..979decc 100644 --- a/src/GTO.c +++ b/src/GTO.c @@ -15,7 +15,7 @@ int main(int argc, char *argv[]) " ╚═════╝ ╚═╝ ╚═════╝ \n" " \n" "NAME \n" - " GTO v%u.%u.3, \n" + " GTO v1.5.4, \n" " The Genomics-Proteomics Toolkit. \n" " \n" "AUTHORS \n" @@ -243,6 +243,14 @@ int main(int argc, char *argv[]) " It uses the Chester-visual to visualize relative singularity \n" " regions. \n" " \n" + " [gto_fasta_split_streams] \n" + " It splits and writes a FASTA file into three channels of \n" + " information: headers, extra and DNA. \n" + " \n" + " [gto_fasta_merge_streams] \n" + " It merges the three channels of information (headers, extra \n" + " and DNA) and writes it into a FASTA file. \n" + " \n" "Genomic Sequence Tools \n" " [gto_genomic_count_bases] \n" " It counts the number of bases in sequence, FASTA or \n" @@ -399,7 +407,6 @@ int main(int argc, char *argv[]) " GTO: A toolkit to unify pipelines in genomic and proteomic research.\n", " J. R. Almeida, A. J. Pinho, J. L. Oliveira, O. Fajarda, D. 
Pratas, \n", " SoftwareX, Volume 12, 2020, 100535, \n", - " doi: https://doi.org/10.1016/j.softx.2020.100535 \n", - VERSION, RELEASE); + " doi: https://doi.org/10.1016/j.softx.2020.100535 \n"); return EXIT_SUCCESS; } diff --git a/src/Makefile b/src/Makefile index d539b01..e7e7506 100644 --- a/src/Makefile +++ b/src/Makefile @@ -109,7 +109,8 @@ PROGS = $(BIN)/gto \ $(BIN)/gto_amino_acid_from_fasta \ $(BIN)/gto_amino_acid_from_fastq \ $(BIN)/gto_amino_acid_from_seq \ - $(BIN)/gto_fasta_split_streams + $(BIN)/gto_fasta_split_streams \ + $(BIN)/gto_fasta_merge_streams #$(BIN)/gto_amino_acid_to_seq @@ -258,6 +259,8 @@ $(BIN)/gto_amino_acid_from_seq: AminoAcidFromSeq.c $(DEPS) $(OBJS) $(CC) $(CFLAGS) -o $(BIN)/gto_amino_acid_from_seq AminoAcidFromSeq.c $(OBJS) $(LIBS) $(BIN)/gto_fasta_split_streams: FastaSplitStreams.c $(DEPS) $(OBJS) $(CC) $(CFLAGS) -o $(BIN)/gto_fasta_split_streams FastaSplitStreams.c $(OBJS) $(LIBS) +$(BIN)/gto_fasta_merge_streams: FastaMergeStreams.c $(DEPS) $(OBJS) + $(CC) $(CFLAGS) -o $(BIN)/gto_fasta_merge_streams FastaMergeStreams.c $(OBJS) $(LIBS) #$(BIN)/gto_amino_acid_to_seq: AminoAcidToSeq.c $(DEPS) $(OBJS) # $(CC) $(CFLAGS) -o $(BIN)/gto_amino_acid_to_seq AminoAcidToSeq.c $(OBJS) $(LIBS) diff --git a/tester/gto_fasta_merge_streams/DNA.JV2 b/tester/gto_fasta_merge_streams/DNA.JV2 new file mode 100644 index 0000000..a1ec07f --- /dev/null +++ b/tester/gto_fasta_merge_streams/DNA.JV2 @@ -0,0 +1 @@ 
+ACAAGACGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCCTGGAGGGTCCACCGCTGCCCTGCTGCCATTGTCCCCGGCCCCACCTAAGGAAAAGCAGCCTCCTGACTTTCCTCGCTTGGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAAGTGGTTTGAGTGGACCTCCGGGCCCCTCATAGGAGAGGAAGCTCGGGAGGTGGCCAGGCGGCAGGAAGCAGGCCAGTGCCGCGAATCCGCGCGCCGGGACAGAATCTCCTGCAAAGCCCTGCAGGAACTTCTTCTGGAAGACCTTCTCCACCCCCCCAGCTAAAACCTCACCCATGAATGCTCACGCAAGTTTAATTACAGACCTGAAACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCACCGCTGCCCTGCCCCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAATAAGGAAAAGCAGCCTCCTGACTTTCCTCGCTTGGTGGTTTGAGTGGACCTCCCAGGCCAGTGCCGGGCCCCTCATAGGAGAGGAAGCTCGGGAGGTGGCCAGGCGGCAGGAAGGCGCACCCCCCCAGCAATCCGCGCGCCGGGACAGAATGCCCTGCAGGAACTTCTTCTGGAAGACCTTCTCCTCCTGCAAATAAAACCTCACCCATGAATGCTCACGCAAGTTTAATTACAGACCTGAA \ No newline at end of file diff --git a/tester/gto_fasta_merge_streams/EXTRA.JV2 b/tester/gto_fasta_merge_streams/EXTRA.JV2 new file mode 100644 index 0000000..7abb57e Binary files /dev/null and b/tester/gto_fasta_merge_streams/EXTRA.JV2 differ diff --git a/tester/gto_fasta_merge_streams/HEADERS.JV2 b/tester/gto_fasta_merge_streams/HEADERS.JV2 new file mode 100644 index 0000000..8a7fc84 --- /dev/null +++ b/tester/gto_fasta_merge_streams/HEADERS.JV2 @@ -0,0 +1,2 @@ +AB000264 |acc=AB000264|descr=Homo sapiens mRNA +AB000263 |acc=AB000263|descr=Homo sapiens mRNA diff --git a/tester/gto_fasta_merge_streams/output.fasta b/tester/gto_fasta_merge_streams/output.fasta new file mode 100644 index 0000000..8085beb --- /dev/null +++ b/tester/gto_fasta_merge_streams/output.fasta @@ -0,0 +1,4 @@ +>AB000264 |acc=AB000264|descr=Homo sapiens mRNA +ACAAGACGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCCTGGAGGGTCCACCGCTGCCCTGCTGCCATTGTCCCCGGCCCCACCTAAGGAAAAGCAGCCTCCTGACTTTCCTCGCTTGGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAAGTGGTTTGAGTGGACCTCCGGGCCCCTCATAGGAGAGGAAGCTCGGGAGGTGGCCAGGCGGCAGGAAGCAGGCCAGTGCCGCGAATCCGCGCGCCGGGACAGAATCTCCTGCAAAGCCCTGCAGGAACTTCTTCTGGAAGACCTTCTCCACCCCCCCAGCTAAAACCTCACCCATGAATGCTCACGCAAGTTTAATTACAGACCTGAA +>AB000263 |acc=AB000263|descr=Homo sapiens mRNA 
+ACAAGATGCCATTGTCCCCCGGCCTCCTGCTGCTGCTGCTCTCCGGGGCCACGGCCACCGCTGCCCTGCCCCTGGAGGGTGGCCCCACCGGCCGAGACAGCGAGCATATGCAGGAAGCGGCAGGAATAAGGAAAAGCAGCCTCCTGACTTTCCTCGCTTGGTGGTTTGAGTGGACCTCCCAGGCCAGTGCCGGGCCCCTCATAGGAGAGGAAGCTCGGGAGGTGGCCAGGCGGCAGGAAGGCGCACCCCCCCAGCAATCCGCGCGCCGGGACAGAATGCCCTGCAGGAACTTCTTCTGGAAGACCTTCTCCTCCTGCAAATAAAACCTCACCCATGAATGCTCACGCAAGTTTAATTACAGACCTGAA \ No newline at end of file diff --git a/tester/gto_fasta_merge_streams/runExample.sh b/tester/gto_fasta_merge_streams/runExample.sh new file mode 100644 index 0000000..bb36e53 --- /dev/null +++ b/tester/gto_fasta_merge_streams/runExample.sh @@ -0,0 +1,2 @@ +#!/bin/bash +../../bin/gto_fasta_merge_streams -e EXTRA.JV2 -H HEADERS.JV2 -d DNA.JV2 > output.fasta \ No newline at end of file diff --git a/tester/runAllTests.sh b/tester/runAllTests.sh index 4fc1264..708189e 100644 --- a/tester/runAllTests.sh +++ b/tester/runAllTests.sh @@ -213,4 +213,7 @@ sh runExample.sh cd .. cd gto_fasta_split_streams sh runExample.sh +cd .. +cd gto_fasta_merge_streams +sh runExample.sh cd .. \ No newline at end of file