-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFastaRecord.cpp
68 lines (58 loc) · 1.58 KB
/
FastaRecord.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#include "FastaRecord.h"

#include <err.h>
#include <errno.h>
#include <stdlib.h>

#include <cstring>
#include <utility>
// Read one record from a FASTA stream into this object's `id` and `seq`.
//
// Precondition: `is` is positioned so that the next character is the '>'
// that starts the first/next record.
//
// On return `id` holds the header line without the leading '>', and `seq`
// holds all following lines up to (not including) the next '>' or end of
// input, concatenated with their line terminators removed.
//
// Terminates the process with exit(1) when:
//   - the next character is not '>' (including end of input or a failed
//     stream), because continuing would produce a corrupted record;
//   - the sequence contains a character outside the accepted alphabet.
// A header that is the very last thing in the input (no sequence, no
// trailing newline) is only reported on stderr; the record is returned with
// an empty sequence.
void
FastaRecord::readsinglefasta(std::ifstream& is)
{
    // Line counter used only for diagnostics. Being function-static it is
    // shared by every call, so it keeps counting across different streams.
    static unsigned int line = 1;

    // Accepted sequence symbols: every letter except J and O (both cases),
    // plus '-' and '*'. Built once instead of on every call.
    static const std::string bases =
        "ABCDEFGHIKLMNPQRSTUVWXYZ-*abcdefghiklmnpqrstuvwxyz";

    // std::getline strips only '\n'; files with "\r\n" line endings would
    // otherwise leave a '\r' in the id and fail sequence validation.
    const auto strip_cr = [](std::string& text) {
        if (!text.empty() && text.back() == '\r') {
            text.pop_back();
        }
    };

    if (is.peek() != '>') {
        // peek() sets eofbit when the input is exhausted, which lets us
        // tell "nothing left" apart from "wrong character".
        if (is.eof()) {
            std::cerr << "end of file before >id" << '\n';
        } else {
            std::cerr << "Next character is not a '>' as it should be on line ";
            std::cerr << line << '\n';
        }
        exit(1);
    }

    is.ignore(1); // The >
    std::getline(is, id);
    strip_cr(id);
    ++line;
    if (is.eof()) {
        // Header line ended the input without a newline: no sequence follows.
        std::cerr << "end of file before sequence" << '\n';
    }

    // FASTA sequence can be multiple lines. The loop is driven by the
    // success of getline rather than by eof(), so it also terminates when
    // the stream fails without ever reaching end of file, and it does not
    // count a line for the final failed read.
    seq.clear();
    std::string chunk;
    while (is.peek() != '>' && std::getline(is, chunk)) {
        strip_cr(chunk);
        seq += chunk;
        ++line;
    }

    const std::string::size_type bad = seq.find_first_not_of(bases);
    if (bad != std::string::npos) {
        std::cerr << "Invalid character at position " << bad << " in '" << seq
                  << "'" << '\n';
        exit(1);
    }
}
// Read every record of the FASTA file at path `fastafile`.
//
// Returns the records in file order; an empty file yields an empty
// container. If the file cannot be opened, a message is written to stderr
// and the process terminates with exit(1). Parsing of each record is
// delegated to FastaRecord::readsinglefasta.
fastavec_t readfastafile(const std::string& fastafile)
{
    std::ifstream f(fastafile.c_str());
    if (!f.is_open()) {
        // Capture errno before any stream output can overwrite it.
        // NOTE(review): the C++ standard does not promise that a failed
        // open sets errno; common implementations do.
        const int open_errno = errno;
        std::cerr << "cannot open '" << fastafile << "' for reading: ";
        std::cerr << strerror(open_errno) << std::endl;
        exit(1);
    }

    fastavec_t sequences;
    // eofbit is only set after a read attempt, so testing eof() up front
    // would enter the loop once for an empty file. peek() returns the EOF
    // sentinel both at end of input and when the stream is no longer
    // readable, so this loop cannot spin on a failed stream.
    while (f.peek() != std::ifstream::traits_type::eof()) {
        FastaRecord record;
        record.readsinglefasta(f);
        sequences.emplace_back(std::move(record));
    }
    return sequences;
}