-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* validate bin with sig * bins indexing implemented * pairwise minor modifications * dumping modes * minor fix * 🐛 handle gzipped sigs * sig to bin * sigs to bins update * :fix: all parallel * refactor * remove print inside parallel loop * :fix: validate * skip converted files * sequential loading * legends to phmap * check invalid bins * bins indexing done * update kProcessor submodule * filter by abundance * delete kProcessor submodule * modify kProcessor * new json parser * adapt the new json parser changes * modify json import * update kProcessor * update kProcessor * update kProcessor * update kProcessor * update kProcessor * update kp * update CMAKE flags * print colors size * more stats * more options * fix * representative sketches * reorganize
- Loading branch information
Showing
38 changed files
with
3,815 additions
and
1,677 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#include <fstream> | ||
#include <iostream> | ||
#include <string> | ||
#include <vector> | ||
#include <boost/algorithm/string.hpp> | ||
#include <cstdint> | ||
#include <unordered_map> | ||
#include <parallel_hashmap/phmap.h> | ||
|
||
using namespace boost::algorithm; | ||
using namespace std; | ||
|
||
/// Ordering predicate for (key, count) pairs: true when `lhs` carries a
/// strictly larger count (`second`) than `rhs`, i.e. it sorts descending by
/// count. The key (`first`) takes no part in the comparison.
bool comp(std::pair<uint64_t, uint64_t> lhs, std::pair<uint64_t, uint64_t> rhs) {
    return rhs.second < lhs.second;
}
|
||
|
||
int main(int argc, char** argv) { | ||
ifstream fin(argv[1]); | ||
phmap::flat_hash_map<uint64_t, uint64_t> count; | ||
string line; | ||
getline(fin, line); // skip header. | ||
while (getline(fin, line)) { | ||
// Split line into tab-separated parts | ||
vector<string> parts; | ||
split(parts, line, boost::is_any_of("\t")); | ||
float containment = stof(parts[4]); | ||
if (containment > 0.20) { | ||
uint64_t from_node = stoi(parts[0]); | ||
uint64_t to_node = stoi(parts[1]); | ||
count[from_node]++; | ||
count[to_node]++; | ||
} | ||
|
||
} | ||
fin.close(); | ||
|
||
std::vector<std::pair<uint64_t, uint64_t>> elems(count.begin(), count.end()); | ||
std::sort(elems.begin(), elems.end(), comp); | ||
|
||
for (auto& [k, v] : elems) { | ||
cout << k << ": " << v << endl; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#include "kSpider.hpp" | ||
|
||
/// Converts a string of ASCII decimal digits into its unsigned 64-bit value.
///
/// Every character is folded in as `c - '0'` with no validation, so the
/// result is only meaningful when `value` consists solely of digits and the
/// number fits in 64 bits (arithmetic wraps otherwise). An empty string
/// yields 0.
inline uint64_t to_uint64_t(std::string const& value) {
    uint64_t parsed = 0;
    for (const char digit : value) {
        parsed = parsed * 10 + (digit - '0');
    }
    return parsed;
}
|
||
int main(int argc, char** argv) { | ||
if(argc < 6){ | ||
cout << "args: <bins_dir> <kSize> <output_prefix> <initial_reserve_size> <legend_reserve>\n"; | ||
exit(1); | ||
} | ||
string bins_dir = argv[1]; | ||
int kSize = stoi(argv[2]); | ||
string output_prefix = argv[3]; | ||
uint64_t reserve_size = to_uint64_t(argv[4]); | ||
uint64_t legend_reserve = to_uint64_t(argv[5]); | ||
|
||
kSpider::bins_indexing(bins_dir, kSize, output_prefix, reserve_size, legend_reserve); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#include <iostream> | ||
#include <cstdint> | ||
#include <chrono> | ||
#include "parallel_hashmap/phmap.h" | ||
#include <ctime> | ||
#include<omp.h> | ||
#include <glob.h> | ||
#include <string> | ||
#include <stdexcept> | ||
#include "parallel_hashmap/phmap_dump.h" | ||
#include <cstdlib> | ||
|
||
using namespace std; | ||
// using namespace phmap; | ||
|
||
|
||
int main(int argc, char** argv) { | ||
|
||
if (argc != 2) { | ||
cout << "run: ./check_bin <bin>" << endl; | ||
exit(1); | ||
} | ||
|
||
string bin_path = argv[1]; | ||
phmap::flat_hash_set<uint64_t> table_in; | ||
phmap::BinaryInputArchive ar_in(bin_path.c_str()); | ||
table_in.phmap_load(ar_in); | ||
|
||
|
||
cout << "VALID_BIN: " << table_in.size(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#include <iostream> | ||
#include <cstdint> | ||
#include <chrono> | ||
#include "parallel_hashmap/phmap.h" | ||
#include <ctime> | ||
#include<omp.h> | ||
#include <glob.h> | ||
#include <string> | ||
#include <stdexcept> | ||
#include "parallel_hashmap/phmap_dump.h" | ||
#include <cstdlib> | ||
|
||
using namespace std; | ||
// using namespace phmap; | ||
|
||
|
||
int main(int argc, char** argv) { | ||
|
||
if (argc != 2) { | ||
cout << "run: ./dump_bin <bin>" << endl; | ||
exit(1); | ||
} | ||
|
||
string bin_path = argv[1]; | ||
|
||
phmap::flat_hash_set<uint64_t> table_in; | ||
phmap::BinaryInputArchive ar_in(bin_path.c_str()); | ||
table_in.phmap_load(ar_in); | ||
cerr << "loaded bin size: " << table_in.size() << endl; | ||
|
||
for(const uint64_t & hash : table_in) cout << hash << endl; | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#include <iostream> | ||
#include <cstdint> | ||
#include <chrono> | ||
#include "parallel_hashmap/phmap.h" | ||
#include <ctime> | ||
#include <omp.h> | ||
#include "cpp-json/json.h" | ||
#include "zstr.hpp" | ||
#include <glob.h> | ||
#include <string> | ||
#include <stdexcept> | ||
#include "parallel_hashmap/phmap_dump.h" | ||
#include <cstdlib> | ||
|
||
using namespace std; | ||
// using namespace phmap; | ||
|
||
typedef std::chrono::high_resolution_clock Time; | ||
|
||
|
||
int main(int argc, char** argv) { | ||
|
||
if (argc != 3) { | ||
cout << "run: ./dump_sig <sig> <kSize>" << endl; | ||
exit(1); | ||
} | ||
|
||
string sig_path = argv[1]; | ||
int kSize = stoi(argv[2]); | ||
|
||
phmap::flat_hash_set<uint64_t> tmp_hashes; | ||
|
||
auto begin_time = Time::now(); | ||
zstr::ifstream sig_stream(sig_path); | ||
json::value json = json::parse(sig_stream); | ||
auto sourmash_sig = json[0]["signatures"]; | ||
const json::array& sig_array = as_array(sourmash_sig); | ||
for (auto it = sig_array.begin(); it != sig_array.end(); ++it) { | ||
const json::value& v = *it; | ||
if (v["ksize"] == kSize) { | ||
const json::array& mins = as_array(v["mins"]); | ||
auto mins_it = mins.begin(); | ||
while (mins_it != mins.end()) { | ||
tmp_hashes.insert(json::to_number<uint64_t>(*mins_it)); | ||
mins_it++; | ||
} | ||
} | ||
break; | ||
} | ||
|
||
|
||
for (const uint64_t& hash : tmp_hashes) cout << hash << endl; | ||
|
||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Top-level build script for the cpp-json project: declares the project and
# descends into the lib/ and test/ subdirectories.
#
# The "...3.5" suffix sets the policy ceiling so that recent CMake releases,
# which deprecate (and, from 4.0, reject) compatibility with versions below
# 3.5, still accept this file. CMake older than 3.12 ignores the suffix and
# sees only the 3.0 minimum.
cmake_minimum_required(VERSION 3.0...3.5)
project(cpp-json CXX)

add_subdirectory(lib)
add_subdirectory(test)
Oops, something went wrong.