-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathbinary_collection.hpp
154 lines (124 loc) · 3.9 KB
/
binary_collection.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#pragma once
#include <stdint.h>
#include <sys/mman.h>
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iterator>
#include <stdexcept>
#include <boost/iostreams/device/mapped_file.hpp>
#include "util.hpp"
namespace ds2i {
class binary_collection {
public:
typedef uint32_t posting_type;
binary_collection(const char* filename)
{
m_file.open(filename);
if ( !m_file.is_open() ) {
throw std::runtime_error("Error opening file");
}
m_data = (posting_type const*)m_file.data();
m_data_size = m_file.size() / sizeof(m_data[0]);
auto ret = posix_madvise((void*)m_data, m_data_size, POSIX_MADV_SEQUENTIAL);
if (ret) logger() << "Error calling madvice: " << errno << std::endl;
}
class iterator;
iterator begin() const
{
return iterator(this, 0);
}
iterator end() const
{
return iterator(this, m_data_size);
}
class sequence {
public:
sequence()
: m_begin(nullptr)
, m_end(nullptr)
{}
posting_type const* begin() const
{
return m_begin;
}
posting_type const* end() const
{
return m_end;
}
posting_type back() const
{
assert(size());
return *(m_end - 1);
}
size_t size() const
{
return m_end - m_begin;
}
private:
friend class binary_collection::iterator;
sequence(posting_type const* begin, posting_type const* end)
: m_begin(begin)
, m_end(end)
{}
posting_type const* m_begin;
posting_type const* m_end;
};
class iterator : public std::iterator<std::forward_iterator_tag,
sequence> {
public:
iterator()
: m_collection(nullptr)
{}
value_type const& operator*() const
{
return m_cur_seq;
}
value_type const* operator->() const
{
return &m_cur_seq;
}
iterator& operator++()
{
m_pos = m_next_pos;
read();
return *this;
}
bool operator==(iterator const& other) const
{
assert(m_collection == other.m_collection);
return m_pos == other.m_pos;
}
bool operator!=(iterator const& other) const
{
return !(*this == other);
}
private:
friend class binary_collection;
iterator(binary_collection const* coll, size_t pos)
: m_collection(coll)
, m_pos(pos)
{
read();
}
void read()
{
assert(m_pos <= m_collection->m_data_size);
if (m_pos == m_collection->m_data_size) return;
size_t n = 0;
size_t pos = m_pos;
while (!(n = m_collection->m_data[pos++])); // skip empty seqs
// file might be truncated
n = std::min(n, size_t(m_collection->m_data_size - pos));
posting_type const* begin = &m_collection->m_data[pos];
posting_type const* end = begin + n;
m_next_pos = pos + n;
m_cur_seq = sequence(begin, end);
}
binary_collection const* m_collection;
size_t m_pos, m_next_pos;
sequence m_cur_seq;
};
private:
boost::iostreams::mapped_file_source m_file;
posting_type const* m_data;
size_t m_data_size;
};
}