-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathDisambigDefs.cpp
153 lines (129 loc) · 5.09 KB
/
DisambigDefs.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
/*
* DisambigDefs.cpp
*
* Created on: Dec 7, 2010
* Author: ysun
*/
#include "DisambigDefs.h"
#include <algorithm>
#include <cstring>
using std::list;
using std::string;
// Out-of-class definition of the static member cSimilarity_Compare::default_sp_exception,
// an exception object constructed once with a fixed message about mismatched
// similarity profile dimensions.
const cSimilarity_Compare::cException_Different_Similarity_Dimensions cSimilarity_Compare::default_sp_exception("Error: Different Similarity profile dimensions");
// Out-of-class definition of the static class-name registry. It starts out empty, is
// replaced wholesale by cAttribute::register_class_names() and is searched by
// cAttribute::position_in_registry().
vector <string> cAttribute::Derived_Class_Name_Registry;
/*
* This function splits the input string and saves the pieces into the attribute object.
* The legacy data format is "DATA1~COUNT1/DATA2~COUNT2/DATA3~COUNT3", so this function splits the string and
* saves pointers to "DATA1", "DATA2" and "DATA3". There used to be a counter class member, but it was removed later.
*
* One needs to be very familiar with what the strings look like and what they can contain in the source text file, especially the delimiters.
* The reader is assumed to be familiar with the source text format already.
*
*/
bool cAttribute::split_string(const char* recdata) {
    // Parses a record of the form "DATA1~COUNT1/DATA2~COUNT2/...":
    //   * '/' separates fields,
    //   * within a field, everything from the first '~' onwards is ignored.
    // For every field, the text before the '~' is handed to add_string() and the
    // pointer it returns is appended to this attribute's data vector.
    //
    // recdata must be a non-null, NUL-terminated string (it is passed to strlen).
    //
    // Returns true on success.
    // Throws cException_Vector_Data(recdata) if the data vector holds more than one
    // entry once a non-empty record has been parsed.
    //
    // The "~COUNT" part is deliberately skipped without being copied anywhere. An
    // earlier version copied it into a fixed 10-byte stack buffer without a bounds
    // check and then never used it, which overflowed the stack for counts of ten
    // or more characters.
    static const std::string emptystring("");
    std::vector<const std::string *> & data = this->get_data_modifiable();

    const char * p = recdata;
    const char * const pend = p + std::strlen(recdata);
    if (pend == p) {
        // An empty record is stored as a single empty string. This path returns
        // before the size check below, exactly as it always has.
        data.push_back(this->add_string(emptystring));
        return true;
    }

    const char delim = '/';
    const char secondary_delim = '~';

    for (;;) {
        // q: end of the current field (primary delimiter, or end of input).
        // r: end of the DATA part (secondary delimiter, or q when there is none).
        const char * const q = std::find(p, pend, delim);
        const char * const r = std::find(p, q, secondary_delim);

        const std::string field(p, r);
        data.push_back(this->add_string(field));

        if (q == pend)
            break;      // that was the last field
        p = q + 1;      // step over the '/' to the next field
    }

    // Swap with an exactly-sized copy to release any spare capacity
    // (Effective STL by Scott Meyers, Item 17).
    std::vector<const std::string *>(data).swap(data);

    if (data.size() > 1)
        throw cException_Vector_Data(recdata);
    return true;
}
/*
* Note: this is a global function, not a class member function.
* Usually attrib_merge works for set_mode classes, while classes of single_mode do not support this operation.
* If this function is called, the attributes referenced by l1 and l2 will be merged into a larger attribute object,
* and the pointers in l1 and l2 will then point to the newly created large object.
*
*/
void attrib_merge ( list < const cAttribute * *> & l1, list < const cAttribute * *> & l2 ) {
    // l1 and l2 each hold addresses of attribute pointers ("slots"). All slots of
    // one list are expected to point to the same attribute object. The two objects
    // are merged through the member function cAttribute::attrib_merge and, if that
    // yields a non-null result, every slot of both lists is redirected to it.
    //
    // Does nothing when either list is empty or the member merge returns NULL.
    // Throws cException_Other (after printing both objects to std::cout) when a
    // list contains a slot that points to a different object than its first slot.
    typedef list < const cAttribute * * > slot_list;
    static const string errmsg = "Error: attribute pointers are not pointing to the same object. Attribute Type = ";

    if ( l1.empty() || l2.empty() )
        return;

    // A NULL result from the member function means merging is not supported.
    const cAttribute * const merged = (*l1.front())->attrib_merge(** l2.front());
    if ( merged == NULL )
        return;

    // Both lists get identical treatment, so they are processed in a fixed order: l1, then l2.
    const slot_list * const groups[2] = { &l1, &l2 };

    // Step 1: every slot of a list must reference the same object as that list's first slot.
    for ( int g = 0; g < 2; ++g ) {
        const slot_list & slots = *groups[g];
        const cAttribute * const head = *slots.front();
        for ( slot_list::const_iterator it = slots.begin(); it != slots.end(); ++it ) {
            const cAttribute * const other = **it;
            if ( other == head )
                continue;
            std::cout << "------------" <<std::endl;
            std::cout << "Front address: " << head << " Other address: " << other << std::endl;
            head->print();
            other->print();
            std::cout << "------------" << std::endl;
            throw cException_Other ( ( errmsg + head->get_class_name()).c_str() );
        }
    }

    // Step 2: for a list with n slots, call reduce_attrib(n - 1) on its object
    // (skipped when n == 1), then call add_attrib on the merged object with the
    // combined surplus (skipped when both lists have exactly one slot).
    unsigned int total_slots = 0;
    for ( int g = 0; g < 2; ++g ) {
        const slot_list & slots = *groups[g];
        const unsigned int slot_count = slots.size();
        if ( slot_count != 1 )
            (*slots.front())->reduce_attrib(slot_count - 1);
        total_slots += slot_count;
    }
    if ( total_slots != 2 )
        merged->add_attrib( total_slots - 2 );

    // Step 3: redirect every slot of both lists to the merged object.
    for ( int g = 0; g < 2; ++g ) {
        const slot_list & slots = *groups[g];
        for ( slot_list::const_iterator it = slots.begin(); it != slots.end(); ++it )
            **it = merged;
    }
}
void cAttribute::register_class_names( const vector < string > & input) {
    // Replaces the entire class-name registry with a copy of `input`.
    // Names registered by an earlier call are discarded; the order of `input`
    // is what position_in_registry() reports as the index.
    cAttribute::Derived_Class_Name_Registry = input;
}
int cAttribute::position_in_registry( const string & s ) {
int i = 0;
for ( i = 0; i < static_cast<int> (Derived_Class_Name_Registry.size()); ++i ) {
if ( s == Derived_Class_Name_Registry.at(i))
return i;
}
return -1;
}