-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlcpskip.c
188 lines (153 loc) · 4.56 KB
/
lcpskip.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#include <stdlib.h>
#include <stdio.h>
#include "lcpskip.h"
// debug output
#ifdef NDEBUG
#define eprintf(...)
#define INC(var)
#else
#define eprintf(...) fprintf (stderr, __VA_ARGS__);
#define INC(var) var++
#endif
// stats
stat_cmps=0;
stat_chr_cmps=0;
stat_lcp_cmps=0;
int randomLevel() {
int l = 0;
while( rand() < PROB * RAND_MAX )
l++;
return l;
}
Node *mkNode( int height ) {
Node * n = (Node *) malloc( 100+ sizeof( Node ) + (height)*sizeof(fwd));
int i;
for( i = 0; i < height; i++ ) {
n->forward[i].ptr = NULL;
n->forward[i].lcp = 0;
}
n->key = NULL;
n->value = NULL;
return n;
}
SkipList * mkList() {
SkipList *l = (SkipList *) malloc( sizeof( SkipList ));
l->header = mkNode(MAXHEIGHT);
l->level=0; // level is 0-based, ie 0 means 1 link
return l;
}
// strcmp with lcp length input/return
// input lcp is start, output set to last cmp
inline int strlcpcmp( const char * s, const char * t, int *lcp ) {
int i,r;
register unsigned char *ps;
register unsigned char *pt;
ps = (const unsigned char *) s + *lcp;
pt = (const unsigned char *) t + *lcp;
// to test no lcp:
//return strcmp(s,t);
INC(stat_cmps);
for( ; *pt == *ps && *ps != '\0'; pt++, ps++ )
INC(stat_chr_cmps);
r = *pt - *ps;
INC( stat_chr_cmps );
*lcp = ps - (unsigned char *) s; // output new lcp
return r; // works w/ \0 etc.
}
// Search similar to pseudo-code in Pugh's skip list paper
// same control flow, but key comparisons use lcp tricks
// similar to lcp merge sort algo
char *Search( SkipList *list, char * key ) {
// loop variables
// x is current/most recent comparand
// lcp is lcp(x,key), to check against lcp(x,x->forward[i])
// lcpfwd is result of forward cmp
Node * x = list->header;
int lcp = 0; // lcp( key, x->key ) from most recent > comparison
int lcpfwd = 0; // lcp from most recent comp even if >=
int i;
for( i = list->level; i >= 0; i-- ) {
lcpfwd = lcp;
// same level traversal
// lcp-based optimization:
// using x as current key and forward as next in (sorted) linked list
//
// basic trick is to predict strcmp(key, forward)
// given key>x and we know lcp(key,x):
// => if lcp(key,x) < lcp(x,forward), then key > forward
// since x and forward have same prefix up to where key cmp stops
// => key==forward only if lcp(key,x)==lcp(x,forward)
// ie forward must agree w/x where key does, but no more
// any shorter lcp(x,forward) means forward>key (stop)
//
// this rule is described in Ng and Kakehi
// https://www.jstage.jst.go.jp/article/ipsjdc/4/0/4_0_69/_pdf
//
while(x->forward[i].ptr != NULL
&& ( lcp < x->forward[i].lcp
|| lcp == x->forward[i].lcp && strlcpcmp(x->forward[i].ptr->key, key, &lcpfwd ) < 0 ))
{
INC( stat_lcp_cmps );
x = x->forward[i].ptr;
lcp = lcpfwd;
eprintf( "search: level %d lcp %d > %s\n", i, lcpfwd, x->key );
}
}
INC( stat_lcp_cmps ); // count last cmp
// x must be predecessor or NULL/end of list
x = x->forward[0].ptr;
// if( x != NULL && !strlcpcmp( key, x->key, &lcp ))
return x->value;
//else
//return NULL;
}
int Insert( SkipList *list, char * key, char * value ) {
Node * x = list->header;
Node * update[MAXHEIGHT];
int lcpupdate[MAXHEIGHT];
int lcp_fwd[MAXHEIGHT];
int lcp = 0; // lcp( key, x->key )
int lcpfwd = 0;
int i;
for(i = list->level; i >= 0; i-- ) {
lcpfwd = lcp;
while( x->forward[i].ptr != NULL
&& ( lcp < x->forward[i].lcp
|| lcp == x->forward[i].lcp && strlcpcmp( x->forward[i].ptr->key, key, &lcpfwd ) < 0 ))
{
eprintf( "key: %s level=%d lcp = %d key > %s\n", key, i, lcp, x->key );
x = x->forward[i].ptr;
lcp = lcpfwd;
}
eprintf( "key: %s level=%d lcp = %d stop > %s\n", key, i, lcp, x->key );
update[i] = x;
lcpupdate[i] = lcp;
// figure out forward lcp based on boolean exprs above
if( x->forward[i].ptr == NULL )
lcp_fwd[i] = 0;
else if( lcp > x->forward[i].lcp )
lcp_fwd[i] = x->forward[i].lcp;
else // ==
lcp_fwd[i] = lcpfwd;
}
// create new node and update predecessors
int lvl = randomLevel();
if( lvl > list->level ) {
for( i = list->level +1; i <= lvl; i++ ) {
update[i]=list->header;
lcp_fwd[i]=0; // to end/null
lcpupdate[i]=0; // from header
}
list->level = lvl;
}
x = mkNode( lvl );
x->key = key;
x->value = value;
for( i = 0; i <= lvl; i++ ) {
x->forward[i].ptr = update[i]->forward[i].ptr;
x->forward[i].lcp = lcp_fwd[i];
update[i]->forward[i].ptr = x;
update[i]->forward[i].lcp = lcpupdate[i];
}
return 1;
}