-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjoin_example.go
76 lines (69 loc) · 1.74 KB
/
join_example.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
package main
import (
"encoding/csv"
"io"
"os"
"strconv"
"unsafe"
"github.com/pingcap/tidb/util/mvmap"
)
// JoinExample performs a simple hash join algorithm.
func JoinExample(f0, f1 string, offset0, offset1 []int) (sum uint64) {
tbl0, tbl1 := readCSVFileIntoTbl(f0), readCSVFileIntoTbl(f1)
hashtable := buildHashTable(tbl0, offset0)
for _, row := range tbl1 {
rowIDs := probe(hashtable, row, offset1)
for _, id := range rowIDs {
v, err := strconv.ParseUint(tbl0[id][0], 10, 64)
if err != nil {
panic("JoinExample panic\n" + err.Error())
}
sum += v
}
}
return sum
}
func readCSVFileIntoTbl(f string) (tbl [][]string) {
csvFile, err := os.Open(f)
if err != nil {
panic("ReadFileIntoTbl " + f + " fail\n" + err.Error())
}
defer csvFile.Close()
csvReader := csv.NewReader(csvFile)
for {
row, err := csvReader.Read()
if err == io.EOF {
break
} else if err != nil {
panic("ReadFileIntoTbl " + f + " fail\n" + err.Error())
}
tbl = append(tbl, row)
}
return tbl
}
func buildHashTable(data [][]string, offset []int) (hashtable *mvmap.MVMap) {
var keyBuffer []byte
valBuffer := make([]byte, 8)
hashtable = mvmap.NewMVMap()
for i, row := range data {
for _, off := range offset {
keyBuffer = append(keyBuffer, []byte(row[off])...)
}
*(*int64)(unsafe.Pointer(&valBuffer[0])) = int64(i)
hashtable.Put(keyBuffer, valBuffer)
keyBuffer = keyBuffer[:0]
}
return
}
func probe(hashtable *mvmap.MVMap, row []string, offset []int) (rowIDs []int64) {
var keyHash []byte
var vals [][]byte
for _, off := range offset {
keyHash = append(keyHash, []byte(row[off])...)
}
vals = hashtable.Get(keyHash, vals)
for _, val := range vals {
rowIDs = append(rowIDs, *(*int64)(unsafe.Pointer(&val[0])))
}
return rowIDs
}