-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbananasplit.go
95 lines (81 loc) · 2.42 KB
/
bananasplit.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package bananasplit
import (
	"sort"
	"strings"

	"github.com/rivo/uniseg"
)
// RuneRange describes a contiguous span of Unicode code points.
//
// Start is the first code point of the span and End is the last one;
// IsPartOfRange treats both bounds as inclusive.
type RuneRange struct {
	Start, End rune
}
// Word is one contiguous piece of a string produced by SplitByRanges.
//
// Text holds the characters of the piece. Type holds the name of the range
// set those characters were matched against, or "unmatched" when they
// belonged to none of the supplied range sets.
type Word struct {
	Text, Type string
}
// Code-point ranges for symbol and pictograph blocks.
// See https://unicode.org/charts/ for codepoint ranges.
var (
	// Dingbats spans U+2700 through U+27BF.
	Dingbats = RuneRange{Start: 0x2700, End: 0x27BF}
	// OrnamentalDingbats spans U+1F650 through U+1F67F.
	OrnamentalDingbats = RuneRange{Start: 0x1F650, End: 0x1F67F}
	// Emoticons spans U+1F600 through U+1F64F.
	Emoticons = RuneRange{Start: 0x1F600, End: 0x1F64F}
	// MiscellaneousSymbols spans U+2600 through U+26FF.
	MiscellaneousSymbols = RuneRange{Start: 0x2600, End: 0x26FF}
	// MiscellaneousSymbolsAndPictographs spans U+1F300 through U+1F5FF.
	MiscellaneousSymbolsAndPictographs = RuneRange{Start: 0x1F300, End: 0x1F5FF}
	// SupplementalSymbolsAndPictographs spans U+1F900 through U+1F9FF.
	SupplementalSymbolsAndPictographs = RuneRange{Start: 0x1F900, End: 0x1F9FF}
	// SymbolsAndPictographsExtendedA spans U+1FA70 through U+1FAFF.
	SymbolsAndPictographsExtendedA = RuneRange{Start: 0x1FA70, End: 0x1FAFF}
	// TransportAndMapSymbols spans U+1F680 through U+1F6FF.
	TransportAndMapSymbols = RuneRange{Start: 0x1F680, End: 0x1F6FF}

	// EmojiRange bundles all of the emoji and pictograph ranges above into a
	// single set that can be passed to IsPartOfRange or SplitByRanges.
	EmojiRange = []RuneRange{
		Dingbats,
		OrnamentalDingbats,
		Emoticons,
		MiscellaneousSymbols,
		MiscellaneousSymbolsAndPictographs,
		SupplementalSymbolsAndPictographs,
		SymbolsAndPictographsExtendedA,
		TransportAndMapSymbols,
	}
)
// IsPartOfRange reports whether r lies inside at least one of the ranges in
// rng. Both ends of every range are inclusive. A nil or empty rng never
// matches.
func IsPartOfRange(r rune, rng []RuneRange) bool {
	for i := range rng {
		// Skip ranges that r falls outside of, on either side.
		if r < rng[i].Start || r > rng[i].End {
			continue
		}
		return true
	}
	return false
}
// SplitByRanges splits s into consecutive Words according to the named range
// sets in ranges.
//
// The string is walked one grapheme cluster at a time. Each cluster is
// classified by its first rune only: it receives the name of the range set
// containing that rune, or "unmatched" if no set contains it. Adjacent
// clusters with the same classification are merged into a single Word.
//
// Range sets are consulted in ascending order of their names, so when a rune
// belongs to more than one set the result is deterministic: the name that
// sorts first wins. (Ranging over the map directly would make the winner
// vary between runs.)
//
// The returned slice always contains at least one element. If s yields no
// grapheme clusters (for example, when it is empty) the result is a single
// Word whose Text and Type are both empty.
func SplitByRanges(s string, ranges map[string][]RuneRange) []Word {
	// Fix the lookup order up front; Go map iteration order is unspecified.
	names := make([]string, 0, len(ranges))
	for name := range ranges {
		names = append(names, name)
	}
	sort.Strings(names)

	var (
		sentence []Word
		text     strings.Builder // text of the word currently being built
		curType  string          // classification of the word being built
		started  bool            // whether the current word holds any cluster yet
	)

	gr := uniseg.NewGraphemes(s)
	for gr.Next() {
		runes := gr.Runes()

		// Classify the cluster by its leading rune.
		typ := "unmatched"
		for _, name := range names {
			if IsPartOfRange(runes[0], ranges[name]) {
				typ = name
				break
			}
		}

		// A change of classification closes the word in progress. Tracking
		// "started" explicitly (rather than testing for an empty type) keeps
		// the boundary correct even for a range set keyed by "".
		if started && typ != curType {
			sentence = append(sentence, Word{Text: text.String(), Type: curType})
			text.Reset()
		}
		curType = typ
		started = true
		text.WriteString(string(runes))
	}

	// Emit the trailing word (a zero-value Word when nothing was consumed).
	return append(sentence, Word{Text: text.String(), Type: curType})
}