Skip to content

Commit

Permalink
making і & ї and г & ґ count as the same letter
Browse files (browse the repository at this point in the history)
  • Loading branch information
David Klinger committed Nov 4, 2022
1 parent 187a21e commit 34f627d
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 13 deletions.
4 changes: 2 additions & 2 deletions etl/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,10 +648,10 @@ def make_index(self, loc1, loc2, indent=None):
def_words = usage.get_definition_words()
form_words = usage.get_form_words() + re.sub(r"[^\w']+", ' ', word.get_word_no_accent()).strip().split()
for d in def_words:
d = d.lower()
d = d.lower().replace('ї', 'і').replace('ґ', 'г')
word_index[d].add(i)
for f in form_words:
f = f.lower()
f = f.lower().replace('ї', 'і').replace('ґ', 'г')
word_index[f].add(i)

word_index_list = {}
Expand Down
20 changes: 12 additions & 8 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ var main = (data, increase) => {
const highlightFunc = (t) => {

const find = (word, phrase, literal, mustPreceed) => {
console.log(word, phrase)
const letters = 'abcdefghijklmnopqrstuvwxyzабвгдежзийклмнопрстуфхцчшщъыьэюяєії'
let index = 0;
let parenthesis = 0;
Expand All @@ -318,7 +319,7 @@ var main = (data, increase) => {
const beforeClear = isBeginning || !letters.includes(phrase[i - 1].toLowerCase());
const afterClear = isEnd || !letters.includes(phrase[i + 1].toLowerCase());

const isWordMatch = thisLetter.toLowerCase() === word[index];
const isWordMatch = thisLetter.toLowerCase().replaceAll('ї', 'і').replaceAll('ґ', 'г') === word[index];
const isAccent = thisLetter === "́";

if (index === 0) {
Expand Down Expand Up @@ -532,10 +533,11 @@ function searchHelper() {
}
let indexes;
const canInclude = fuzzyWords.length === 1 && fuzzyWords[0].replace(/[^a-z]/g, '').length === 0;
for (const word of fuzzyWords) {
for (let word of fuzzyWords) {
if (!word) break;
// generate words containing all searched letters
let wordIndexes;
word = word.replaceAll('ї', 'і').replaceAll('ґ', 'г')
for (const l of new Set(word)) {
if (!wordIndexes) wordIndexes = wordDict[l];
else {
Expand Down Expand Up @@ -564,9 +566,10 @@ function searchHelper() {
}
}
console.log(literalWords)
for (const word of literalWords) {
for (let word of literalWords) {
if (!word) break;
// generate words containing all searched letters
word = word.replaceAll('ї', 'і').replaceAll('ґ', 'г')
let wordIndexes;
for (const l of new Set(word)) {
if (!wordIndexes) wordIndexes = wordDict[l];
Expand Down Expand Up @@ -607,7 +610,7 @@ function searchHelper() {
else if (l === ')') paren--;
else if (paren === 0) noParen += l;
}
return noParen.toLowerCase().includes(literalRes)
return noParen.toLowerCase().replace('ї', 'і').replace('ґ', 'г').includes(literalRes)
}

const unpack = (y) => {
Expand All @@ -624,8 +627,8 @@ function searchHelper() {
let goodData = d3.filter(
allData,
x => (
d3.filter(x.defs, filterFunc) + d3.filter(unpack(x.forms), y => { return y.replaceAll('\u0301', '') === literalRes; } )
).length > 0 || x.word.replaceAll('\u0301', '') === literalRes
d3.filter(x.defs, filterFunc) + d3.filter(unpack(x.forms), y => { return y.replaceAll('\u0301', '').replaceAll('ї', 'і').replaceAll('ґ', 'г') === literalRes; } )
).length > 0 || x.word.replaceAll('\u0301', '').replaceAll('ї', 'і').replaceAll('ґ', 'г') === literalRes
).map(x => x.index)

const _indexes = d3.filter(Array.from(indexes), x => goodData.includes(x))
Expand Down Expand Up @@ -660,8 +663,9 @@ function search(changeURL = true) {
const letters = "abcdefghijklmnopqrstuvwxyzабвгдежзийклмнопрстуфхцчшщъыьэюяєіїґ '\""
const oldSearch = searchTerm;
searchTerm = document.querySelector('input#search').value.toLowerCase();
searchTerm = searchTerm.replace('“', '"').replace('”', '"').replace('«', '"').replace('»', '"')
searchTerm = searchTerm.replace('‘', "'").replace('’', "'").replace('‛', "'")
searchTerm = searchTerm.replaceAll('“', '"').replaceAll('”', '"').replaceAll('«', '"').replaceAll('»', '"')
searchTerm = searchTerm.replaceAll('‘', "'").replaceAll('’', "'").replaceAll('‛', "'")
searchTerm = searchTerm.replaceAll('ї', 'і').replaceAll('ґ', 'г') // letter normalization
let newSearchTerm = ''
for (const s of searchTerm) { if (letters.includes(s)) newSearchTerm += s; }
searchTerm = newSearchTerm;
Expand Down
2 changes: 1 addition & 1 deletion index.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion word_dict.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion words.json

Large diffs are not rendered by default.

0 comments on commit 34f627d

Please sign in to comment.