Skip to content
This repository has been archived by the owner on Feb 25, 2023. It is now read-only.

New version of JMdict for Yomichan #40

Merged
merged 19 commits into from
Jan 30, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright 2016-2022 Alex Yatskov
Copyright 2016-2023 Yomichan-Import Authors

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
Expand Down
86 changes: 65 additions & 21 deletions common.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ const (
DefaultTitle = ""
)

const databaseFormat = 3

type dbRecord []interface{}
type dbRecord []any
type dbRecordList []dbRecord

type dbTag struct {
Expand All @@ -46,7 +44,7 @@ func (meta dbTagList) crush() dbRecordList {
type dbMeta struct {
Expression string
Mode string
Data interface{}
Data any
}

type dbMetaList []dbMeta
Expand All @@ -66,7 +64,7 @@ type dbTerm struct {
DefinitionTags []string
Rules []string
Score int
Glossary []string
Glossary []any
Sequence int
TermTags []string
}
Expand Down Expand Up @@ -142,11 +140,34 @@ func (kanji dbKanjiList) crush() dbRecordList {
return results
}

func writeDb(outputPath, title, revision string, sequenced bool, recordData map[string]dbRecordList, stride int, pretty bool) error {
// dbIndex holds the dictionary metadata that writeDb marshals to JSON.
// Each field is emitted under the key named in its struct tag.
type dbIndex struct {
	Title     string `json:"title"`
	Format    int    `json:"format"` // 0 means "unset"; see setDefaults
	Revision  string `json:"revision"`
	Sequenced bool   `json:"sequenced"`
	Author    string `json:"author"`
	// NOTE(review): Go style prefers URL, but the field name is kept because
	// code outside this block may refer to it as Url.
	Url         string `json:"url"`
	Description string `json:"description"`
	Attribution string `json:"attribution"`
}

// setDefaults fills in Format, Author and Url when they still hold their zero
// values. Fields that were set explicitly, and all other fields, are left
// untouched.
func (index *dbIndex) setDefaults() {
	if index.Format == 0 {
		index.Format = 3
	}
	if index.Author == "" {
		index.Author = "yomichan-import"
	}
	if index.Url == "" {
		index.Url = "https://github.com/FooSoft/yomichan-import"
	}
}

func writeDb(outputPath string, index dbIndex, recordData map[string]dbRecordList, stride int, pretty bool) error {
var zbuff bytes.Buffer
zip := zip.NewWriter(&zbuff)

marshalJSON := func(obj interface{}, pretty bool) ([]byte, error) {
marshalJSON := func(obj any, pretty bool) ([]byte, error) {
if pretty {
return json.MarshalIndent(obj, "", " ")
}
Expand Down Expand Up @@ -186,25 +207,14 @@ func writeDb(outputPath, title, revision string, sequenced bool, recordData map[
}

var err error
var db struct {
Title string `json:"title"`
Format int `json:"format"`
Revision string `json:"revision"`
Sequenced bool `json:"sequenced"`
}

db.Title = title
db.Format = databaseFormat
db.Revision = revision
db.Sequenced = sequenced

for recordType, recordEntries := range recordData {
if _, err := writeDbRecords(recordType, recordEntries); err != nil {
return err
}
}

bytes, err := marshalJSON(db, pretty)
bytes, err := marshalJSON(index, pretty)
if err != nil {
return err
}
Expand Down Expand Up @@ -252,6 +262,39 @@ func hasString(needle string, haystack []string) bool {
return false
}

// intersection returns the strings of s2 that also occur in s1.
// The result follows the order of s2, contains each string at most once,
// and is always a non-nil slice (empty when nothing is shared).
func intersection(s1, s2 []string) []string {
	// Strings from s1 that have not yet been emitted.
	pending := make(map[string]struct{})
	for _, item := range s1 {
		pending[item] = struct{}{}
	}

	common := make([]string, 0)
	for _, item := range s2 {
		if _, ok := pending[item]; !ok {
			continue
		}
		common = append(common, item)
		// Drop the entry so repeated occurrences in s2 are reported once.
		delete(pending, item)
	}
	return common
}

// union returns every distinct string found in s1 or s2, in order of first
// appearance (all of s1 is scanned before s2). The result is always a
// non-nil slice, even when both inputs are empty.
func union(s1, s2 []string) []string {
	seen := make(map[string]struct{})
	merged := make([]string, 0)

	// Walk both inputs with the same dedup logic, s1 first.
	for _, list := range [][]string{s1, s2} {
		for _, item := range list {
			if _, dup := seen[item]; dup {
				continue
			}
			seen[item] = struct{}{}
			merged = append(merged, item)
		}
	}
	return merged
}

func detectFormat(path string) (string, error) {
switch filepath.Ext(path) {
case ".sqlite":
Expand All @@ -263,7 +306,7 @@ func detectFormat(path string) (string, error) {
}

switch filepath.Base(path) {
case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml":
case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml", "JMdict_e_examp":
return "edict", nil
case "JMnedict", "JMnedict.xml":
return "enamdict", nil
Expand Down Expand Up @@ -293,7 +336,8 @@ func detectFormat(path string) (string, error) {

func ExportDb(inputPath, outputPath, format, language, title string, stride int, pretty bool) error {
handlers := map[string]func(string, string, string, string, int, bool) error{
"edict": jmdictExportDb,
"edict": jmdExportDb,
"forms": formsExportDb,
"enamdict": jmnedictExportDb,
"epwing": epwingExportDb,
"kanjidic": kanjidicExportDb,
Expand Down
4 changes: 2 additions & 2 deletions daijirin.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func (e *daijirinExtractor) extractTerms(entry zig.BookEntry, sequence int) []db
for _, reading := range readings {
term := dbTerm{
Expression: reading,
Glossary: []string{entry.Text},
Glossary: []any{entry.Text},
Sequence: sequence,
}

Expand All @@ -79,7 +79,7 @@ func (e *daijirinExtractor) extractTerms(entry zig.BookEntry, sequence int) []db
term := dbTerm{
Expression: expression,
Reading: reading,
Glossary: []string{entry.Text},
Glossary: []any{entry.Text},
Sequence: sequence,
}

Expand Down
4 changes: 2 additions & 2 deletions daijisen.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func (e *daijisenExtractor) extractTerms(entry zig.BookEntry, sequence int) []db
if len(expressions) == 0 {
term := dbTerm{
Expression: reading,
Glossary: []string{entry.Text},
Glossary: []any{entry.Text},
Sequence: sequence,
}

Expand All @@ -82,7 +82,7 @@ func (e *daijisenExtractor) extractTerms(entry zig.BookEntry, sequence int) []db
term := dbTerm{
Expression: expression,
Reading: reading,
Glossary: []string{entry.Text},
Glossary: []any{entry.Text},
Sequence: sequence,
}

Expand Down
Loading