Skip to content
This repository has been archived by the owner on Feb 25, 2023. It is now read-only.

Commit

Permalink
Merge pull request #40 from stephenmk/master
Browse files Browse the repository at this point in the history
New version of JMdict for Yomichan
  • Loading branch information
FooSoft authored Jan 30, 2023
2 parents 9222417 + 0b328e1 commit 74de4ce
Show file tree
Hide file tree
Showing 28 changed files with 2,360 additions and 321 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright 2016-2022 Alex Yatskov
Copyright 2016-2023 Yomichan-Import Authors

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
Expand Down
87 changes: 66 additions & 21 deletions common.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ const (
DefaultTitle = ""
)

const databaseFormat = 3

type dbRecord []interface{}
type dbRecord []any
type dbRecordList []dbRecord

type dbTag struct {
Expand All @@ -46,7 +44,7 @@ func (meta dbTagList) crush() dbRecordList {
type dbMeta struct {
Expression string
Mode string
Data interface{}
Data any
}

type dbMetaList []dbMeta
Expand All @@ -66,7 +64,7 @@ type dbTerm struct {
DefinitionTags []string
Rules []string
Score int
Glossary []string
Glossary []any
Sequence int
TermTags []string
}
Expand Down Expand Up @@ -142,11 +140,34 @@ func (kanji dbKanjiList) crush() dbRecordList {
return results
}

func writeDb(outputPath, title, revision string, sequenced bool, recordData map[string]dbRecordList, stride int, pretty bool) error {
// dbIndex is the metadata record that writeDb marshals to JSON. Each field is
// emitted under the lowercase key named in its struct tag, in declaration
// order.
type dbIndex struct {
	Title       string `json:"title"`
	Format      int    `json:"format"` // replaced by 3 in setDefaults when left at zero
	Revision    string `json:"revision"`
	Sequenced   bool   `json:"sequenced"`
	Author      string `json:"author"` // replaced by "yomichan-import" in setDefaults when empty
	Url         string `json:"url"`    // replaced by the project URL in setDefaults when empty
	Description string `json:"description"`
	Attribution string `json:"attribution"`
}

// setDefaults fills in Format, Author and Url when they still hold their zero
// values. Fields that were set explicitly, and all other fields, are left
// untouched.
func (idx *dbIndex) setDefaults() {
	if idx.Format == 0 {
		idx.Format = 3
	}

	// Every string default follows the same "only when empty" rule, so the
	// targets and their fallbacks are listed in one table.
	for _, field := range []struct {
		target   *string
		fallback string
	}{
		{&idx.Author, "yomichan-import"},
		{&idx.Url, "https://github.com/FooSoft/yomichan-import"},
	} {
		if *field.target == "" {
			*field.target = field.fallback
		}
	}
}

func writeDb(outputPath string, index dbIndex, recordData map[string]dbRecordList, stride int, pretty bool) error {
var zbuff bytes.Buffer
zip := zip.NewWriter(&zbuff)

marshalJSON := func(obj interface{}, pretty bool) ([]byte, error) {
marshalJSON := func(obj any, pretty bool) ([]byte, error) {
if pretty {
return json.MarshalIndent(obj, "", " ")
}
Expand Down Expand Up @@ -186,25 +207,15 @@ func writeDb(outputPath, title, revision string, sequenced bool, recordData map[
}

var err error
var db struct {
Title string `json:"title"`
Format int `json:"format"`
Revision string `json:"revision"`
Sequenced bool `json:"sequenced"`
}

db.Title = title
db.Format = databaseFormat
db.Revision = revision
db.Sequenced = sequenced

for recordType, recordEntries := range recordData {
if _, err := writeDbRecords(recordType, recordEntries); err != nil {
return err
}
}

bytes, err := marshalJSON(db, pretty)
index.setDefaults()
bytes, err := marshalJSON(index, pretty)
if err != nil {
return err
}
Expand Down Expand Up @@ -252,6 +263,39 @@ func hasString(needle string, haystack []string) bool {
return false
}

// intersection returns the strings that occur in both s1 and s2. Results
// follow the order of their first appearance in s2, and each string is
// reported at most once. The result is always a non-nil slice, even when
// nothing matches.
func intersection(s1, s2 []string) []string {
	// pending holds the members of s1 that have not been emitted yet.
	pending := make(map[string]struct{}, len(s1))
	for _, item := range s1 {
		pending[item] = struct{}{}
	}

	common := []string{}
	for _, item := range s2 {
		if _, ok := pending[item]; !ok {
			continue
		}
		common = append(common, item)
		// Remove the entry so a repeated occurrence in s2 is not emitted again.
		delete(pending, item)
	}
	return common
}

// union returns every distinct string found in s1 or s2, in order of first
// appearance (all of s1 is scanned before s2). The result is always a non-nil
// slice, even when both inputs are empty.
func union(s1, s2 []string) []string {
	seen := make(map[string]struct{})
	merged := []string{}

	// Both inputs get the same de-duplicating pass; s1 goes first so its
	// elements keep priority in the output order.
	for _, source := range [][]string{s1, s2} {
		for _, item := range source {
			if _, dup := seen[item]; dup {
				continue
			}
			seen[item] = struct{}{}
			merged = append(merged, item)
		}
	}
	return merged
}

func detectFormat(path string) (string, error) {
switch filepath.Ext(path) {
case ".sqlite":
Expand All @@ -263,7 +307,7 @@ func detectFormat(path string) (string, error) {
}

switch filepath.Base(path) {
case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml":
case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml", "JMdict_e_examp":
return "edict", nil
case "JMnedict", "JMnedict.xml":
return "enamdict", nil
Expand Down Expand Up @@ -293,7 +337,8 @@ func detectFormat(path string) (string, error) {

func ExportDb(inputPath, outputPath, format, language, title string, stride int, pretty bool) error {
handlers := map[string]func(string, string, string, string, int, bool) error{
"edict": jmdictExportDb,
"edict": jmdExportDb,
"forms": formsExportDb,
"enamdict": jmnedictExportDb,
"epwing": epwingExportDb,
"kanjidic": kanjidicExportDb,
Expand Down
4 changes: 2 additions & 2 deletions daijirin.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func (e *daijirinExtractor) extractTerms(entry zig.BookEntry, sequence int) []db
for _, reading := range readings {
term := dbTerm{
Expression: reading,
Glossary: []string{entry.Text},
Glossary: []any{entry.Text},
Sequence: sequence,
}

Expand All @@ -79,7 +79,7 @@ func (e *daijirinExtractor) extractTerms(entry zig.BookEntry, sequence int) []db
term := dbTerm{
Expression: expression,
Reading: reading,
Glossary: []string{entry.Text},
Glossary: []any{entry.Text},
Sequence: sequence,
}

Expand Down
4 changes: 2 additions & 2 deletions daijisen.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func (e *daijisenExtractor) extractTerms(entry zig.BookEntry, sequence int) []db
if len(expressions) == 0 {
term := dbTerm{
Expression: reading,
Glossary: []string{entry.Text},
Glossary: []any{entry.Text},
Sequence: sequence,
}

Expand All @@ -82,7 +82,7 @@ func (e *daijisenExtractor) extractTerms(entry zig.BookEntry, sequence int) []db
term := dbTerm{
Expression: expression,
Reading: reading,
Glossary: []string{entry.Text},
Glossary: []any{entry.Text},
Sequence: sequence,
}

Expand Down
Loading

0 comments on commit 74de4ce

Please sign in to comment.