diff --git a/modules/exporter/tsv.go b/modules/exporter/tsv.go index 9ca2550..d986834 100644 --- a/modules/exporter/tsv.go +++ b/modules/exporter/tsv.go @@ -59,7 +59,7 @@ func TsvExport(subjectScoreMap map[string]SubjectStudentCore, output string) { for _, value := range subjectScoreMap { _, subjectExistYet := subjectsDataMap[value.SubjectCode] - if !subjectExistYet { + if !subjectExistYet && value.SubjectCode != "" { subjectInfo := SubjectInfo{ SubjectName: value.SubjectName, SubjectCode: value.SubjectCode, diff --git a/modules/scanner/scanner.go b/modules/scanner/scanner.go index 3a63215..1a82356 100644 --- a/modules/scanner/scanner.go +++ b/modules/scanner/scanner.go @@ -101,7 +101,7 @@ func scanFile(input string) map[string]exporter.SubjectStudentCore { } // Find the review items. Which is

<p> tag with text "Số TC:" and <table> tag - allSelection := doc.Find("p:contains(\"Số TC:\"), table") + allSelection := doc.Find("p:contains(\"CỘNG HÒA XÃ HỘI CHỦ NGHĨA VIỆT NAM\"), p:contains(\"Số TC:\"), table") // Global map to store all scores and subject info var ssScores = make(map[string]exporter.SubjectStudentCore) @@ -113,12 +113,24 @@ func scanFile(input string) map[string]exporter.SubjectStudentCore { s := allSelection.Eq(i) // Check if the current selection is

<p> tag - if s.Is("p") { + // Before each table segments, there are

<p> tag with subject data and "header" tag + // And the subject data

<p> tag usually next to the "header" tag + if s.Is("p") && strings.Contains(strings.ToUpper(s.Text()), "CỘNG HÒA XÃ HỘI CHỦ NGHĨA VIỆT NAM") { + // Always subject data is in the next

<p> tag + s1 := allSelection.Eq(i + 1) + + // In some cases, the subject data is not next to "header" tag, or just don't have. + //Continue and use the previous subject code + if !strings.Contains(s1.Text(), "Số TC:") { + continue + } + // Split the string by "Số TC:" - parts := strings.Split(s.Text(), "Số TC:") + parts := strings.Split(s1.Text(), "Số TC:") // The first part contains the course name, split it by ":" courseParts := strings.Split(parts[0], ":") + subjectName := utils.CleanSubjectName(strings.TrimSpace(courseParts[1])) // The second part contains the course credit and code, split it by "Mã học phần:" diff --git a/utils/scanner.go b/utils/scanner.go index 80ac1aa..2e7b134 100644 --- a/utils/scanner.go +++ b/utils/scanner.go @@ -65,6 +65,11 @@ func CleanSubjectName(value string) string { // Remove last part valueParts := strings.Split(value, "-") + + if len(valueParts) == 1 { + return value + } + value = strings.TrimSpace(strings.Join(valueParts[:len(valueParts)-1], " ")) return value