diff --git a/modules/exporter/tsv.go b/modules/exporter/tsv.go
index 9ca2550..d986834 100644
--- a/modules/exporter/tsv.go
+++ b/modules/exporter/tsv.go
@@ -59,7 +59,7 @@ func TsvExport(subjectScoreMap map[string]SubjectStudentCore, output string) {
for _, value := range subjectScoreMap {
_, subjectExistYet := subjectsDataMap[value.SubjectCode]
- if !subjectExistYet {
+ if !subjectExistYet && value.SubjectCode != "" {
subjectInfo := SubjectInfo{
SubjectName: value.SubjectName,
SubjectCode: value.SubjectCode,
diff --git a/modules/scanner/scanner.go b/modules/scanner/scanner.go
index 3a63215..1a82356 100644
--- a/modules/scanner/scanner.go
+++ b/modules/scanner/scanner.go
@@ -101,7 +101,7 @@ func scanFile(input string) map[string]exporter.SubjectStudentCore {
}
// Find the review items. Which is <p> tag with text "Số TC:" and <table> tag
- allSelection := doc.Find("p:contains(\"Số TC:\"), table")
+ allSelection := doc.Find("p:contains(\"CỘNG HÒA XÃ HỘI CHỦ NGHĨA VIỆT NAM\"), p:contains(\"Số TC:\"), table")
// Global map to store all scores and subject info
var ssScores = make(map[string]exporter.SubjectStudentCore)
@@ -113,12 +113,24 @@ func scanFile(input string) map[string]exporter.SubjectStudentCore {
s := allSelection.Eq(i)
// Check if the current selection is <p> tag
- if s.Is("p") {
+ // Before each table segments, there are <p> tag with subject data and "header" tag
+ // And the subject data <p> tag usually next to the "header" tag
+ if s.Is("p") && strings.Contains(strings.ToUpper(s.Text()), "CỘNG HÒA XÃ HỘI CHỦ NGHĨA VIỆT NAM") {
+ // Always subject data is in the next <p> tag
+ s1 := allSelection.Eq(i + 1)
+
+ // In some cases, the subject data is not next to the "header" tag, or is missing entirely.
+ // Continue and use the previous subject code
+ if !strings.Contains(s1.Text(), "Số TC:") {
+ continue
+ }
+
// Split the string by "Số TC:"
- parts := strings.Split(s.Text(), "Số TC:")
+ parts := strings.Split(s1.Text(), "Số TC:")
// The first part contains the course name, split it by ":"
courseParts := strings.Split(parts[0], ":")
+
subjectName := utils.CleanSubjectName(strings.TrimSpace(courseParts[1]))
// The second part contains the course credit and code, split it by "Mã học phần:"
diff --git a/utils/scanner.go b/utils/scanner.go
index 80ac1aa..2e7b134 100644
--- a/utils/scanner.go
+++ b/utils/scanner.go
@@ -65,6 +65,11 @@ func CleanSubjectName(value string) string {
// Remove last part
valueParts := strings.Split(value, "-")
+
+ if len(valueParts) == 1 {
+ return value
+ }
+
value = strings.TrimSpace(strings.Join(valueParts[:len(valueParts)-1], " "))
return value