diff --git a/go.mod b/go.mod index f24ebb7bc..c1eec141f 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/gin-contrib/requestid v0.0.6 github.com/gin-contrib/static v0.0.1 github.com/gin-gonic/gin v1.9.1 + github.com/go-shiori/go-epub v1.2.0 github.com/go-shiori/go-readability v0.0.0-20230421032831-c66949dfc0ad github.com/go-shiori/warc v0.0.0-20200621032813-359908319d1d github.com/go-sql-driver/mysql v1.7.1 @@ -57,6 +58,7 @@ require ( github.com/go-playground/validator/v10 v10.15.3 // indirect github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c // indirect github.com/goccy/go-json v0.10.2 // indirect + github.com/gofrs/uuid/v5 v5.0.0 // indirect github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect github.com/google/uuid v1.3.1 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect @@ -81,6 +83,7 @@ require ( github.com/tdewolff/parse v2.3.4+incompatible // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.11 // indirect + github.com/vincent-petithory/dataurl v1.0.0 // indirect go.etcd.io/bbolt v1.3.7 // indirect go.uber.org/atomic v1.11.0 // indirect golang.org/x/arch v0.5.0 // indirect diff --git a/go.sum b/go.sum index 0b1e1a612..5f88aa22f 100644 --- a/go.sum +++ b/go.sum @@ -87,6 +87,8 @@ github.com/go-playground/validator/v10 v10.15.3/go.mod h1:9iXMNT7sEkjXb0I+enO7QX github.com/go-shiori/dom v0.0.0-20190930082056-9d974a4f8b25/go.mod h1:360KoNl36ftFYhjLHuEty78kWUGw8i1opEicvIDLfRk= github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c h1:wpkoddUomPfHiOziHZixGO5ZBS73cKqVzZipfrLmO1w= github.com/go-shiori/dom v0.0.0-20230515143342-73569d674e1c/go.mod h1:oVDCh3qjJMLVUSILBRwrm+Bc6RNXGZYtoh9xdvf1ffM= +github.com/go-shiori/go-epub v1.2.0 h1:c2b3DblHpNIiD8ISlQ+0Mc/tsRmn1mX1l6Q/0LzavN4= +github.com/go-shiori/go-epub v1.2.0/go.mod h1:gQCqrK+dIMLA7JMd8GxdBvhn811wb7XCa733RxWfPYw= github.com/go-shiori/go-readability v0.0.0-20230421032831-c66949dfc0ad 
h1:3VP5Q8Mh165h2DHmXWFT4LJlwwvgTRlEuoe2vnsVnJ4= github.com/go-shiori/go-readability v0.0.0-20230421032831-c66949dfc0ad/go.mod h1:2DpZlTJO/ycxp/vsc/C11oUyveStOgIXB88SYV1lncI= github.com/go-shiori/warc v0.0.0-20200621032813-359908319d1d h1:+SEf4hYDaAt2eyq8Xu3YyWCpnMsK8sZfbYsDRFCUgBM= @@ -258,6 +260,8 @@ github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLY github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY= github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +github.com/vincent-petithory/dataurl v1.0.0 h1:cXw+kPto8NLuJtlMsI152irrVw9fRDX8AbShPRpg2CI= +github.com/vincent-petithory/dataurl v1.0.0/go.mod h1:FHafX5vmDzyP+1CQATJn7WFKc9CvnvxyvZy6I1MrG/U= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.7 h1:j+zJOnnEjF/kyHlDDgGnVL/AIqIJPq8UoB2GSNfkUfQ= diff --git a/internal/core/ebook.go b/internal/core/ebook.go index 6fd45464d..d3720cce1 100644 --- a/internal/core/ebook.go +++ b/internal/core/ebook.go @@ -1,17 +1,13 @@ package core import ( - "archive/zip" "fmt" - "io" - "log" - "net/http" "os" fp "path/filepath" - "regexp" "strconv" "strings" + epub "github.com/go-shiori/go-epub" "github.com/go-shiori/shiori/internal/model" "github.com/pkg/errors" ) @@ -20,8 +16,6 @@ import ( // The destination path `dstPath` should include file name with ".epub" extension // The bookmark model will be used to update the UI based on whether this function is successful or not. 
func GenerateEbook(req ProcessRequest, dstPath string) (book model.Bookmark, err error) { - // variable for store generated html code - var html string book = req.Bookmark @@ -30,8 +24,7 @@ func GenerateEbook(req ProcessRequest, dstPath string) (book model.Bookmark, err return book, errors.New("bookmark ID is not valid") } - // get current state of bookmark - // cheak archive and thumb + // Get current state of bookmark: check archive and thumb strID := strconv.Itoa(book.ID) imagePath := fp.Join(req.DataDir, "thumb", fmt.Sprintf("%d", book.ID)) @@ -45,192 +38,45 @@ func GenerateEbook(req ProcessRequest, dstPath string) (book model.Bookmark, err book.HasArchive = true } - // this function create ebook from reader mode of bookmark so + // This function creates ebook from reader mode of bookmark so // we can't create ebook from PDF so we return error here if bookmark is a pdf contentType := req.ContentType if strings.Contains(contentType, "application/pdf") { return book, errors.New("can't create ebook for pdf") } - // create temporary epub file + // Create temporary epub file tmpFile, err := os.CreateTemp("", "ebook") if err != nil { return book, errors.Wrap(err, "can't create temporary EPUB file") } defer os.Remove(tmpFile.Name()) - // Create zip archive - epubWriter := zip.NewWriter(tmpFile) + // Create last line of ebook + lastline := `

Generated By Shiori From This Page

` - // Create the mimetype file - mimetypeWriter, err := epubWriter.Create("mimetype") + // Create ebook + ebook, err := epub.NewEpub(book.Title) if err != nil { - return book, errors.Wrap(err, "can't create mimetype") - } - _, err = mimetypeWriter.Write([]byte("application/epub+zip")) - if err != nil { - return book, errors.Wrap(err, "can't write into mimetype file") - } - - // Create the container.xml file - containerWriter, err := epubWriter.Create("META-INF/container.xml") - if err != nil { - return book, errors.Wrap(err, "can't create container.xml") - } - - _, err = containerWriter.Write([]byte(` - - - - -`)) - if err != nil { - return book, errors.Wrap(err, "can't write into container.xml file") - } - - contentOpfWriter, err := epubWriter.Create("OEBPS/content.opf") - if err != nil { - return book, errors.Wrap(err, "can't create content.opf") - } - _, err = contentOpfWriter.Write([]byte(` - - - ` + book.Title + ` - - - - - - - - - -`)) - if err != nil { - return book, errors.Wrap(err, "can't write into container.opf file") + return book, errors.Wrap(err, "can't create EPUB") } - // Create the style.css file - styleWriter, err := epubWriter.Create("style.css") - if err != nil { - return book, errors.Wrap(err, "can't create content.xml") - } - _, err = styleWriter.Write([]byte(`content { - display: block; - font-size: 1em; - line-height: 1.2; - padding-left: 0; - padding-right: 0; - text-align: justify; - margin: 0 5pt -} -img { - margin: auto; - display: block; -}`)) + ebook.SetTitle(book.Title) + ebook.SetAuthor(book.Author) + ebook.SetDescription(book.Excerpt) + _, err = ebook.AddSection(`

`+book.Title+`

`+book.HTML+lastline, book.Title, "", "") if err != nil { - return book, errors.Wrap(err, "can't write into style.css file") + return book, errors.Wrap(err, "can't add ebook Section") } - // Create the toc.ncx file - tocNcxWriter, err := epubWriter.Create("OEBPS/toc.ncx") + ebook.EmbedImages() + err = ebook.Write(tmpFile.Name()) if err != nil { - return book, errors.Wrap(err, "can't create toc.ncx") + return book, errors.Wrap(err, "can't create ebook file") } - _, err = tocNcxWriter.Write([]byte(` - - - - - - - - - - ` + book.Title + ` - - - - - ` + book.Title + ` - - - - -`)) - if err != nil { - return book, errors.Wrap(err, "can't write into toc.ncx file") - } - - // get list of images tag in html - imageList, _ := GetImages(book.HTML) - imgRegex := regexp.MustCompile(``) - - // Create a set to store unique image URLs - imageSet := make(map[string]bool) - - // Download image in html file and generate new html - html = book.HTML - for _, match := range imgRegex.FindAllStringSubmatch(book.HTML, -1) { - imageURL := match[1] - if _, ok := imageList[imageURL]; ok && !imageSet[imageURL] { - // Add the image URL to the set - imageSet[imageURL] = true - - // Download the image - resp, err := http.Get(imageURL) - if err != nil { - log.Fatal(err) - } - defer resp.Body.Close() - // Get the image data - imageData, err := io.ReadAll(resp.Body) - if err != nil { - return book, errors.Wrap(err, "can't get image from the internet") - } - - fileName := fp.Base(imageURL) - filePath := "images/" + fileName - imageWriter, err := epubWriter.Create(filePath) - if err != nil { - log.Fatal(err) - } - - // Write the image to the file - _, err = imageWriter.Write(imageData) - if err != nil { - return book, errors.Wrap(err, "can't create image file") - } - // Replace the image tag with the new downloaded image - html = strings.ReplaceAll(html, match[0], fmt.Sprintf(``, filePath)) - } - } - // Create the content.html file - contentHtmlWriter, err := epubWriter.Create("OEBPS/content.html") - 
if err != nil { - return book, errors.Wrap(err, "can't create content.xml") - } - _, err = contentHtmlWriter.Write([]byte("\n\n\n\t" + book.Title + "\n\t\n\n\n\t

" + book.Title + "

" + "\n\n" + html + "\n" + "\n")) - if err != nil { - return book, errors.Wrap(err, "can't write into content.html") - } - // close epub and tmpFile - err = epubWriter.Close() - if err != nil { - return book, errors.Wrap(err, "failed to close EPUB writer") - } - err = tmpFile.Close() - if err != nil { - return book, errors.Wrap(err, "failed to close temporary EPUB file") - } - // open temporary file again - tmpFile, err = os.Open(tmpFile.Name()) - if err != nil { - return book, errors.Wrap(err, "can't open temporary EPUB file") - } defer tmpFile.Close() - // if everitings go well we start move ebook to dstPath + + // If everything go well we move ebook to dstPath err = MoveFileToDestination(dstPath, tmpFile) if err != nil { return book, errors.Wrap(err, "failed move ebook to destination") @@ -239,29 +85,3 @@ img { book.HasEbook = true return book, nil } - -// function get html and return list of image url inside html file -func GetImages(html string) (map[string]string, error) { - // Regular expression to match image tags and their URLs - imageTagRegex := regexp.MustCompile(``) - - // Find all matches in the HTML string - imageTagMatches := imageTagRegex.FindAllStringSubmatch(html, -1) - // Create a dictionary to store the image URLs - images := make(map[string]string) - - // Check if there are any matches - if len(imageTagMatches) == 0 { - return nil, nil - } - - // Loop through all the matches and add them to the dictionary - for _, match := range imageTagMatches { - imageURL := match[1] - if !strings.HasPrefix(imageURL, "data:image/") { - images[imageURL] = match[0] - } - } - - return images, nil -} diff --git a/internal/core/ebook_test.go b/internal/core/ebook_test.go index 25b0eca0d..da7072021 100644 --- a/internal/core/ebook_test.go +++ b/internal/core/ebook_test.go @@ -171,74 +171,3 @@ func TestGenerateEbook(t *testing.T) { }) }) } - -// Add more unit tests for other scenarios that missing specialy -// can't create ebook directory and can't write situatuin 
-// writing inside zip file -// html variable that not export and image download loop - -func TestGetImages(t *testing.T) { - // Test case 1: HTML with no image tags - html1 := `

Hello, World!

` - expected1 := make(map[string]string) - result1, err1 := core.GetImages(html1) - if err1 != nil { - t.Errorf("Unexpected error: %v", err1) - } - if len(result1) != len(expected1) { - t.Errorf("Expected %d images, but got %d", len(expected1), len(result1)) - } - - // Test case 2: HTML with one image tag - html2 := `` - expected2 := map[string]string{"image1.jpg": ""} - result2, err2 := core.GetImages(html2) - if err2 != nil { - t.Errorf("Unexpected error: %v", err2) - } - if len(result2) != len(expected2) { - t.Errorf("Expected %d images, but got %d", len(expected2), len(result2)) - } - for key, value := range expected2 { - if result2[key] != value { - t.Errorf("Expected image URL %s with tag %s, but got %s", key, value, result2[key]) - } - } - - // Test case 3: HTML with multiple image tags - html3 := `` - expected3 := map[string]string{ - "image1.jpg": "", - "image2.jpg": "", - } - result3, err3 := core.GetImages(html3) - if err3 != nil { - t.Errorf("Unexpected error: %v", err3) - } - if len(result3) != len(expected3) { - t.Errorf("Expected %d images, but got %d", len(expected3), len(result3)) - } - for key, value := range expected3 { - if result3[key] != value { - t.Errorf("Expected image URL %s with tag %s, but got %s", key, value, result3[key]) - } - } - // Test case 4: HTML with multiple image tags with duplicayr - html4 := `` - expected4 := map[string]string{ - "image1.jpg": "", - "image2.jpg": "", - } - result4, err4 := core.GetImages(html4) - if err4 != nil { - t.Errorf("Unexpected error: %v", err4) - } - if len(result4) != len(expected4) { - t.Errorf("Expected %d images, but got %d", len(expected4), len(result4)) - } - for key, value := range expected4 { - if result4[key] != value { - t.Errorf("Expected image URL %s with tag %s, but got %s", key, value, result4[key]) - } - } -}