Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: get image blob in backend #495

Merged
merged 2 commits into from
Nov 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions plugin/crawler/website.go → plugin/http_getter/html_meta.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package crawler
package getter

import (
"fmt"
"io"
"net/http"
urlUtil "net/url"
"net/url"

"golang.org/x/net/html"
"golang.org/x/net/html/atom"
Expand All @@ -15,19 +16,26 @@ type HTMLMeta struct {
Image string `json:"image"`
}

func GetWebsiteMeta(url string) (*HTMLMeta, error) {
if _, err := urlUtil.Parse(url); err != nil {
func GetHTMLMeta(urlStr string) (*HTMLMeta, error) {
if _, err := url.Parse(urlStr); err != nil {
return nil, err
}

response, err := http.Get(url)
response, err := http.Get(urlStr)

Check failure

Code scanning / CodeQL

Uncontrolled data used in network request

The [URL](1) of this request depends on a [user-provided value](2).
if err != nil {
return nil, err
}
defer response.Body.Close()

htmlMeta := extractHTMLMeta(response.Body)
mediatype, err := getMediatype(response)
if err != nil {
return nil, err
}
if mediatype != "text/html" {
return nil, fmt.Errorf("Wrong website mediatype")
}

htmlMeta := extractHTMLMeta(response.Body)
return htmlMeta, nil
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
package crawler
package getter

import (
"testing"

"github.com/stretchr/testify/require"
)

func TestGetWebsiteMeta(t *testing.T) {
func TestGetHTMLMeta(t *testing.T) {
tests := []struct {
url string
urlStr string
htmlMeta HTMLMeta
}{
{
url: "https://baidu.com",
urlStr: "https://baidu.com",
htmlMeta: HTMLMeta{
Title: "百度一下,你就知道",
},
},
{
url: "https://www.bytebase.com/blog/sql-review-tool-for-devs",
urlStr: "https://www.bytebase.com/blog/sql-review-tool-for-devs",
htmlMeta: HTMLMeta{
Title: "The SQL Review Tool for Developers",
Description: "Reviewing SQL can be somewhat tedious, yet is essential to keep your database fleet reliable. At Bytebase, we are building a developer-first SQL review tool to empower the DevOps system.",
Expand All @@ -27,7 +27,7 @@ func TestGetWebsiteMeta(t *testing.T) {
},
}
for _, test := range tests {
metadata, err := GetWebsiteMeta(test.url)
metadata, err := GetHTMLMeta(test.urlStr)
require.NoError(t, err)
require.Equal(t, test.htmlMeta, *metadata)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// crawler is using to get resources from url.
// Package getter is used to get resources from a URL.
// * Get metadata for website;
// * Get image blob to avoid CORS;
package crawler
package getter
45 changes: 45 additions & 0 deletions plugin/http_getter/image.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package getter

import (
"fmt"
"io"
"net/http"
"net/url"
"strings"
)

// Image is the result of fetching a remote image: the raw body bytes and the
// media type the server declared for them.
type Image struct {
	// Blob is the complete response body as read by GetImage.
	Blob []byte
	// Mediatype is the media type parsed from the response's Content-Type
	// header, without parameters.
	Mediatype string
}

func GetImage(urlStr string) (*Image, error) {
if _, err := url.Parse(urlStr); err != nil {
return nil, err
}

response, err := http.Get(urlStr)

Check failure

Code scanning / CodeQL

Uncontrolled data used in network request

The [URL](1) of this request depends on a [user-provided value](2).
if err != nil {
return nil, err
}
defer response.Body.Close()

mediatype, err := getMediatype(response)
if err != nil {
return nil, err
}
if !strings.HasPrefix(mediatype, "image/") {
return nil, fmt.Errorf("Wrong image mediatype")
}

bodyBytes, err := io.ReadAll(response.Body)
if err != nil {
return nil, err
}

image := &Image{
Blob: bodyBytes,
Mediatype: mediatype,
}
return image, nil
}
21 changes: 21 additions & 0 deletions plugin/http_getter/image_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package getter

import (
"testing"

"github.com/stretchr/testify/require"
)

// TestGetImage checks that GetImage succeeds for each known-good image URL.
// It issues real HTTP requests to the listed hosts.
func TestGetImage(t *testing.T) {
	imageURLs := []string{
		"https://star-history.com/bytebase.webp",
	}
	for _, imageURL := range imageURLs {
		_, err := GetImage(imageURL)
		require.NoError(t, err)
	}
}
15 changes: 15 additions & 0 deletions plugin/http_getter/util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package getter

import (
"mime"
"net/http"
)

// getMediatype returns the media type declared by the response's Content-Type
// header, lowercased and without parameters (for example
// "text/html; charset=utf-8" yields "text/html").
//
// A header whose media type is well-formed but whose parameters are malformed
// is still accepted: mime.ParseMediaType reports the media type together with
// mime.ErrInvalidMediaParameter in that case, and callers only need the type.
// An error is returned when the header is missing or the media type itself
// cannot be parsed.
func getMediatype(response *http.Response) (string, error) {
	contentType := response.Header.Get("content-type")
	mediatype, _, err := mime.ParseMediaType(contentType)
	// ParseMediaType returns the ErrInvalidMediaParameter sentinel unwrapped,
	// so a direct comparison is sufficient here.
	if err != nil && (err != mime.ErrInvalidMediaParameter || mediatype == "") {
		return "", err
	}
	return mediatype, nil
}
38 changes: 0 additions & 38 deletions server/crawler.go

This file was deleted.

70 changes: 70 additions & 0 deletions server/http_getter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package server

import (
"encoding/json"
"fmt"
"net/http"
"net/url"

"github.com/labstack/echo/v4"
getter "github.com/usememos/memos/plugin/http_getter"
metric "github.com/usememos/memos/plugin/metrics"
)

// registerCrawlerPublicRoutes registers the getter endpoints on g. Both take
// the target address in the "url" query parameter:
//
//   - GET /get/httpmeta responds with the JSON-encoded HTML metadata of the page.
//   - GET /get/image responds with the fetched image bytes, using the media
//     type reported for the image as Content-Type.
//
// A missing or unparsable url yields 400; a failed fetch yields 406. Each
// successful fetch is reported to the metric collector.
//
// NOTE(review): url.Parse accepts almost any string, so the check here is only
// a syntax check; the handlers request whatever address the caller supplies.
func (s *Server) registerCrawlerPublicRoutes(g *echo.Group) {
	g.GET("/get/httpmeta", func(c echo.Context) error {
		ctx := c.Request().Context()
		urlStr := c.QueryParam("url")
		if urlStr == "" {
			return echo.NewHTTPError(http.StatusBadRequest, "Missing website url")
		}
		if _, err := url.Parse(urlStr); err != nil {
			return echo.NewHTTPError(http.StatusBadRequest, "Wrong url").SetInternal(err)
		}

		htmlMeta, err := getter.GetHTMLMeta(urlStr)
		if err != nil {
			return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get website meta with url: %s", urlStr)).SetInternal(err)
		}
		s.Collector.Collect(ctx, &metric.Metric{
			Name: "getter used",
			Labels: map[string]string{
				"type": "httpmeta",
			},
		})

		c.Response().Header().Set(echo.HeaderContentType, echo.MIMEApplicationJSONCharsetUTF8)
		if err := json.NewEncoder(c.Response().Writer).Encode(composeResponse(htmlMeta)); err != nil {
			return echo.NewHTTPError(http.StatusInternalServerError, "Failed to encode website HTML meta").SetInternal(err)
		}
		return nil
	})
	g.GET("/get/image", func(c echo.Context) error {
		ctx := c.Request().Context()
		urlStr := c.QueryParam("url")
		if urlStr == "" {
			return echo.NewHTTPError(http.StatusBadRequest, "Missing image url")
		}
		if _, err := url.Parse(urlStr); err != nil {
			return echo.NewHTTPError(http.StatusBadRequest, "Wrong url").SetInternal(err)
		}

		image, err := getter.GetImage(urlStr)
		if err != nil {
			return echo.NewHTTPError(http.StatusNotAcceptable, fmt.Sprintf("Failed to get image url: %s", urlStr)).SetInternal(err)
		}
		s.Collector.Collect(ctx, &metric.Metric{
			Name: "getter used",
			Labels: map[string]string{
				"type": "image",
			},
		})

		// Headers must be set before WriteHeader: changes made to the header
		// map afterwards are not sent to the client.
		c.Response().Writer.Header().Set("Content-Type", image.Mediatype)
		c.Response().Writer.WriteHeader(http.StatusOK)
		if _, err := c.Response().Writer.Write(image.Blob); err != nil {
			return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write image blob").SetInternal(err)
		}
		return nil
	})
}
1 change: 0 additions & 1 deletion server/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,6 @@ func (s *Server) registerResourceRoutes(g *echo.Group) {
if _, err := c.Response().Writer.Write(resource.Blob); err != nil {
return echo.NewHTTPError(http.StatusInternalServerError, "Failed to write resource blob").SetInternal(err)
}

return nil
})

Expand Down
3 changes: 2 additions & 1 deletion web/src/labs/marked/parser/Image.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ const renderer = (rawStr: string): string => {
return rawStr;
}

return `<img class='img' src='${escape(matchResult[1])}' />`;
// NOTE: Get image blob from backend to avoid CORS.
return `<img class='img' src='/o/get/image?url=${escape(matchResult[1])}' />`;
};

export default {
Expand Down