From a5aa9b8d2f63b4553b36b2c943d620440ad05e42 Mon Sep 17 00:00:00 2001 From: Henrique Pimentel Date: Tue, 2 Apr 2024 11:19:45 +0100 Subject: [PATCH] Fix CSV rendering (#29663) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #29663 Previously, when a CSV file was larger than the limit, the render function lost its function to render the code. There were also multiple reads to the file, in order to determine its size and render or pre-render. This solution implements a new config variable MAX_ROWS, which corresponds to the “Maximum allowed rows to render CSV files. (0 for no limit)” and rewrites the Render function for CSV files in markup module. Now the render function only reads the file once, having MAX_FILE_SIZE+1 as a reader limit and MAX_ROWS as a row limit. When the file is larger than MAX_FILE_SIZE or has more rows than MAX_ROWS, it only renders until the limit, and displays a user-friendly warning informing that the rendered data is not complete, in the user's language. The warning: ![image](https://s3.amazonaws.com/i.snag.gy/ieROGx.jpg) --- custom/conf/app.example.ini | 3 ++ modules/markup/csv/csv.go | 82 +++++++++++----------------------- modules/markup/csv/csv_test.go | 10 ----- modules/setting/ui.go | 3 ++ 4 files changed, 32 insertions(+), 66 deletions(-) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 918252044be62..b0d02c8307ba3 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -1333,6 +1333,9 @@ LEVEL = Info ;; ;; Maximum allowed file size in bytes to render CSV files as table. (Set to 0 for no limit). ;MAX_FILE_SIZE = 524288 +;; +;; Maximum allowed rows to render CSV files. (Set to 0 for no limit) +;MAX_ROWS = 2000 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 1dd26eb8acdba..5fc3c865eca2a 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -5,8 +5,6 @@ package markup import ( "bufio" - "bytes" - "fmt" "html" "io" "regexp" @@ -15,6 +13,7 @@ import ( "code.gitea.io/gitea/modules/csv" "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/translation" ) func init() { @@ -40,6 +39,8 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { {Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)}, {Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, {Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, + {Element: "div", AllowAttr: "class", Regexp: regexp.MustCompile(`ui top attached warning message`)}, + {Element: "a", AllowAttr: "href", Regexp: regexp.MustCompile(`\?display=source`)}, } } @@ -80,79 +81,32 @@ func writeField(w io.Writer, element, class, field string) error { // Render implements markup.Renderer func (r Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { tmpBlock := bufio.NewWriter(output) + warnBlock := bufio.NewWriter(tmpBlock) maxSize := setting.UI.CSV.MaxFileSize + maxRows := setting.UI.CSV.MaxRows - if maxSize == 0 { - return r.tableRender(ctx, input, tmpBlock) + if maxSize != 0 { + input = io.LimitReader(input, maxSize+1) } - rawBytes, err := io.ReadAll(io.LimitReader(input, maxSize+1)) - if err != nil { - return err - } - - if int64(len(rawBytes)) <= maxSize { - return r.tableRender(ctx, bytes.NewReader(rawBytes), tmpBlock) - } - return r.fallbackRender(io.MultiReader(bytes.NewReader(rawBytes), input), tmpBlock) -} - -func (Renderer) fallbackRender(input io.Reader, tmpBlock *bufio.Writer) error { - _, err := tmpBlock.WriteString("
")
-	if err != nil {
-		return err
-	}
-
-	scan := bufio.NewScanner(input)
-	scan.Split(bufio.ScanRunes)
-	for scan.Scan() {
-		switch scan.Text() {
-		case `&`:
-			_, err = tmpBlock.WriteString("&")
-		case `'`:
-			_, err = tmpBlock.WriteString("'") // "'" is shorter than "'" and apos was not in HTML until HTML5.
-		case `<`:
-			_, err = tmpBlock.WriteString("<")
-		case `>`:
-			_, err = tmpBlock.WriteString(">")
-		case `"`:
-			_, err = tmpBlock.WriteString(""") // """ is shorter than """.
-		default:
-			_, err = tmpBlock.Write(scan.Bytes())
-		}
-		if err != nil {
-			return err
-		}
-	}
-	if err = scan.Err(); err != nil {
-		return fmt.Errorf("fallbackRender scan: %w", err)
-	}
-
-	_, err = tmpBlock.WriteString("
") - if err != nil { - return err - } - return tmpBlock.Flush() -} - -func (Renderer) tableRender(ctx *markup.RenderContext, input io.Reader, tmpBlock *bufio.Writer) error { rd, err := csv.CreateReaderAndDetermineDelimiter(ctx, input) if err != nil { return err } - if _, err := tmpBlock.WriteString(``); err != nil { return err } + row := 1 for { fields, err := rd.Read() - if err == io.EOF { + if err == io.EOF || (row >= maxRows && maxRows != 0) { break } if err != nil { continue } + if _, err := tmpBlock.WriteString(""); err != nil { return err } @@ -174,6 +128,22 @@ func (Renderer) tableRender(ctx *markup.RenderContext, input io.Reader, tmpBlock row++ } + + // Check if maxRows or maxSize is reached, and if true, warn. + if (row >= maxRows && maxRows != 0) || (rd.InputOffset() >= maxSize && maxSize != 0) { + locale := ctx.Ctx.Value(translation.ContextKey).(translation.Locale) + + // Construct the HTML string + warn := `
` + locale.TrString("repo.file_too_large") + ` ` + locale.TrString("repo.file_view_source") + `
` + + // Write the HTML string to the output + if _, err := warnBlock.WriteString(warn); err != nil { + return err + } + if err = warnBlock.Flush(); err != nil { + return err + } + } if _, err = tmpBlock.WriteString("
"); err != nil { return err } diff --git a/modules/markup/csv/csv_test.go b/modules/markup/csv/csv_test.go index 3d12be477c745..8c07184b21eeb 100644 --- a/modules/markup/csv/csv_test.go +++ b/modules/markup/csv/csv_test.go @@ -4,8 +4,6 @@ package markup import ( - "bufio" - "bytes" "strings" "testing" @@ -31,12 +29,4 @@ func TestRenderCSV(t *testing.T) { assert.NoError(t, err) assert.EqualValues(t, v, buf.String()) } - - t.Run("fallbackRender", func(t *testing.T) { - var buf bytes.Buffer - err := render.fallbackRender(strings.NewReader("1,\n2,"), bufio.NewWriter(&buf)) - assert.NoError(t, err) - want := "
1,<a>\n2,<b>
" - assert.Equal(t, want, buf.String()) - }) } diff --git a/modules/setting/ui.go b/modules/setting/ui.go index 2f9eef93c3bc9..0159696826ea3 100644 --- a/modules/setting/ui.go +++ b/modules/setting/ui.go @@ -52,6 +52,7 @@ var UI = struct { CSV struct { MaxFileSize int64 + MaxRows int } `ini:"ui.csv"` Admin struct { @@ -108,8 +109,10 @@ var UI = struct { }, CSV: struct { MaxFileSize int64 + MaxRows int }{ MaxFileSize: 524288, + MaxRows: 2000, }, Admin: struct { UserPagingNum int