-
-
Notifications
You must be signed in to change notification settings - Fork 5.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add warning for BIDI characters in page renders and in diffs #17562
Merged
6543
merged 47 commits into
go-gitea:main
from
zeripath:fix-17514-add-warning-bidi-characters
Jan 7, 2022
Merged
Changes from all commits
Commits
Show all changes
47 commits
Select commit
Hold shift + click to select a range
dca05ee
Add warning for BIDI characters in page renders and in diffs
zeripath 449cb26
as per review
zeripath 40b8628
Adjust to only put the warning on BIDI lines without RTL chars
zeripath 3a63d9d
Another attempt.
zeripath cd0bb29
Merge remote-tracking branch 'origin/main' into fix-17514-add-warning…
zeripath 5f481cf
placate lint
zeripath c89c678
another placation
zeripath 7e9871c
Merge remote-tracking branch 'origin/main' into fix-17514-add-warning…
zeripath f563ee9
as per review
zeripath 70d446b
Merge remote-tracking branch 'origin/main' into fix-17514-add-warning…
zeripath 65dcc39
Merge remote-tracking branch 'origin/main' into fix-17514-add-warning…
zeripath 62345ba
fix broken merge
zeripath 831f189
as per silverwind
zeripath 5a9759c
as per silverwind
zeripath 006a5cd
Merge remote-tracking branch 'origin/main' into fix-17514-add-warning…
zeripath 63a5e0f
fix class
silverwind 8a01b22
make message header colors work on both themes
silverwind 6449cad
minor styling tweaks
silverwind ab03673
fix border-radius on unescape button
silverwind 06b4146
Merge remote-tracking branch 'origin/main' into fix-17514-add-warning…
zeripath b93d0bf
drop buttons as per silverwind
zeripath cf04f2e
as per fnetx
zeripath aa4fc5a
hide the unescape button in the wiki
zeripath 62f557d
add warning triangles to view and blame
zeripath b6ba958
Add warning triangles to diff
zeripath ea7a04a
Merge remote-tracking branch 'origin/main' into fix-17514-add-warning…
zeripath 36dd4bf
Merge branch 'main' into fix-17514-add-warning-bidi-characters
zeripath 19aed47
Merge remote-tracking branch 'origin/main' into fix-17514-add-warning…
zeripath 6a2e274
ensure buttons work on loaded diffs
zeripath 0d6e8f6
move escape functions into their own files
zeripath cb7d19d
extract out functions
zeripath c55394d
Apply suggestions from code review
zeripath ae19a60
Merge remote-tracking branch 'origin/main' into fix-17514-add-warning…
zeripath c11bd34
Update options/locale/locale_en-US.ini
zeripath 58a4fcc
move warning triangle to another column
zeripath 3f6057e
Merge remote-tracking branch 'origin/main' into fix-17514-add-warning…
zeripath 67d00b5
Merge branch 'main' into fix-17514-add-warning-bidi-characters
6543 51a1bf1
Merge branch 'main' into fix-17514-add-warning-bidi-characters
6543 d8ab670
Merge branch 'master' into fix-17514-add-warning-bidi-characters
6543 0fc5af7
linter ignore bool "suspicious assignment to a by-value method receiv…
6543 1dc8a21
fix lint
wxiaoguang 6f99bfd
refactoring
wxiaoguang ab6db78
refactor
wxiaoguang 4e1b449
Apply suggestions from code review
zeripath aac0e1d
Merge pull request #10 from wxiaoguang/fix-17514-add-warning-bidi-cha…
zeripath f66923f
Merge branch 'main' into fix-17514-add-warning-bidi-characters
6543 a28bbbc
Merge branch 'main' into fix-17514-add-warning-bidi-characters
wxiaoguang File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,230 @@ | ||
// Copyright 2021 The Gitea Authors. All rights reserved. | ||
// Use of this source code is governed by a MIT-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package charset | ||
|
||
import ( | ||
"bytes" | ||
"fmt" | ||
"io" | ||
"strings" | ||
"unicode" | ||
"unicode/utf8" | ||
|
||
"golang.org/x/text/unicode/bidi" | ||
) | ||
|
||
// EscapeStatus represents the findings of the unicode escaper.
type EscapeStatus struct {
	// Escaped reports that at least one code point or byte sequence was
	// replaced by escape markup in the output.
	Escaped bool
	// HasError reports that reading the input or writing the output failed.
	HasError bool
	// HasBadRunes reports that bytes which do not decode as valid UTF-8 were found.
	HasBadRunes bool
	// HasControls reports a code point in the Unicode "C" category other than
	// newline, carriage return, tab and the BIDI controls.
	HasControls bool
	// HasSpaces reports a Unicode space other than ' ', '\t', '\r' and '\n'.
	HasSpaces bool
	// HasMarks reports a code point in the Unicode "M" (mark) category.
	HasMarks bool
	// HasBIDI reports a bidirectional control character.
	HasBIDI bool
	// BadBIDI reports a line that contains a BIDI control together with
	// left-to-right script but no right-to-left script.
	BadBIDI bool
	// HasRTLScript reports a code point of bidi class R or AL.
	HasRTLScript bool
	// HasLTRScript reports a code point of bidi class L.
	HasLTRScript bool
}

// Or returns a new EscapeStatus in which every field is the logical OR
// (disjunction) of the matching fields of status and other. Neither operand
// is modified.
func (status EscapeStatus) Or(other EscapeStatus) EscapeStatus {
	return EscapeStatus{
		Escaped:      status.Escaped || other.Escaped,
		HasError:     status.HasError || other.HasError,
		HasBadRunes:  status.HasBadRunes || other.HasBadRunes,
		HasControls:  status.HasControls || other.HasControls,
		HasSpaces:    status.HasSpaces || other.HasSpaces,
		HasMarks:     status.HasMarks || other.HasMarks,
		HasBIDI:      status.HasBIDI || other.HasBIDI,
		BadBIDI:      status.BadBIDI || other.BadBIDI,
		HasRTLScript: status.HasRTLScript || other.HasRTLScript,
		HasLTRScript: status.HasLTRScript || other.HasLTRScript,
	}
}
|
||
// EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string | ||
func EscapeControlString(text string) (EscapeStatus, string) { | ||
sb := &strings.Builder{} | ||
escaped, _ := EscapeControlReader(strings.NewReader(text), sb) | ||
return escaped, sb.String() | ||
} | ||
|
||
// EscapeControlBytes escapes the unicode control sequences a provided []byte and returns the findings as an EscapeStatus and the escaped []byte | ||
func EscapeControlBytes(text []byte) (EscapeStatus, []byte) { | ||
buf := &bytes.Buffer{} | ||
escaped, _ := EscapeControlReader(bytes.NewReader(text), buf) | ||
return escaped, buf.Bytes() | ||
} | ||
|
||
// EscapeControlReader escapes the unicode control sequences a provided Reader writing the escaped output to the output and returns the findings as an EscapeStatus and an error | ||
func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) { | ||
buf := make([]byte, 4096) | ||
readStart := 0 | ||
var n int | ||
var writePos int | ||
|
||
lineHasBIDI := false | ||
lineHasRTLScript := false | ||
lineHasLTRScript := false | ||
|
||
readingloop: | ||
for err == nil { | ||
n, err = text.Read(buf[readStart:]) | ||
bs := buf[:n+readStart] | ||
i := 0 | ||
|
||
for i < len(bs) { | ||
r, size := utf8.DecodeRune(bs[i:]) | ||
// Now handle the codepoints | ||
switch { | ||
case r == utf8.RuneError: | ||
if writePos < i { | ||
if _, err = output.Write(bs[writePos:i]); err != nil { | ||
escaped.HasError = true | ||
return | ||
} | ||
writePos = i | ||
} | ||
// runes can be at most 4 bytes - so... | ||
if len(bs)-i <= 3 { | ||
// if not request more data | ||
copy(buf, bs[i:]) | ||
readStart = n - i | ||
writePos = 0 | ||
continue readingloop | ||
} | ||
// this is a real broken rune | ||
escaped.HasBadRunes = true | ||
escaped.Escaped = true | ||
if err = writeBroken(output, bs[i:i+size]); err != nil { | ||
escaped.HasError = true | ||
return | ||
} | ||
writePos += size | ||
case r == '\n': | ||
if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript { | ||
escaped.BadBIDI = true | ||
} | ||
lineHasBIDI = false | ||
lineHasRTLScript = false | ||
lineHasLTRScript = false | ||
|
||
case r == '\r' || r == '\t' || r == ' ': | ||
// These are acceptable control characters and space characters | ||
case unicode.IsSpace(r): | ||
escaped.HasSpaces = true | ||
escaped.Escaped = true | ||
if writePos < i { | ||
if _, err = output.Write(bs[writePos:i]); err != nil { | ||
escaped.HasError = true | ||
return | ||
} | ||
} | ||
if err = writeEscaped(output, r); err != nil { | ||
escaped.HasError = true | ||
return | ||
} | ||
writePos = i + size | ||
case unicode.Is(unicode.Bidi_Control, r): | ||
escaped.Escaped = true | ||
escaped.HasBIDI = true | ||
if writePos < i { | ||
if _, err = output.Write(bs[writePos:i]); err != nil { | ||
escaped.HasError = true | ||
return | ||
} | ||
} | ||
lineHasBIDI = true | ||
if err = writeEscaped(output, r); err != nil { | ||
escaped.HasError = true | ||
return | ||
} | ||
writePos = i + size | ||
case unicode.Is(unicode.C, r): | ||
escaped.Escaped = true | ||
escaped.HasControls = true | ||
if writePos < i { | ||
if _, err = output.Write(bs[writePos:i]); err != nil { | ||
escaped.HasError = true | ||
return | ||
} | ||
} | ||
if err = writeEscaped(output, r); err != nil { | ||
escaped.HasError = true | ||
return | ||
} | ||
writePos = i + size | ||
case unicode.Is(unicode.M, r): | ||
escaped.Escaped = true | ||
escaped.HasMarks = true | ||
if writePos < i { | ||
if _, err = output.Write(bs[writePos:i]); err != nil { | ||
escaped.HasError = true | ||
return | ||
} | ||
} | ||
if err = writeEscaped(output, r); err != nil { | ||
escaped.HasError = true | ||
return | ||
} | ||
writePos = i + size | ||
default: | ||
p, _ := bidi.Lookup(bs[i : i+size]) | ||
c := p.Class() | ||
if c == bidi.R || c == bidi.AL { | ||
lineHasRTLScript = true | ||
escaped.HasRTLScript = true | ||
} else if c == bidi.L { | ||
lineHasLTRScript = true | ||
escaped.HasLTRScript = true | ||
} | ||
} | ||
i += size | ||
} | ||
if n > 0 { | ||
// we read something... | ||
// write everything unwritten | ||
if writePos < i { | ||
if _, err = output.Write(bs[writePos:i]); err != nil { | ||
escaped.HasError = true | ||
return | ||
} | ||
} | ||
|
||
// reset the starting positions for the next read | ||
readStart = 0 | ||
writePos = 0 | ||
} | ||
} | ||
if readStart > 0 { | ||
// this means that there is an incomplete or broken rune at 0-readStart and we read nothing on the last go round | ||
escaped.Escaped = true | ||
escaped.HasBadRunes = true | ||
if err = writeBroken(output, buf[:readStart]); err != nil { | ||
escaped.HasError = true | ||
return | ||
} | ||
} | ||
if err == io.EOF { | ||
if lineHasBIDI && !lineHasRTLScript && lineHasLTRScript { | ||
escaped.BadBIDI = true | ||
} | ||
err = nil | ||
return | ||
} | ||
escaped.HasError = true | ||
return | ||
} | ||
|
||
// writeBroken writes a placeholder for bytes that are not valid UTF-8: a span
// with class "broken-code-point" whose text is the upper-case hex of bs in
// angle brackets. The brackets are written as HTML entities so that the
// placeholder is displayed as text rather than parsed as a tag.
// It returns the error from the underlying write, if any.
func writeBroken(output io.Writer, bs []byte) (err error) {
	_, err = fmt.Fprintf(output, `<span class="broken-code-point">&lt;%X&gt;</span>`, bs)
	return err
}
|
||
// writeEscaped writes r wrapped in escape markup: an outer span with class
// "escaped-code-point" carrying the code point as "[U+XXXX]" in its
// data-escaped attribute, and an inner span with class "char" holding the
// rune itself. It returns the error from the underlying write, if any.
func writeEscaped(output io.Writer, r rune) (err error) {
	const markup = `<span class="escaped-code-point" data-escaped="[U+%04X]"><span class="char">%c</span></span>`
	if _, err = fmt.Fprintf(output, markup, r, r); err != nil {
		return err
	}
	return nil
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could be simplified to a uint32
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think flagging this would be a good idea.