Skip to content

Commit

Permalink
Improve SHA1 link detection
Browse files Browse the repository at this point in the history
This improves SHA1 link detection so that it no longer picks up extraneous
non-whitespace characters at the end of the URL. A trailing '.' is a special
case handled in the code itself because Go's regexp package does not support
lookahead.

Regex test cases: https://regex101.com/r/xUMlqh/3
  • Loading branch information
silverwind committed Apr 5, 2019
1 parent 0bdd81d commit 3e400a2
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 21 deletions.
47 changes: 31 additions & 16 deletions modules/markup/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ var (
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)

// anySHA1Pattern allows to split url containing SHA into parts
anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})/?([^#\s]+)?(?:#(\S+))?`)
anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})(/[^#\s]+)?(#\S+)?`)

validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)

Expand Down Expand Up @@ -594,31 +594,46 @@ func fullSha1PatternProcessor(ctx *postProcessCtx, node *html.Node) {
if m == nil {
return
}
// take out what's relevant

urlFull := node.Data[m[0]:m[1]]
hash := node.Data[m[2]:m[3]]
text := base.ShortSha(node.Data[m[2]:m[3]])

var subtree, line string
// m[4]:m[5] is regexp capture group 2: an optional path
subpath := ""
if m[5] > 0 {
subpath = node.Data[m[4]:m[5]]
}

// optional, we do them depending on the length.
// m[6]:m[7] is regexp capture group 3: an optional URL hash
hash := ""
if m[7] > 0 {
line = node.Data[m[6]:m[7]]
hash = node.Data[m[6]:m[7]][1:]
}
if m[5] > 0 {
subtree = node.Data[m[4]:m[5]]

start := m[0]
end := m[1]

// If url ends in '.', it's very likely that it is not part of the
// actual url but used to finish a sentence.
if strings.HasSuffix(urlFull, ".") {
end--
urlFull = urlFull[:len(urlFull)-1]
if hash != "" {
hash = hash[:len(hash)-1]
} else if subpath != "" {
subpath = subpath[:len(subpath)-1]
}
}

text := base.ShortSha(hash)
if subtree != "" {
text += "/" + subtree
if subpath != "" {
text += subpath
}
if line != "" {
text += " ("
text += line
text += ")"

if hash != "" {
text += " (" + hash + ")"
}

replaceContent(node, m[0], m[1], createLink(urlFull, text))
replaceContent(node, start, end, createLink(urlFull, text))
}

// sha1CurrentPatternProcessor renders SHA1 strings to corresponding links that
Expand Down
10 changes: 5 additions & 5 deletions modules/markup/html_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,12 +273,12 @@ func TestRegExp_anySHA1Pattern(t *testing.T) {
testCases := map[string][]string{
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": {
"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
"test/unit/event.js",
"L2703",
"/test/unit/event.js",
"#L2703",
},
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": {
"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
"test/unit/event.js",
"/test/unit/event.js",
"",
},
"https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": {
Expand All @@ -288,13 +288,13 @@ func TestRegExp_anySHA1Pattern(t *testing.T) {
},
"https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": {
"0705be475092aede1eddae01319ec931fb9c65fc",
"src",
"/src",
"",
},
"https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": {
"d8a994ef243349f321568f9e36d5c3f444b99cae",
"",
"diff-2",
"#diff-2",
},
}

Expand Down

0 comments on commit 3e400a2

Please sign in to comment.