Skip to content

Commit

Permalink
Add more checks in migration code (go-gitea#21011)
Browse files Browse the repository at this point in the history
Backport go-gitea#21011

When migrating, add several more important sanity checks:

* SHAs must be SHAs
* Refs must be valid Refs
* URLs must be reasonable

Signed-off-by: Andrew Thornton <[email protected]>
  • Loading branch information
zeripath committed Sep 4, 2022
1 parent bd1412c commit a46aaa5
Show file tree
Hide file tree
Showing 24 changed files with 693 additions and 281 deletions.
2 changes: 1 addition & 1 deletion models/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ func (a *Action) GetRefLink() string {
return a.GetRepoLink() + "/src/branch/" + util.PathEscapeSegments(strings.TrimPrefix(a.RefName, git.BranchPrefix))
case strings.HasPrefix(a.RefName, git.TagPrefix):
return a.GetRepoLink() + "/src/tag/" + util.PathEscapeSegments(strings.TrimPrefix(a.RefName, git.TagPrefix))
case len(a.RefName) == 40 && git.SHAPattern.MatchString(a.RefName):
case len(a.RefName) == 40 && git.IsValidSHAPattern(a.RefName):
return a.GetRepoLink() + "/src/commit/" + a.RefName
default:
// FIXME: we will just assume it's a branch - this was the old way - at some point we may want to enforce that there is always a ref here.
Expand Down
28 changes: 27 additions & 1 deletion modules/git/ref.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@

package git

import "strings"
import (
"regexp"
"strings"
)

const (
// RemotePrefix is the base directory of the remotes information of git.
Expand All @@ -15,6 +18,29 @@ const (
pullLen = len(PullPrefix)
)

// refNamePatternInvalid is a regular expression that matches any construct which is not allowed in a git reference name.
// A name is acceptable only when NO alternative below matches it.
// They cannot have ASCII control characters (i.e. bytes whose values are lower than \040, or \177 DEL), space, tilde ~, caret ^, or colon : anywhere.
// They cannot have question-mark ?, asterisk *, open bracket [ or backslash \ anywhere.
var refNamePatternInvalid = regexp.MustCompile(
`[\000-\037\177 \\~^:?*[]|` + // no absolutely invalid characters (control chars, DEL, space, \ ~ ^ : ? * [)
`(?:^[/.])|` + // must not start with "/" or "."
`(?:/\.)|` + // no "/." (no path component may start with ".")
`(?:\.lock$)|(?:\.lock/)|` + // no ".lock/" anywhere and no ".lock" at the end
`(?:\.\.)|` + // no ".." anywhere
`(?://)|` + // no "//" anywhere
`(?:@{)|` + // no "@{"
`(?:[/.]$)|` + // no terminal '/' or '.'
`(?:^@$)`) // the whole name must not be exactly "@"

// IsValidRefPattern reports whether name could be a valid git reference name,
// i.e. whether refNamePatternInvalid finds nothing objectionable in it.
func IsValidRefPattern(name string) bool {
	invalid := refNamePatternInvalid.MatchString(name)
	return !invalid
}

// SanitizeRefPattern returns name with every match of refNamePatternInvalid
// replaced by a single underscore.
func SanitizeRefPattern(name string) string {
	const replacement = "_"
	return refNamePatternInvalid.ReplaceAllString(name, replacement)
}

// Reference represents a Git ref.
type Reference struct {
Name string
Expand Down
2 changes: 1 addition & 1 deletion modules/git/repo_commit_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ func (repo *Repository) getCommitFromBatchReader(rd *bufio.Reader, id SHA1) (*Co

// ConvertToSHA1 returns a Hash object from a potential ID string
func (repo *Repository) ConvertToSHA1(commitID string) (SHA1, error) {
if len(commitID) == 40 && SHAPattern.MatchString(commitID) {
if len(commitID) == 40 && IsValidSHAPattern(commitID) {
sha1, err := NewIDFromString(commitID)
if err == nil {
return sha1, nil
Expand Down
7 changes: 6 additions & 1 deletion modules/git/sha1.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@ const EmptySHA = "0000000000000000000000000000000000000000"
const EmptyTreeSHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"

// shaPattern can be used to determine if a string is a valid sha
var SHAPattern = regexp.MustCompile(`^[0-9a-f]{4,40}$`)
var shaPattern = regexp.MustCompile(`^[0-9a-f]{4,40}$`)

// IsValidSHAPattern reports whether sha matches shaPattern.
func IsValidSHAPattern(sha string) bool {
	matched := shaPattern.MatchString(sha)
	return matched
}

// MustID always creates a new SHA1 from a [20]byte array with no validation of input.
func MustID(b []byte) SHA1 {
Expand Down
9 changes: 5 additions & 4 deletions modules/migration/pullrequest.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ type PullRequest struct {
Updated time.Time
Closed *time.Time
Labels []*Label
PatchURL string `yaml:"patch_url"`
PatchURL string `yaml:"patch_url"` // SECURITY: This must be safe to download directly from
Merged bool
MergedTime *time.Time `yaml:"merged_time"`
MergeCommitSHA string `yaml:"merge_commit_sha"`
Expand All @@ -37,6 +37,7 @@ type PullRequest struct {
Reactions []*Reaction
ForeignIndex int64
Context DownloaderContext `yaml:"-"`
EnsuredSafe bool `yaml:"ensured_safe"`
}

func (p *PullRequest) GetLocalIndex() int64 { return p.Number }
Expand All @@ -55,9 +56,9 @@ func (p PullRequest) GetGitRefName() string {

// PullRequestBranch represents a pull request branch
type PullRequestBranch struct {
CloneURL string `yaml:"clone_url"`
Ref string
SHA string
CloneURL string `yaml:"clone_url"` // SECURITY: This must be safe to download from
Ref string // SECURITY: this must be a git.IsValidRefPattern
SHA string // SECURITY: this must be a git.IsValidSHAPattern
RepoName string `yaml:"repo_name"`
OwnerName string `yaml:"owner_name"`
}
Expand Down
9 changes: 5 additions & 4 deletions modules/migration/release.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,16 @@ type ReleaseAsset struct {
DownloadCount *int `yaml:"download_count"`
Created time.Time
Updated time.Time
DownloadURL *string `yaml:"download_url"`

DownloadURL *string `yaml:"download_url"` // SECURITY: It is the responsibility of downloader to make sure this is safe
// if DownloadURL is nil, the function should be invoked
DownloadFunc func() (io.ReadCloser, error) `yaml:"-"`
DownloadFunc func() (io.ReadCloser, error) `yaml:"-"` // SECURITY: It is the responsibility of downloader to make sure this is safe
}

// Release represents a release
type Release struct {
TagName string `yaml:"tag_name"`
TargetCommitish string `yaml:"target_commitish"`
TagName string `yaml:"tag_name"` // SECURITY: This must pass git.IsValidRefPattern
TargetCommitish string `yaml:"target_commitish"` // SECURITY: This must pass git.IsValidRefPattern
Name string
Body string
Draft bool
Expand Down
2 changes: 1 addition & 1 deletion modules/migration/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ type Repository struct {
IsPrivate bool `yaml:"is_private"`
IsMirror bool `yaml:"is_mirror"`
Description string
CloneURL string `yaml:"clone_url"`
CloneURL string `yaml:"clone_url"` // SECURITY: This must be checked to ensure that is safe to be used
OriginalURL string `yaml:"original_url"`
DefaultBranch string
}
34 changes: 3 additions & 31 deletions modules/validation/binding.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (
"regexp"
"strings"

"code.gitea.io/gitea/modules/git"

"gitea.com/go-chi/binding"
"github.com/gobwas/glob"
)
Expand All @@ -24,30 +26,6 @@ const (
ErrRegexPattern = "RegexPattern"
)

// GitRefNamePatternInvalid is a regular expression matching one or more consecutive characters that are not allowed in a git reference name.
// They cannot have ASCII control characters (i.e. bytes whose values are lower than \040, or \177 DEL), space, tilde ~, caret ^, or colon : anywhere.
// They cannot have question-mark ?, asterisk *, open bracket [ or backslash \ anywhere.
var GitRefNamePatternInvalid = regexp.MustCompile(`[\000-\037\177 \\~^:?*[]+`)

// CheckGitRefAdditionalRulesValid reports whether name satisfies the
// structural reference-name rules that are checked here in addition to the
// forbidden-character check.
//
// It returns false when name is exactly "@", starts or ends with "/", ends
// with ".", contains "..", "//" or "@{", or when any slash-separated
// component starts with "." or ends with ".lock". Otherwise it returns true.
func CheckGitRefAdditionalRulesValid(name string) bool {
	// Additional rules as described at https://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
	switch {
	case name == "@",
		strings.HasPrefix(name, "/"),
		strings.HasSuffix(name, "/"),
		strings.HasSuffix(name, "."):
		return false
	}

	// Sequences that may not appear anywhere in the name.
	for _, forbidden := range []string{"..", "//", "@{"} {
		if strings.Contains(name, forbidden) {
			return false
		}
	}

	// Per-component rules: no hidden components and no lock-file suffix.
	for _, component := range strings.Split(name, "/") {
		if strings.HasPrefix(component, ".") || strings.HasSuffix(component, ".lock") {
			return false
		}
	}

	return true
}

// AddBindingRules adds additional binding rules
func AddBindingRules() {
addGitRefNameBindingRule()
Expand All @@ -67,16 +45,10 @@ func addGitRefNameBindingRule() {
IsValid: func(errs binding.Errors, name string, val interface{}) (bool, binding.Errors) {
str := fmt.Sprintf("%v", val)

if GitRefNamePatternInvalid.MatchString(str) {
if !git.IsValidRefPattern(str) {
errs.Add([]string{name}, ErrGitRefName, "GitRefName")
return false, errs
}

if !CheckGitRefAdditionalRulesValid(str) {
errs.Add([]string{name}, ErrGitRefName, "GitRefName")
return false, errs
}

return true, errs
},
})
Expand Down
3 changes: 1 addition & 2 deletions routers/api/v1/repo/commits.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import (
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/validation"
"code.gitea.io/gitea/routers/api/v1/utils"
)

Expand Down Expand Up @@ -53,7 +52,7 @@ func GetSingleCommit(ctx *context.APIContext) {
// "$ref": "#/responses/notFound"

sha := ctx.Params(":sha")
if (validation.GitRefNamePatternInvalid.MatchString(sha) || !validation.CheckGitRefAdditionalRulesValid(sha)) && !git.SHAPattern.MatchString(sha) {
if !git.IsValidRefPattern(sha) {
ctx.Error(http.StatusUnprocessableEntity, "no valid ref or sha", fmt.Sprintf("no valid ref or sha: %s", sha))
return
}
Expand Down
3 changes: 1 addition & 2 deletions routers/api/v1/repo/notes.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"code.gitea.io/gitea/modules/convert"
"code.gitea.io/gitea/modules/git"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/validation"
)

// GetNote Get a note corresponding to a single commit from a repository
Expand Down Expand Up @@ -47,7 +46,7 @@ func GetNote(ctx *context.APIContext) {
// "$ref": "#/responses/notFound"

sha := ctx.Params(":sha")
if (validation.GitRefNamePatternInvalid.MatchString(sha) || !validation.CheckGitRefAdditionalRulesValid(sha)) && !git.SHAPattern.MatchString(sha) {
if !git.IsValidRefPattern(sha) {
ctx.Error(http.StatusUnprocessableEntity, "no valid ref or sha", fmt.Sprintf("no valid ref or sha: %s", sha))
return
}
Expand Down
22 changes: 20 additions & 2 deletions services/migrations/codebase.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,24 @@ func NewCodebaseDownloader(ctx context.Context, projectURL *url.URL, project, re
commitMap: make(map[string]string),
}

log.Trace("Create Codebase downloader. BaseURL: %s Project: %s RepoName: %s", baseURL, project, repoName)
return downloader
}

// String implements Stringer. It describes the migration source using the
// downloader's base URL, project and repository name.
//
// A nil receiver yields a placeholder instead of panicking on the field
// access, mirroring the nil handling in ColorFormat.
func (d *CodebaseDownloader) String() string {
	if d == nil {
		return "<nil: CodebaseDownloader>"
	}
	return fmt.Sprintf("migration from codebase server %s %s/%s", d.baseURL, d.project, d.repoName)
}

// ColorFormat writes a basic colored description of a CodebaseDownloader to s.
// A nil receiver is rendered as a fixed placeholder.
func (d *CodebaseDownloader) ColorFormat(s fmt.State) {
	if d != nil {
		log.ColorFprintf(s, "migration from codebase server %s %s/%s", d.baseURL, d.project, d.repoName)
		return
	}
	log.ColorFprintf(s, "<nil: CodebaseDownloader>")
}

// FormatCloneURL add authentication into remote URLs
func (d *CodebaseDownloader) FormatCloneURL(opts base.MigrateOptions, remoteAddr string) (string, error) {
return opts.CloneAddr, nil
Expand Down Expand Up @@ -451,8 +466,8 @@ func (d *CodebaseDownloader) GetPullRequests(page, perPage int) ([]*base.PullReq
Value int64 `xml:",chardata"`
Type string `xml:"type,attr"`
} `xml:"id"`
SourceRef string `xml:"source-ref"`
TargetRef string `xml:"target-ref"`
SourceRef string `xml:"source-ref"` // NOTE: from the documentation these are actually just branches NOT full refs
TargetRef string `xml:"target-ref"` // NOTE: from the documentation these are actually just branches NOT full refs
Subject string `xml:"subject"`
Status string `xml:"status"`
UserID struct {
Expand Down Expand Up @@ -564,6 +579,9 @@ func (d *CodebaseDownloader) GetPullRequests(page, perPage int) ([]*base.PullReq
Comments: comments[1:],
},
})

// SECURITY: Ensure that the PR is safe
_ = CheckAndEnsureSafePR(pullRequests[len(pullRequests)-1], d.baseURL.String(), d)
}

return pullRequests, true, nil
Expand Down
82 changes: 82 additions & 0 deletions services/migrations/common.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package migrations

import (
"fmt"
"strings"

admin_model "code.gitea.io/gitea/models/admin"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
base "code.gitea.io/gitea/modules/migration"
)

// WarnAndNotice logs the provided message as a warning and records the same
// message as a repository notice.
//
// fmtStr and args are passed to log.Warn as-is and are expanded with
// fmt.Sprintf for the notice text. If creating the notice fails, the failure
// is logged as an error and the function returns normally.
func WarnAndNotice(fmtStr string, args ...interface{}) {
	log.Warn(fmtStr, args...)
	if err := admin_model.CreateRepositoryNotice(fmt.Sprintf(fmtStr, args...)); err != nil {
		// The error needs its own verb; without one the formatter appends it
		// as "%!(EXTRA ...)" noise instead of a readable message.
		log.Error("create repository notice failed: %v", err)
	}
}

// hasBaseURL reports whether toCheck starts with baseURL, where baseURL is
// first normalised to end in "/" (unless it is empty). The trailing slash
// means "https://host" does not match "https://host.example/...".
// An empty baseURL matches every toCheck.
func hasBaseURL(toCheck, baseURL string) bool {
	prefix := baseURL
	if prefix != "" && !strings.HasSuffix(prefix, "/") {
		prefix += "/"
	}
	return strings.HasPrefix(toCheck, prefix)
}

// CheckAndEnsureSafePR checks the URLs, SHAs and refs of pr and blanks out
// every value that fails its check, emitting a warning and repository notice
// for each one via WarnAndNotice.
//
// commonCloneBaseURL is the base URL that the patch URL and head clone URL
// must start with; g is only used to identify the downloader in messages.
//
// The result is false when a patch URL, head clone URL, head/base SHA or
// head/base ref had to be cleared. An invalid MergeCommitSHA is cleared too
// but does not affect the result. pr.EnsuredSafe is always set to true.
func CheckAndEnsureSafePR(pr *base.PullRequest, commonCloneBaseURL string, g base.Downloader) bool {
	safe := true

	// SECURITY: the patchURL must be checked to have the same baseURL as the current to prevent open redirect
	if patchURL := pr.PatchURL; len(patchURL) != 0 && !hasBaseURL(patchURL, commonCloneBaseURL) {
		// TODO: Should we check that this url has the expected format for a patch url?
		WarnAndNotice("PR #%d in %s has invalid PatchURL: %s baseURL: %s", pr.Number, g, patchURL, commonCloneBaseURL)
		pr.PatchURL, safe = "", false
	}

	// SECURITY: the headCloneURL must be checked to have the same baseURL as the current to prevent open redirect
	if cloneURL := pr.Head.CloneURL; len(cloneURL) != 0 && !hasBaseURL(cloneURL, commonCloneBaseURL) {
		// TODO: Should we check that this url has the expected format for a clone url?
		WarnAndNotice("PR #%d in %s has invalid HeadCloneURL: %s baseURL: %s", pr.Number, g, cloneURL, commonCloneBaseURL)
		pr.Head.CloneURL, safe = "", false
	}

	// SECURITY: SHAs must be SHAs.
	// A bad merge commit SHA is dropped but deliberately leaves the result untouched.
	if sha := pr.MergeCommitSHA; len(sha) != 0 && !git.IsValidSHAPattern(sha) {
		WarnAndNotice("PR #%d in %s has invalid MergeCommitSHA: %s", pr.Number, g, sha)
		pr.MergeCommitSHA = ""
	}
	if sha := pr.Head.SHA; len(sha) != 0 && !git.IsValidSHAPattern(sha) {
		WarnAndNotice("PR #%d in %s has invalid HeadSHA: %s", pr.Number, g, sha)
		pr.Head.SHA, safe = "", false
	}
	if sha := pr.Base.SHA; len(sha) != 0 && !git.IsValidSHAPattern(sha) {
		WarnAndNotice("PR #%d in %s has invalid BaseSHA: %s", pr.Number, g, sha)
		pr.Base.SHA, safe = "", false
	}

	// SECURITY: Refs must be valid refs or SHAs
	if ref := pr.Head.Ref; len(ref) != 0 && !git.IsValidRefPattern(ref) {
		WarnAndNotice("PR #%d in %s has invalid HeadRef: %s", pr.Number, g, ref)
		pr.Head.Ref, safe = "", false
	}
	if ref := pr.Base.Ref; len(ref) != 0 && !git.IsValidRefPattern(ref) {
		WarnAndNotice("PR #%d in %s has invalid BaseRef: %s", pr.Number, g, ref)
		pr.Base.Ref, safe = "", false
	}

	// Record that the checks ran, whatever their outcome.
	pr.EnsuredSafe = true

	return safe
}
Loading

0 comments on commit a46aaa5

Please sign in to comment.