Skip to content

Commit

Permalink
Merge remote-tracking branch 'giteaofficial/main'
Browse files Browse the repository at this point in the history
* giteaofficial/main:
  Add new index for action to resolve the performance problem (go-gitea#32333)
  Include file extension checks in attachment API (go-gitea#32151)
  Updated tokenizer to better matching when search for code snippets (go-gitea#32261)
  Correctly query the primary button in a form (go-gitea#32438)

# Conflicts:
#	web_src/js/utils/dom.ts
  • Loading branch information
zjjhot committed Nov 7, 2024
2 parents 952637f + 913be9e commit 3927178
Show file tree
Hide file tree
Showing 30 changed files with 289 additions and 31 deletions.
5 changes: 4 additions & 1 deletion models/activities/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,10 @@ func (a *Action) TableIndices() []*schemas.Index {
cudIndex := schemas.NewIndex("c_u_d", schemas.IndexType)
cudIndex.AddColumn("created_unix", "user_id", "is_deleted")

indices := []*schemas.Index{actUserIndex, repoIndex, cudIndex}
cuIndex := schemas.NewIndex("c_u", schemas.IndexType)
cuIndex.AddColumn("user_id", "is_deleted")

indices := []*schemas.Index{actUserIndex, repoIndex, cudIndex, cuIndex}

return indices
}
Expand Down
1 change: 1 addition & 0 deletions models/migrations/migrations.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,7 @@ func prepareMigrationTasks() []*migration {
newMigration(305, "Add Repository Licenses", v1_23.AddRepositoryLicenses),
newMigration(306, "Add BlockAdminMergeOverride to ProtectedBranch", v1_23.AddBlockAdminMergeOverrideBranchProtection),
newMigration(307, "Fix milestone deadline_unix when there is no due date", v1_23.FixMilestoneNoDueDate),
newMigration(308, "Add index(user_id, is_deleted) for action table", v1_23.AddNewIndexForUserDashboard),
}
return preparedMigrations
}
Expand Down
52 changes: 52 additions & 0 deletions models/migrations/v1_23/v308.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package v1_23 //nolint

import (
"code.gitea.io/gitea/modules/timeutil"

"xorm.io/xorm"
"xorm.io/xorm/schemas"
)

// improveActionTableIndicesAction is a migration-local description of the "action"
// table (see TableName). AddNewIndexForUserDashboard passes it to xorm's Sync so the
// indexes returned by TableIndices are applied to the existing table.
// NOTE(review): presumably a frozen copy of the Action model's columns at the time of
// this migration — confirm against models/activities/action.go.
type improveActionTableIndicesAction struct {
ID int64 `xorm:"pk autoincr"`
UserID int64 `xorm:"INDEX"` // Receiver user id.
OpType int
ActUserID int64 // Action user id.
RepoID int64
CommentID int64 `xorm:"INDEX"`
IsDeleted bool `xorm:"NOT NULL DEFAULT false"`
RefName string
IsPrivate bool `xorm:"NOT NULL DEFAULT false"`
Content string `xorm:"TEXT"`
CreatedUnix timeutil.TimeStamp `xorm:"created"`
}

// TableName maps this migration-local struct onto the "action" table.
func (*improveActionTableIndicesAction) TableName() string {
	const table = "action"
	return table
}

// TableIndices returns the multi-column indexes declared for the action table, in
// the order: au_r_c_u_d, r_u_d, c_u_d, c_u.
//
// Note that the index named "c_u" covers (user_id, is_deleted); its name does not
// mirror its columns the way the other index names do.
func (a *improveActionTableIndicesAction) TableIndices() []*schemas.Index {
	// Each entry pairs an index name with its ordered column list.
	specs := []struct {
		name    string
		columns []string
	}{
		{name: "au_r_c_u_d", columns: []string{"act_user_id", "repo_id", "created_unix", "user_id", "is_deleted"}},
		{name: "r_u_d", columns: []string{"repo_id", "user_id", "is_deleted"}},
		{name: "c_u_d", columns: []string{"created_unix", "user_id", "is_deleted"}},
		{name: "c_u", columns: []string{"user_id", "is_deleted"}},
	}

	result := make([]*schemas.Index, 0, len(specs))
	for _, spec := range specs {
		idx := schemas.NewIndex(spec.name, schemas.IndexType)
		idx.AddColumn(spec.columns...)
		result = append(result, idx)
	}

	return result
}

// AddNewIndexForUserDashboard runs x.Sync against the migration-local action struct
// and returns whatever error Sync reports.
// NOTE(review): Sync is expected to create the indexes from TableIndices that are
// missing in the database (notably the new (user_id, is_deleted) one) — confirm
// against the xorm documentation.
func AddNewIndexForUserDashboard(x *xorm.Engine) error {
	model := &improveActionTableIndicesAction{}
	return x.Sync(model)
}
5 changes: 3 additions & 2 deletions modules/indexer/code/bleve/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/letter"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search/query"
Expand Down Expand Up @@ -69,7 +70,7 @@ const (
filenameIndexerAnalyzer = "filenameIndexerAnalyzer"
filenameIndexerTokenizer = "filenameIndexerTokenizer"
repoIndexerDocType = "repoIndexerDocType"
repoIndexerLatestVersion = 7
repoIndexerLatestVersion = 8
)

// generateBleveIndexMapping generates a bleve index mapping for the repo indexer
Expand Down Expand Up @@ -105,7 +106,7 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) {
} else if err := mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]any{
"type": analyzer_custom.Name,
"char_filters": []string{},
"tokenizer": unicode.Name,
"tokenizer": letter.Name,
"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
}); err != nil {
return nil, err
Expand Down
13 changes: 11 additions & 2 deletions modules/indexer/code/elasticsearch/elasticsearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
)

const (
esRepoIndexerLatestVersion = 2
esRepoIndexerLatestVersion = 3
// multi-match-types, currently only 2 types are used
// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
esMultiMatchTypeBestFields = "best_fields"
Expand Down Expand Up @@ -60,6 +60,10 @@ const (
"settings": {
"analysis": {
"analyzer": {
"content_analyzer": {
"tokenizer": "content_tokenizer",
"filter" : ["lowercase"]
},
"filename_path_analyzer": {
"tokenizer": "path_tokenizer"
},
Expand All @@ -68,6 +72,10 @@ const (
}
},
"tokenizer": {
"content_tokenizer": {
"type": "simple_pattern_split",
"pattern": "[^a-zA-Z0-9]"
},
"path_tokenizer": {
"type": "path_hierarchy",
"delimiter": "/"
Expand Down Expand Up @@ -104,7 +112,8 @@ const (
"content": {
"type": "text",
"term_vector": "with_positions_offsets",
"index": true
"index": true,
"analyzer": "content_analyzer"
},
"commit_id": {
"type": "keyword",
Expand Down
49 changes: 49 additions & 0 deletions modules/indexer/code/indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,55 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
},
},
},
// Search for matches on the contents of files regardless of case.
{
RepoIDs: nil,
Keyword: "dESCRIPTION",
Langs: 1,
Results: []codeSearchResult{
{
Filename: "README.md",
Content: "# repo1\n\nDescription for repo1",
},
},
},
// Search for an exact match on the filename within the repo '62' (case insensitive).
// This scenario yields a single result (the file avocado.md on the repo '62')
{
RepoIDs: []int64{62},
Keyword: "AVOCADO.MD",
Langs: 1,
Results: []codeSearchResult{
{
Filename: "avocado.md",
Content: "# repo1\n\npineaple pie of cucumber juice",
},
},
},
// Search for matches on the contents of files when the criteria is an expression.
{
RepoIDs: []int64{62},
Keyword: "console.log",
Langs: 1,
Results: []codeSearchResult{
{
Filename: "example-file.js",
Content: "console.log(\"Hello, World!\")",
},
},
},
// Search for matches on the contents of files when the criteria is part of an expression.
{
RepoIDs: []int64{62},
Keyword: "log",
Langs: 1,
Results: []codeSearchResult{
{
Filename: "example-file.js",
Content: "console.log(\"Hello, World!\")",
},
},
},
}

for _, kw := range keywords {
Expand Down
9 changes: 6 additions & 3 deletions modules/indexer/internal/bleve/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ package bleve
import (
"errors"
"os"
"unicode"

"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/util"

"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
unicode_tokenizer "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/index/upsidedown"
"github.com/ethantkoenig/rupture"
)
Expand Down Expand Up @@ -57,7 +58,7 @@ func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {
// may be different on two string and they still be considered equivalent.
// Given a phrase, its shortest word determines its fuzziness. If a phrase uses CJK (eg: `갃갃갃` `啊啊啊`), the fuzziness is zero.
func GuessFuzzinessByKeyword(s string) int {
tokenizer := unicode.NewUnicodeTokenizer()
tokenizer := unicode_tokenizer.NewUnicodeTokenizer()
tokens := tokenizer.Tokenize([]byte(s))

if len(tokens) > 0 {
Expand All @@ -77,8 +78,10 @@ func guessFuzzinessByKeyword(s string) int {
// according to https://github.com/blevesearch/bleve/issues/1563, the supported max fuzziness is 2
// magic number 4 was chosen to determine the levenshtein distance per each character of a keyword
// BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot.
// Likewise, queries whose terms contain characters that are *not* letters should not use fuzziness

for _, r := range s {
if r >= 128 {
if r >= 128 || !unicode.IsLetter(r) {
return 0
}
}
Expand Down
8 changes: 8 additions & 0 deletions modules/indexer/internal/bleve/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ func TestBleveGuessFuzzinessByKeyword(t *testing.T) {
Input: "갃갃갃",
Fuzziness: 0,
},
{
Input: "repo1",
Fuzziness: 0,
},
{
Input: "avocado.md",
Fuzziness: 0,
},
}

for _, scenario := range scenarios {
Expand Down
13 changes: 10 additions & 3 deletions routers/api/v1/repo/issue_attachment.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
"code.gitea.io/gitea/modules/setting"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/web"
"code.gitea.io/gitea/services/attachment"
attachment_service "code.gitea.io/gitea/services/attachment"
"code.gitea.io/gitea/services/context"
"code.gitea.io/gitea/services/context/upload"
"code.gitea.io/gitea/services/convert"
Expand Down Expand Up @@ -181,7 +181,7 @@ func CreateIssueAttachment(ctx *context.APIContext) {
filename = query
}

attachment, err := attachment.UploadAttachment(ctx, file, setting.Attachment.AllowedTypes, header.Size, &repo_model.Attachment{
attachment, err := attachment_service.UploadAttachment(ctx, file, setting.Attachment.AllowedTypes, header.Size, &repo_model.Attachment{
Name: filename,
UploaderID: ctx.Doer.ID,
RepoID: ctx.Repo.Repository.ID,
Expand Down Expand Up @@ -247,6 +247,8 @@ func EditIssueAttachment(ctx *context.APIContext) {
// "$ref": "#/responses/Attachment"
// "404":
// "$ref": "#/responses/error"
// "422":
// "$ref": "#/responses/validationError"
// "423":
// "$ref": "#/responses/repoArchivedError"

Expand All @@ -261,8 +263,13 @@ func EditIssueAttachment(ctx *context.APIContext) {
attachment.Name = form.Name
}

if err := repo_model.UpdateAttachment(ctx, attachment); err != nil {
if err := attachment_service.UpdateAttachment(ctx, setting.Attachment.AllowedTypes, attachment); err != nil {
if upload.IsErrFileTypeForbidden(err) {
ctx.Error(http.StatusUnprocessableEntity, "", err)
return
}
ctx.Error(http.StatusInternalServerError, "UpdateAttachment", err)
return
}

ctx.JSON(http.StatusCreated, convert.ToAPIAttachment(ctx.Repo.Repository, attachment))
Expand Down
13 changes: 10 additions & 3 deletions routers/api/v1/repo/issue_comment_attachment.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import (
"code.gitea.io/gitea/modules/setting"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/web"
"code.gitea.io/gitea/services/attachment"
attachment_service "code.gitea.io/gitea/services/attachment"
"code.gitea.io/gitea/services/context"
"code.gitea.io/gitea/services/context/upload"
"code.gitea.io/gitea/services/convert"
Expand Down Expand Up @@ -189,7 +189,7 @@ func CreateIssueCommentAttachment(ctx *context.APIContext) {
filename = query
}

attachment, err := attachment.UploadAttachment(ctx, file, setting.Attachment.AllowedTypes, header.Size, &repo_model.Attachment{
attachment, err := attachment_service.UploadAttachment(ctx, file, setting.Attachment.AllowedTypes, header.Size, &repo_model.Attachment{
Name: filename,
UploaderID: ctx.Doer.ID,
RepoID: ctx.Repo.Repository.ID,
Expand Down Expand Up @@ -263,6 +263,8 @@ func EditIssueCommentAttachment(ctx *context.APIContext) {
// "$ref": "#/responses/Attachment"
// "404":
// "$ref": "#/responses/error"
// "422":
// "$ref": "#/responses/validationError"
// "423":
// "$ref": "#/responses/repoArchivedError"
attach := getIssueCommentAttachmentSafeWrite(ctx)
Expand All @@ -275,8 +277,13 @@ func EditIssueCommentAttachment(ctx *context.APIContext) {
attach.Name = form.Name
}

if err := repo_model.UpdateAttachment(ctx, attach); err != nil {
if err := attachment_service.UpdateAttachment(ctx, setting.Attachment.AllowedTypes, attach); err != nil {
	// A forbidden file type is reported as 422 rather than as a server error.
	if upload.IsErrFileTypeForbidden(err) {
		ctx.Error(http.StatusUnprocessableEntity, "", err)
		return
	}
	// Report the underlying error; passing the attachment here would hide the
	// real failure from the response.
	ctx.Error(http.StatusInternalServerError, "UpdateAttachment", err)
	return
}
ctx.JSON(http.StatusCreated, convert.ToAPIAttachment(ctx.Repo.Repository, attach))
}
Expand Down
13 changes: 10 additions & 3 deletions routers/api/v1/repo/release_attachment.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
"code.gitea.io/gitea/modules/setting"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/web"
"code.gitea.io/gitea/services/attachment"
attachment_service "code.gitea.io/gitea/services/attachment"
"code.gitea.io/gitea/services/context"
"code.gitea.io/gitea/services/context/upload"
"code.gitea.io/gitea/services/convert"
Expand Down Expand Up @@ -234,7 +234,7 @@ func CreateReleaseAttachment(ctx *context.APIContext) {
}

// Create a new attachment and save the file
attach, err := attachment.UploadAttachment(ctx, content, setting.Repository.Release.AllowedTypes, size, &repo_model.Attachment{
attach, err := attachment_service.UploadAttachment(ctx, content, setting.Repository.Release.AllowedTypes, size, &repo_model.Attachment{
Name: filename,
UploaderID: ctx.Doer.ID,
RepoID: ctx.Repo.Repository.ID,
Expand Down Expand Up @@ -291,6 +291,8 @@ func EditReleaseAttachment(ctx *context.APIContext) {
// responses:
// "201":
// "$ref": "#/responses/Attachment"
// "422":
// "$ref": "#/responses/validationError"
// "404":
// "$ref": "#/responses/notFound"

Expand Down Expand Up @@ -322,8 +324,13 @@ func EditReleaseAttachment(ctx *context.APIContext) {
attach.Name = form.Name
}

if err := repo_model.UpdateAttachment(ctx, attach); err != nil {
if err := attachment_service.UpdateAttachment(ctx, setting.Repository.Release.AllowedTypes, attach); err != nil {
	// A forbidden file type is reported as 422 rather than as a server error.
	if upload.IsErrFileTypeForbidden(err) {
		ctx.Error(http.StatusUnprocessableEntity, "", err)
		return
	}
	// Report the underlying error; passing the attachment here would hide the
	// real failure from the response.
	ctx.Error(http.StatusInternalServerError, "UpdateAttachment", err)
	return
}
ctx.JSON(http.StatusCreated, convert.ToAPIAttachment(ctx.Repo.Repository, attach))
}
Expand Down
9 changes: 9 additions & 0 deletions services/attachment/attachment.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,12 @@ func UploadAttachment(ctx context.Context, file io.Reader, allowedTypes string,

return NewAttachment(ctx, attach, io.MultiReader(bytes.NewReader(buf), file), fileSize)
}

// UpdateAttachment checks attach.Name against allowedTypes via upload.Verify and,
// if that check passes, persists the attachment with repo_model.UpdateAttachment.
// No file content is supplied to the verification (the buffer argument is nil).
// A verification error is returned unchanged so callers can inspect it.
func UpdateAttachment(ctx context.Context, allowedTypes string, attach *repo_model.Attachment) error {
	verifyErr := upload.Verify(nil, attach.Name, allowedTypes)
	if verifyErr != nil {
		return verifyErr
	}

	return repo_model.UpdateAttachment(ctx, attach)
}
Loading

0 comments on commit 3927178

Please sign in to comment.