From a6a02b37cfce1e28194cb6b54b46574022b56b48 Mon Sep 17 00:00:00 2001
From: "alessandro.pinna"
Date: Mon, 22 Nov 2021 14:46:06 +0100
Subject: [PATCH] enhancement: automatic cleanup of old repos/branches

---
 internal/services/config/config.go            |   7 +
 internal/services/gitserver/gitserver_test.go | 208 +++++++++++++++++
 internal/services/gitserver/main.go           |   3 +
 internal/services/gitserver/repo-cleaner.go   | 213 ++++++++++++++++++
 tests/setup_test.go                           |   1 +
 5 files changed, 432 insertions(+)
 create mode 100644 internal/services/gitserver/gitserver_test.go
 create mode 100644 internal/services/gitserver/repo-cleaner.go

diff --git a/internal/services/config/config.go b/internal/services/config/config.go
index 8308ca142..0a11e3fc3 100644
--- a/internal/services/config/config.go
+++ b/internal/services/config/config.go
@@ -162,6 +162,9 @@ type Gitserver struct {
 	Web           Web           `yaml:"web"`
 	Etcd          Etcd          `yaml:"etcd"`
 	ObjectStorage ObjectStorage `yaml:"objectStorage"`
+
+	RepositoryCleanupInterval    time.Duration `yaml:"repositoryCleanupInterval"`
+	RepositoryRefsExpireInterval time.Duration `yaml:"repositoryRefsExpireInterval"`
 }
 
 type Web struct {
@@ -261,6 +264,10 @@ var defaultConfig = Config{
 		},
 		ActiveTasksLimit: 2,
 	},
+	Gitserver: Gitserver{
+		RepositoryCleanupInterval:    24 * time.Hour,
+		RepositoryRefsExpireInterval: 30 * 24 * time.Hour,
+	},
 }
 
 func Parse(configFile string, componentsNames []string) (*Config, error) {
diff --git a/internal/services/gitserver/gitserver_test.go b/internal/services/gitserver/gitserver_test.go
new file mode 100644
index 000000000..8ef9252dc
--- /dev/null
+++ b/internal/services/gitserver/gitserver_test.go
@@ -0,0 +1,208 @@
+// Copyright 2019 Sorint.lab
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gitserver
+
+import (
+	"context"
+	"errors"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+
+	"agola.io/agola/internal/services/config"
+	"agola.io/agola/internal/util"
+)
+
+const (
+	branchName = "master"
+	tagName    = "v1.0"
+)
+
+// createTag creates and checks out the "test" branch, adds an empty commit
+// with the given committer date on it and tags that commit as tagName.
+func createTag(t *testing.T, ctx context.Context, git *util.Git, committerTime time.Time) {
+	if _, err := git.Output(ctx, nil, "branch", "test"); err != nil {
+		t.Fatalf("unexpected err: %v", err)
+	}
+
+	if _, err := git.Output(ctx, nil, "checkout", "test"); err != nil {
+		t.Fatalf("unexpected err: %v", err)
+	}
+
+	git.Env = append(git.Env, "GIT_COMMITTER_DATE='"+committerTime.String()+"'")
+	if _, err := git.Output(ctx, nil, "commit", "--allow-empty", "-m", "root commit"); err != nil {
+		t.Fatalf("unexpected err: %v", err)
+	}
+
+	if _, err := git.Output(ctx, nil, "tag", tagName, "-m", "tag test"); err != nil {
+		t.Fatalf("unexpected err: %v", err)
+	}
+}
+
+// createBranch adds an empty commit with the given committer date on the
+// current branch; the test relies on that branch being branchName.
+func createBranch(t *testing.T, ctx context.Context, git *util.Git, committerTime time.Time) {
+	git.Env = append(git.Env, "GIT_COMMITTER_DATE='"+committerTime.String()+"'")
+	if _, err := git.Output(ctx, nil, "commit", "--allow-empty", "-m", "'root commit'"); err != nil {
+		t.Fatalf("unexpected err: %v", err)
+	}
+}
+
+// TestRepoCleaner checks that scanRepos deletes the branches and tags whose
+// last commit is older than RepositoryRefsExpireInterval, and that it removes
+// the repository directory once no branch or tag is left.
+func TestRepoCleaner(t *testing.T) {
+	tests := []struct {
+		name          string
+		branchOldTime bool
+		tagOldTime    bool
+	}{
+		{
+			name:          "test delete branch",
+			branchOldTime: true,
+			tagOldTime:    false,
+		},
+		{
+			name:          "test delete tag",
+			branchOldTime: false,
+			tagOldTime:    true,
+		},
+		{
+			name:          "test delete repository dir",
+			branchOldTime: true,
+			tagOldTime:    true,
+		},
+	}
+
+	oldCommitterTime := time.Date(2015, time.January, 15, 1, 1, 1, 1, time.UTC)
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			dir, err := ioutil.TempDir("", "agola")
+			if err != nil {
+				t.Fatalf("unexpected err: %v", err)
+			}
+			defer os.RemoveAll(dir)
+
+			ctx, cancel := context.WithCancel(context.Background())
+			defer cancel()
+
+			gitDataDir := filepath.Join(dir, "gitserver")
+
+			config := &config.Gitserver{
+				DataDir:                      gitDataDir,
+				RepositoryCleanupInterval:    10 * time.Second,
+				RepositoryRefsExpireInterval: 24 * time.Hour,
+			}
+
+			gs, err := NewGitserver(ctx, logger, config)
+			if err != nil {
+				t.Fatalf("unexpected err: %v", err)
+			}
+
+			userDirRepo := filepath.Join(gitDataDir, "user01", "repo01")
+			err = os.MkdirAll(userDirRepo, os.ModePerm)
+			if err != nil {
+				t.Fatalf("unexpected err: %v", err)
+			}
+
+			git := &util.Git{GitDir: userDirRepo}
+
+			if _, err := git.Output(ctx, nil, "init"); err != nil {
+				t.Fatalf("unexpected err: %v", err)
+			}
+			if _, err := git.Output(ctx, nil, "config", "--unset", "core.bare"); err != nil {
+				t.Fatalf("unexpected err: %v", err)
+			}
+			if _, err := git.Output(ctx, nil, "config", "user.email", "user01@example.com"); err != nil {
+				t.Fatalf("unexpected err: %v", err)
+			}
+			if _, err := git.Output(ctx, nil, "config", "user.name", "user01"); err != nil {
+				t.Fatalf("unexpected err: %v", err)
+			}
+
+			var committerTime time.Time
+			if tt.branchOldTime {
+				committerTime = oldCommitterTime
+			} else {
+				committerTime = time.Now()
+			}
+			createBranch(t, ctx, git, committerTime)
+
+			if tt.tagOldTime {
+				committerTime = oldCommitterTime
+			} else {
+				committerTime = time.Now()
+			}
+			createTag(t, ctx, git, committerTime)
+
+			if _, err := git.Output(ctx, nil, "config", "--bool", "core.bare", "true"); err != nil {
+				t.Fatalf("unexpected err: %v", err)
+			}
+
+			if err := gs.scanRepos(ctx); err != nil {
+				t.Fatalf("unexpected err: %v", err)
+			}
+
+			if tt.branchOldTime && tt.tagOldTime {
+				_, err = os.Stat(userDirRepo)
+				if !errors.Is(err, os.ErrNotExist) {
+					t.Fatalf("got %v error, want error: %v", err, os.ErrNotExist)
+				}
+
+				return
+			}
+
+			branches, err := gs.getBranches(git, ctx)
+			if err != nil {
+				t.Fatalf("unexpected err: %v", err)
+			}
+
+			found := false
+			for _, b := range branches {
+				if b == branchName {
+					found = true
+					break
+				}
+			}
+			if tt.branchOldTime && found {
+				t.Fatalf("expected branch %s deleted", branchName)
+			}
+			if !tt.branchOldTime && !found {
+				t.Fatalf("expected branch %s", branchName)
+			}
+
+			tags, err := gs.getTags(git, ctx)
+			if err != nil {
+				t.Fatalf("unexpected err: %v", err)
+			}
+			found = false
+			for _, b := range tags {
+				if b == tagName {
+					found = true
+					break
+				}
+			}
+			if tt.tagOldTime && found {
+				t.Fatalf("expected tag %s deleted", tagName)
+			}
+			if !tt.tagOldTime && !found {
+				t.Fatalf("expected tag %s", tagName)
+			}
+		})
+	}
+}
diff --git a/internal/services/gitserver/main.go b/internal/services/gitserver/main.go
index 03d6ce895..aa6ac18c6 100644
--- a/internal/services/gitserver/main.go
+++ b/internal/services/gitserver/main.go
@@ -179,6 +179,9 @@ func (s *Gitserver) Run(ctx context.Context) error {
 		}
 	}()
 
+	//TODO a lock is needed or it'll cause some concurrency issues if repo cleaner runs when someone at the same time is pushing
+	go s.repoCleanerLoop(ctx)
+
 	select {
 	case <-ctx.Done():
 		log.Infof("gitserver exiting")
diff --git a/internal/services/gitserver/repo-cleaner.go b/internal/services/gitserver/repo-cleaner.go
new file mode 100644
index 000000000..2f1e15892
--- /dev/null
+++ b/internal/services/gitserver/repo-cleaner.go
@@ -0,0 +1,213 @@
+package gitserver
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"time"
+
+	"agola.io/agola/internal/util"
+)
+
+// repoCleanerLoop calls scanRepos every RepositoryCleanupInterval until ctx is
+// done. Scan errors are logged and never stop the loop.
+//
+// The cleaner does not start when either interval is not positive: a zero
+// cleanup interval would make the loop spin without pause and a zero expire
+// interval would make every ref, and so every repository, look expired.
+func (s *Gitserver) repoCleanerLoop(ctx context.Context) {
+	if s.c.RepositoryCleanupInterval <= 0 || s.c.RepositoryRefsExpireInterval <= 0 {
+		log.Info("repoCleaner disabled: cleanup and refs expire intervals must be positive")
+
+		return
+	}
+
+	for {
+		select {
+		case <-ctx.Done():
+			log.Info("repoCleaner exiting")
+
+			return
+		case <-time.After(s.c.RepositoryCleanupInterval):
+			if err := s.scanRepos(ctx); err != nil {
+				// %w is only understood by fmt.Errorf, a logger needs %v.
+				log.Errorf("scanRepos error: %v", err)
+			}
+		}
+	}
+}
+
+// scanRepos runs scanRepo on every directory found two levels below DataDir
+// (DataDir/<user>/<repo>). A failure on a single directory is logged and the
+// scan goes on; an error is returned only when DataDir cannot be read or ctx
+// is done.
+func (s *Gitserver) scanRepos(ctx context.Context) error {
+	log.Info("repoCleaner scanRepos start")
+
+	usersDir, err := ioutil.ReadDir(s.c.DataDir)
+	if err != nil {
+		return fmt.Errorf("failed to read data dir: %w", err)
+	}
+
+	for _, u := range usersDir {
+		if !u.IsDir() {
+			continue
+		}
+
+		userDir := filepath.Join(s.c.DataDir, u.Name())
+
+		reposDir, err := ioutil.ReadDir(userDir)
+		if err != nil {
+			log.Errorf("failed to read dir %q: %v", userDir, err)
+
+			continue
+		}
+
+		for _, r := range reposDir {
+			if !r.IsDir() {
+				continue
+			}
+
+			// Don't start on another repository while shutting down.
+			if err := ctx.Err(); err != nil {
+				return err
+			}
+
+			if err := s.scanRepo(ctx, filepath.Join(userDir, r.Name())); err != nil {
+				log.Errorf("scanRepo error: %v", err)
+			}
+		}
+	}
+
+	log.Info("repoCleaner scanRepos end")
+
+	return nil
+}
+
+// scanRepo deletes the branches and tags of the repository in repoDir whose
+// last commit is at least RepositoryRefsExpireInterval old, prunes the
+// unreachable objects and removes repoDir when no branch or tag is left.
+func (s *Gitserver) scanRepo(ctx context.Context, repoDir string) error {
+	git := &util.Git{GitDir: repoDir}
+
+	branches, err := s.getBranches(git, ctx)
+	if err != nil {
+		return fmt.Errorf("failed to get git branches: %w", err)
+	}
+
+	for _, b := range branches {
+		committerTime, err := s.getLastCommiterTime(ctx, git, "refs/heads/"+b)
+		if err != nil {
+			return fmt.Errorf("failed to get last commit time: %w", err)
+		}
+
+		if time.Since(committerTime) >= s.c.RepositoryRefsExpireInterval {
+			if err := s.deleteBranch(ctx, git, b); err != nil {
+				return fmt.Errorf("failed to delete git branch: %w", err)
+			}
+		}
+	}
+
+	tags, err := s.getTags(git, ctx)
+	if err != nil {
+		return fmt.Errorf("failed to get git tags: %w", err)
+	}
+
+	for _, tag := range tags {
+		committerTime, err := s.getLastCommiterTime(ctx, git, "refs/tags/"+tag)
+		if err != nil {
+			return fmt.Errorf("failed to get last commit time: %w", err)
+		}
+
+		if time.Since(committerTime) >= s.c.RepositoryRefsExpireInterval {
+			if err := s.deleteTag(ctx, git, tag); err != nil {
+				return fmt.Errorf("failed to delete git tag: %w", err)
+			}
+		}
+	}
+
+	if _, err := git.Output(ctx, nil, "prune"); err != nil {
+		return fmt.Errorf("git prune failed: %w", err)
+	}
+
+	// List the refs again to see what survived the cleanup.
+	branches, err = s.getBranches(git, ctx)
+	if err != nil {
+		return fmt.Errorf("failed to get git branches: %w", err)
+	}
+
+	tags, err = s.getTags(git, ctx)
+	if err != nil {
+		return fmt.Errorf("failed to get git tags: %w", err)
+	}
+
+	if len(branches) == 0 && len(tags) == 0 {
+		log.Infof("deleting repo: %s", repoDir)
+
+		if err := s.deleteRepo(ctx, repoDir); err != nil {
+			return fmt.Errorf("failed to delete repository: %w", err)
+		}
+	}
+
+	return nil
+}
+
+// getBranches returns the names, relative to refs/heads/, of the branches of
+// the repository.
+func (s *Gitserver) getBranches(git *util.Git, ctx context.Context) ([]string, error) {
+	// lstrip=2 always drops exactly "refs/heads/". refname:short instead
+	// yields "heads/<name>" when a tag has the same name, and callers could
+	// not build the full ref from that.
+	return git.OutputLines(ctx, nil, "for-each-ref", "--format=%(refname:lstrip=2)", "refs/heads/")
+}
+
+// getTags returns the names, relative to refs/tags/, of the tags of the
+// repository.
+func (s *Gitserver) getTags(git *util.Git, ctx context.Context) ([]string, error) {
+	// lstrip=2 for the same reason as in getBranches: refname:short may yield
+	// "tags/<name>" when a branch has the same name.
+	return git.OutputLines(ctx, nil, "for-each-ref", "--format=%(refname:lstrip=2)", "refs/tags/")
+}
+
+// getLastCommiterTime returns the committer date of the commit printed by
+// "git log -1" for ref, that is the tip commit of ref.
+func (s *Gitserver) getLastCommiterTime(ctx context.Context, git *util.Git, ref string) (time.Time, error) {
+	// %cI prints the committer date in strict ISO 8601 format.
+	output, err := git.OutputLines(ctx, nil, "log", "-1", "--format=%cI", ref)
+	if err != nil {
+		return time.Time{}, err
+	}
+
+	if len(output) != 1 {
+		return time.Time{}, errors.New("git log error: must return one line")
+	}
+
+	committerTime, err := time.Parse(time.RFC3339, output[0])
+	if err != nil {
+		return time.Time{}, err
+	}
+
+	return committerTime, nil
+}
+
+// deleteBranch force deletes branch from the repository.
+func (s *Gitserver) deleteBranch(ctx context.Context, git *util.Git, branch string) error {
+	// "--" keeps a name starting with "-" from being parsed as an option.
+	_, err := git.Output(ctx, nil, "branch", "-D", "--", branch)
+	return err
+}
+
+// deleteTag deletes tag from the repository.
+func (s *Gitserver) deleteTag(ctx context.Context, git *util.Git, tag string) error {
+	// "--" keeps a name starting with "-" from being parsed as an option.
+	_, err := git.Output(ctx, nil, "tag", "-d", "--", tag)
+	return err
+}
+
+// deleteRepo removes repoDir and everything below it.
+func (s *Gitserver) deleteRepo(ctx context.Context, repoDir string) error {
+	return os.RemoveAll(repoDir)
+}
diff --git a/tests/setup_test.go b/tests/setup_test.go
index a9776c65b..92712129a 100644
--- a/tests/setup_test.go
+++ b/tests/setup_test.go
@@ -293,6 +293,7 @@ func setup(ctx context.Context, t *testing.T, dir string) (*testutil.TestEmbedde
 			Etcd: config.Etcd{
 				Endpoints: "",
 			},
+			RepositoryCleanupInterval: 24 * time.Hour,
 		},
 	}
 