Skip to content

Commit

Permalink
Merge pull request #50 from camptocamp/imp-commits-data-cache
Browse files Browse the repository at this point in the history
utils.git.Commit: leverage user's cache to get commit file paths
  • Loading branch information
sebalix authored Aug 20, 2024
2 parents 3b65f69 + ab55314 commit 25a21e5
Show file tree
Hide file tree
Showing 6 changed files with 159 additions and 27 deletions.
6 changes: 4 additions & 2 deletions oca_port/port_addon_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,8 @@ def __init__(self, app):
)
self.commits_diff = self.get_commits_diff()
self.serialized_diff = self._serialize_diff(self.commits_diff)
# Once the analysis is done, we store the cache on disk
self.app.cache.save()

def _serialize_diff(self, commits_diff):
data = {}
Expand All @@ -453,7 +455,7 @@ def _get_branch_commits(self, branch, path="."):
for commit in commits:
if self.app.cache.is_commit_ported(commit.hexsha):
continue
com = g.Commit(commit)
com = g.Commit(commit, cache=self.app.cache)
if self._skip_commit(com):
continue
commits_list.append(com)
Expand Down Expand Up @@ -551,7 +553,7 @@ def get_commits_diff(self):
# Ignore commits referenced by a PR but not present
# in the stable branches
continue
pr_commit = g.Commit(raw_commit)
pr_commit = g.Commit(raw_commit, cache=self.app.cache)
if self._skip_commit(pr_commit):
continue
pr_commit_paths = {
Expand Down
17 changes: 17 additions & 0 deletions oca_port/tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import unittest
from unittest.mock import patch

from oca_port.app import App

import git


Expand Down Expand Up @@ -103,6 +105,21 @@ def _commit_change_on_branch(self, repo_path, branch):
commit = repo.index.commit(f"[FIX] {self._settings['addon']}: fix dependency")
return commit.hexsha

def _create_app(self, from_branch, to_branch, **kwargs):
    """Build an ``App`` bound to the test repository.

    Default parameters are read from ``self._settings`` and
    ``self.repo_path``; any extra keyword argument overrides the
    matching default (or is passed through to ``App`` as-is).
    """
    settings = self._settings
    app_kwargs = dict(
        from_branch=from_branch,
        to_branch=to_branch,
        addon=settings["addon"],
        from_org=settings["from_org"],
        from_remote=settings["from_remote"],
        repo_path=self.repo_path,
        repo_name="test",
        user_org=settings["user_org"],
        no_cache=settings["no_cache"],
    )
    # Caller-supplied values take precedence over the defaults
    app_kwargs.update(kwargs)
    return App(**app_kwargs)

def tearDown(self):
# Clean up the Git repository
shutil.rmtree(self.repo_upstream_path)
Expand Down
17 changes: 0 additions & 17 deletions oca_port/tests/test_app.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,9 @@
import json

from oca_port.app import App

from . import common


class TestApp(common.CommonCase):
def _create_app(self, from_branch, to_branch, **kwargs):
    """Instantiate an ``App`` from the test settings.

    ``kwargs`` entries win over the defaults taken from
    ``self._settings`` and ``self.repo_path``.
    """
    defaults = {
        "from_branch": from_branch,
        "to_branch": to_branch,
        "addon": self._settings["addon"],
        "from_org": self._settings["from_org"],
        "from_remote": self._settings["from_remote"],
        "repo_path": self.repo_path,
        "repo_name": "test",
        "user_org": self._settings["user_org"],
        "no_cache": self._settings["no_cache"],
    }
    # Merge defaults and overrides in one go (overrides come last)
    return App(**{**defaults, **kwargs})

def test_app_nothing_to_port(self):
app = self._create_app(
self._settings["remote_branch1"], self._settings["remote_branch2"]
Expand Down
40 changes: 40 additions & 0 deletions oca_port/tests/test_utils_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright 2023 Camptocamp SA
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl)

from . import common

from oca_port.utils import cache


class TestUserCache(common.CommonCase):
    """Check the ``UserCache`` accessors: ported commits, PR data, files."""

    def setUp(self):
        """Create a ``UserCache`` bound to an app using the "TEST" org."""
        super().setUp()
        settings = self._settings
        app = self._create_app(
            settings["remote_branch1"],
            settings["remote_branch2"],
            from_org="TEST",
        )
        self.cache = cache.UserCache(app)

    def test_commit_ported(self):
        """A commit is reported as ported only once it has been marked."""
        commit_sha = "TEST"
        user_cache = self.cache
        self.assertFalse(user_cache.is_commit_ported(commit_sha))
        user_cache.mark_commit_as_ported(commit_sha)
        self.assertTrue(user_cache.is_commit_ported(commit_sha))

    def test_commit_pr(self):
        """PR data stored for a commit is returned unchanged."""
        commit_sha = "TEST"
        expected_pr = {
            "number": 10,
            "title": "TEST",
        }
        user_cache = self.cache
        self.assertFalse(user_cache.get_pr_from_commit(commit_sha))
        user_cache.store_commit_pr(commit_sha, expected_pr)
        self.assertDictEqual(
            user_cache.get_pr_from_commit(commit_sha), expected_pr
        )

    def test_commit_files(self):
        """File paths stored for a commit are returned unchanged."""
        commit_sha = "TEST"
        expected_files = ["a/b/test", "a/data"]
        user_cache = self.cache
        self.assertFalse(user_cache.get_commit_files(commit_sha))
        user_cache.set_commit_files(commit_sha, expected_files)
        self.assertEqual(
            user_cache.get_commit_files(commit_sha), expected_files
        )
64 changes: 59 additions & 5 deletions oca_port/utils/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,18 @@ def get_pr_from_commit(self, commit_sha: str):
# No PR data to return
return {}

def get_commit_files(self, commit_sha: str):
    """Return an empty set: there are no commit files to return."""
    no_files = set()
    return no_files

def set_commit_files(self, commit_sha: str, files: list):
    """Do nothing: the given file paths are not stored."""
    return None

def save(self):
    """Do nothing: there is nothing to save."""
    return None

def clear(self):
    """Do nothing: there is nothing to clear."""
    return None
Expand All @@ -79,6 +91,7 @@ class UserCache:
_cache_dirname = "oca-port"
_ported_dirname = "ported"
_to_port_dirname = "to_port"
_commits_data_dirname = "commits_data"

def __init__(self, app):
"""Initialize user's cache manager."""
Expand All @@ -88,6 +101,8 @@ def __init__(self, app):
self._ported_commits = self._get_ported_commits()
self._commits_to_port_path = self._get_commits_to_port_path()
self._commits_to_port = self._get_commits_to_port()
self._commits_data_path = self._get_commits_data_path()
self._commits_data = self._get_commits_data()

@classmethod
def _get_dir_path(cls):
Expand Down Expand Up @@ -123,6 +138,15 @@ def _get_commits_to_port_path(self):
file_name,
)

def _get_commits_data_path(self):
"""Return the file path storing commits cached data."""
file_name = f"{self.app.repo_name}.json"
return self.dir_path.joinpath(
self._commits_data_dirname,
self.app.from_org,
file_name,
)

def _get_ported_commits(self):
self._ported_commits_path.parent.mkdir(parents=True, exist_ok=True)
self._ported_commits_path.touch(exist_ok=True)
Expand All @@ -141,6 +165,19 @@ def _get_commits_to_port(self):
nested_dict = lambda: defaultdict(nested_dict) # noqa
return nested_dict()

def _get_commits_data(self):
    """Load the commits cached data from its JSON file.

    The file and its parent directories are created when missing.
    If the content cannot be decoded as JSON, an empty "nested dict"
    is returned instead.
    """
    data_path = self._commits_data_path
    data_path.parent.mkdir(parents=True, exist_ok=True)
    data_path.touch(exist_ok=True)

    def nested_dict():
        # Self-nesting defaultdict: no need to worry about checking keys
        return defaultdict(nested_dict)

    try:
        with data_path.open() as json_file:
            return json.load(json_file, object_hook=misc.defaultdict_from_dict)
    except json.JSONDecodeError:
        # Mainly hit with empty files (first initialization of the cache),
        # but it also avoids a crash if the JSON file gets corrupted.
        return nested_dict()

def mark_commit_as_ported(self, commit_sha: str):
"""Mark commit as ported."""
if self.is_commit_ported(commit_sha):
Expand All @@ -158,11 +195,6 @@ def store_commit_pr(self, commit_sha: str, data):
pr_number = data["number"]
self._commits_to_port["pull_requests"][str(pr_number)] = data
self._commits_to_port["commits"][commit_sha]["pr"] = pr_number
try:
with self._commits_to_port_path.open(mode="w") as file_:
json.dump(self._commits_to_port, file_, indent=2)
except Exception:
pass

def get_pr_from_commit(self, commit_sha: str):
"""Return the original PR data of a commit."""
Expand All @@ -171,6 +203,28 @@ def get_pr_from_commit(self, commit_sha: str):
return self._commits_to_port["pull_requests"][str(pr_number)]
return {}

def get_commit_files(self, commit_sha: str):
    """Return the file paths modified by a commit.

    Returns the value stored under the commit's ``"files"`` key, or an
    empty set when nothing is stored for this commit.
    """
    commit_data = self._commits_data[commit_sha]
    return commit_data.get("files", set())

def set_commit_files(self, commit_sha: str, files: list):
    """Store the file paths modified by a commit.

    A list copy of ``files`` is kept under the commit's ``"files"`` key.
    """
    stored_files = [*files]
    self._commits_data[commit_sha]["files"] = stored_files

def save(self):
    """Save cache files."""
    caches = (
        # commits/PRs to port
        (self._commits_to_port, self._commits_to_port_path),
        # commits data file
        (self._commits_data, self._commits_data_path),
    )
    for content, file_path in caches:
        self._save_cache(content, file_path)

def _save_cache(self, cache, path):
try:
with path.open(mode="w") as file_:
json.dump(cache, file_, indent=2)
except Exception:
pass

def clear(self):
"""Clear the cache by removing the content of the cache directory."""
if self._cache_dirname and str(self.dir_path).endswith(self._cache_dirname):
Expand Down
42 changes: 39 additions & 3 deletions oca_port/utils/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,10 @@ class Commit:
other_equality_attrs = ("paths",)
eq_strict = True

def __init__(self, commit):
def __init__(self, commit, cache=None):
"""Initializes a new Commit instance from a GitPython Commit object."""
self.raw_commit = commit
self.cache = cache
self.author_name = commit.author.name
self.author_email = commit.author.email
self.authored_datetime = commit.authored_datetime.replace(
Expand All @@ -71,10 +72,45 @@ def __init__(self, commit):
self.hexsha = commit.hexsha
self.committed_datetime = commit.committed_datetime.replace(tzinfo=None)
self.parents = [parent.hexsha for parent in commit.parents]
self.files = {f for f in set(commit.stats.files.keys()) if "=>" not in f}
self.paths = {CommitPath(f) for f in self.files}
self._files = set()
self._paths = set()
self.ported_commits = []

@property
def files(self):
    """Return the file paths modified by this commit.

    The result of ``_get_files()`` is memoized in ``self._files``, so
    the git storage or the cache is accessed only on demand, and not
    again once a non-empty result has been stored.
    """
    # Check the memoized value first: calling `_get_files()` beforehand
    # would repeat the lookup on every access.
    if not self._files:
        self._files = self._get_files()
    return self._files

@property
def paths(self):
    """Return the set of `CommitPath` objects built from ``self.files``.

    The set is memoized in ``self._paths`` so the `CommitPath` objects
    are built only on demand, and not again once a non-empty result
    has been stored.
    """
    # Check the memoized value first: building the set beforehand would
    # recreate every `CommitPath` on each access.
    if not self._paths:
        self._paths = {CommitPath(f) for f in self.files}
    return self._paths

def _get_files(self):
"""Retrieve file paths modified by this commit.
Leverage the user's cache if one is provided as git can be quite slow
to retrieve such data from big repository.
"""
files = set()
if self.cache:
files = self.cache.get_commit_files(self.hexsha)
if not files:
files = {
f for f in set(self.raw_commit.stats.files.keys()) if "=>" not in f
}
if self.cache:
self.cache.set_commit_files(self.hexsha, files)
return files

def _get_equality_attrs(self):
return [attr for attr in self.base_equality_attrs if hasattr(self, attr)] + [
attr
Expand Down

0 comments on commit 25a21e5

Please sign in to comment.