Skip to content

Commit

Permalink
Merge pull request #108 from swiekr/ks/parse_roles
Browse files Browse the repository at this point in the history
scrape_role_files: add support for nested role directories
  • Loading branch information
felixedel authored Feb 8, 2024
2 parents 73ad419 + 1dc0fed commit 81decf3
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 35 deletions.
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,14 @@ def repo_data():
},
"changelog_file": None,
},
"foobar/baz": {
"last_changed": "2018-09-17 15:15:15",
"readme_file": {
"path": "roles/foobar/baz/README.rst",
"content": raw_file("repo_files/roles/foobar/baz/README.rst"),
},
"changelog_file": None,
},
}

return repo, tenants, job_files, role_files
Expand Down
53 changes: 39 additions & 14 deletions tests/scraper/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class MockContents:
DIR = "dir"

def __init__(self, path, type):
self.name = path.split("/")[-1]
self.path = path
self.type = type

Expand Down Expand Up @@ -99,9 +100,10 @@ class MockGitHubRepository(GitHubRepository):
"zuul.d/jobs.yaml": MOCKED_JOB_CONTENT,
"roles": {"docker-run": MockContents("roles/docker-run", MockContents.DIR)},
"roles/docker-run": {
"tasks": MockContents("roles/docker-run/tasks", MockContents.DIR),
"README.rst": MockContents(
"roles/docker-run/README.rst", MockContents.FILE
)
),
},
"roles/docker-run/README.rst": MOCKED_ROLE_DESCRIPTION,
"roles/ignored": MockContents("not a valid role", MockContents.FILE),
Expand All @@ -115,25 +117,40 @@ class MockGitHubRepository(GitHubRepository):
"bar": MockContents("roles/bar", MockContents.DIR),
"foobar": MockContents("roles/foobar", MockContents.DIR),
"empty-dir": MockContents("roles/empty-dir", MockContents.DIR),
"foobaz": MockContents("roles/foobaz", MockContents.DIR),
},
"roles/foo": {
"README.md": MockContents("roles/foo/README.md", MockContents.FILE)
"defaults": MockContents("roles/foo/defaults", MockContents.DIR),
"README.md": MockContents("roles/foo/README.md", MockContents.FILE),
},
"roles/bar": {
"README.txt": MockContents("roles/bar/README.txt", MockContents.FILE)
"library": MockContents("roles/bar/library", MockContents.DIR),
"README.txt": MockContents("roles/bar/README.txt", MockContents.FILE),
},
"roles/foobar": {
"README": MockContents("roles/foobar/README", MockContents.FILE)
"handlers": MockContents("roles/foobar/handlers", MockContents.DIR),
"README": MockContents("roles/foobar/README", MockContents.FILE),
},
"roles/empty-dir": {
"meta": MockContents("roles/empty-dir/meta", MockContents.DIR),
"README.whatever": MockContents(
"roles/empty-dir/README.whatever", MockContents.FILE
)
),
},
"roles/foobaz": {
"baz": MockContents("roles/foobaz/baz", MockContents.DIR),
},
"roles/foobaz/baz": {
"vars": MockContents("roles/foobaz/baz/vars", MockContents.DIR),
"README.rst": MockContents(
"roles/foobaz/baz/README.rst", MockContents.FILE
),
},
"roles/foo/README.md": "# Just some Markdown",
"roles/bar/README.txt": "Just some simple text\nwith a line break",
"roles/foobar/README": "Simple text in a file without extension",
"roles/empty-dir/REAMDE.whatever": "This file won't be checked out",
"roles/empty-dir/README.whatever": "This file won't be checked out",
"roles/foobaz/baz/README.rst": "Simple readme file from nested role",
},
"orga1/repo3": {
REPO_ROOT: {
Expand Down Expand Up @@ -213,19 +230,24 @@ def test_scrape():
"orga1/repo2": (
{},
{
"foo": {
"bar": {
"last_changed": "2018-09-17 15:15:15",
"readme_file": {
"path": "roles/foo/README.md",
"content": "# Just some Markdown",
"path": "roles/bar/README.txt",
"content": "Just some simple text\nwith a line break",
},
"changelog_file": None,
},
"bar": {
"empty-dir": {
"last_changed": "2018-09-17 15:15:15",
"readme_file": None,
"changelog_file": None,
},
"foo": {
"last_changed": "2018-09-17 15:15:15",
"readme_file": {
"path": "roles/bar/README.txt",
"content": "Just some simple text\nwith a line break",
"path": "roles/foo/README.md",
"content": "# Just some Markdown",
},
"changelog_file": None,
},
Expand All @@ -237,9 +259,12 @@ def test_scrape():
},
"changelog_file": None,
},
"empty-dir": {
"foobaz/baz": {
"last_changed": "2018-09-17 15:15:15",
"readme_file": None,
"readme_file": {
"path": "roles/foobaz/baz/README.rst",
"content": "Simple readme file from nested role",
},
"changelog_file": None,
},
},
Expand Down
35 changes: 35 additions & 0 deletions tests/scraper/test_repo_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def test_parse(repo_data):
job_5 = jobs[4]
role_1 = [r for r in roles if r["role_name"] == "foo"][0]
role_2 = [r for r in roles if r["role_name"] == "bar"][0]
role_3 = [r for r in roles if r["role_name"] == "foobar/baz"][0]

expected_job_1 = {
"job_name": "my-cool-new-job",
Expand Down Expand Up @@ -215,6 +216,39 @@ def test_parse(repo_data):
"last_updated": "2018-09-17 15:15:15",
}

expected_role_3 = {
"role_name": "foobar/baz",
"repo": "my/project",
"tenants": ["foo", "bar"],
"description": (
"Yet another simple description\n\n"
"**Role variables**\n\n"
".. zuul:rolevar:: my_mandatory_variable\n\n"
" This variable is mandatory.\n"
),
"description_html": (
"<p>Yet another simple description</p>\n"
"<p><strong>Role variables</strong></p>\n"
'<dl class="zuul rolevar">\n'
'<dt class="sig sig-object zuul" id="rolevar-my_mandatory_variable">\n'
'<span class="sig-name descname">'
'<span class="pre">my_mandatory_variable</span>'
"</span>"
'<a class="headerlink" href="#rolevar-my_mandatory_variable" '
'title="Link to this definition">'
"¶</a><br /></dt>\n"
"<dd><p>This variable is mandatory.</p>\n"
"</dd></dl>\n"
"\n"
),
"url": "https://github/my/project/tree/master/roles/foobar/baz",
"private": False,
"platforms": [],
"reusable": False,
"scrape_time": scrape_time,
"last_updated": "2018-09-17 15:15:15",
}

# NOTE (fschmidt): Without the skip_empty flag, empty (= None) keys will
# be stripped from the resulting dict.
assert job_1.to_dict(skip_empty=False) == expected_job_1
Expand All @@ -224,6 +258,7 @@ def test_parse(repo_data):
assert job_5.to_dict(skip_empty=False) == expected_job_5
assert role_1.to_dict(skip_empty=False) == expected_role_1
assert role_2.to_dict(skip_empty=False) == expected_role_2
assert role_3.to_dict(skip_empty=False) == expected_role_3


def test_parse_reusable_repo(repo_data):
Expand Down
7 changes: 7 additions & 0 deletions tests/testdata/repo_files/roles/foobar/baz/README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Yet another simple description

**Role variables**

.. zuul:rolevar:: my_mandatory_variable
This variable is mandatory.
72 changes: 51 additions & 21 deletions zubbi/scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import logging
from pathlib import Path

from zubbi.scraper.exceptions import CheckoutError

Expand All @@ -29,6 +30,19 @@

ROLES_DIRECTORY = "roles"

# A role must contain at least one of these directories to be considered a role:
# https://docs.ansible.com/ansible/latest/playbook_guide/playbooks_reuse_roles.html#role-directory-structure
ROLE_MANDATORY_DIRS = [
"tasks",
"handlers",
"library",
"defaults",
"vars",
"files",
"templates",
"meta",
]

REPO_ROOT = "/"


Expand Down Expand Up @@ -145,29 +159,44 @@ def get_file_info(self, path):

def scrape_role_files(self):
role_files = {}
# We are only interested in the role name (=> directory name)
# and the README and CHANGELOG files. Thus, we don't need to
# iterate recursively over the roles directory as those files
# should be on the top-level per role.
# Roles might be grouped in some parent directory, so we need to
# recursively check nested directories.

# Listing repository files is based on the example in pyGithub doc:
# https://pygithub.readthedocs.io/en/v2.1.1/examples/Repository.html#get-all-of-the-contents-of-the-repository-recursively
try:
roles = self.repo.directory_contents(ROLES_DIRECTORY)
for role_name, remote_file in roles.items():
# Ansible requires roles to be defined in a certain directory structure.
# Thus, we are only interested in directories
dirs_to_search = [
item
for item in self.repo.directory_contents(ROLES_DIRECTORY).values()
if item.type == "dir"
]
while dirs_to_search:
try:
if remote_file.type != "dir":
# Ansible requires the role to be defined in a
# certain directory structure. Thus, we can
# ignore it in case it's not a directory.
continue
last_changed = self.repo.last_changed(remote_file.path)
existing_files = self.repo.directory_contents(remote_file.path)
# Skip empty directories or files
if not existing_files:
dir = dirs_to_search.pop(0)
dir_items = self.repo.directory_contents(dir.path)

subdirs = [
item for item in dir_items.values() if item.type == "dir"
]
                # When a directory does not contain any of the mandatory role
                # directories, it is not a role itself, and its subdirectories
                # (if any) should be scanned further.
                # This is done by appending those subdirectories to the
                # 'dirs_to_search' list (this implements the recursive search).
if set([d.name for d in subdirs]).isdisjoint(ROLE_MANDATORY_DIRS):
dirs_to_search.extend(subdirs)
continue
readme_file = self.find_matching_file(README_FILES, existing_files)
changelog_file = self.find_matching_file(
CHANGELOG_FILES, existing_files
)
role_files[role_name] = {

            # Once a role is found, we are only interested in the timestamp of
            # the latest update (the last git change) and the README and
            # CHANGELOG files. Those files should be at the top level of each role.
last_changed = self.repo.last_changed(dir.path)
readme_file = self.find_matching_file(README_FILES, dir_items)
changelog_file = self.find_matching_file(CHANGELOG_FILES, dir_items)
            # The role name is the directory path relative to ROLES_DIRECTORY.
role_files[str(Path(dir.path).relative_to(ROLES_DIRECTORY))] = {
"last_changed": last_changed,
"readme_file": readme_file,
"changelog_file": changelog_file,
Expand All @@ -177,7 +206,8 @@ def scrape_role_files(self):
except CheckoutError as e:
LOGGER.debug(e)

return role_files
# sort keys (role names) alphabetically
return {key: value for key, value in sorted(role_files.items())}

def find_matching_file(self, file_filter, existing_files):
for filename, file_content in existing_files.items():
Expand Down

0 comments on commit 81decf3

Please sign in to comment.