Skip to content

Commit

Permalink
scrape_role_files: add support for nested role directories
Browse files Browse the repository at this point in the history
Changing the way how roles are scraped. Previously directories present in
default roles directory were treated as roles (dir name is role name).

Now scraper enters each directory looking for one of 8 specified subdirs.
When at least one such subdir is found, its parent is treated as role, else
subdir is added to search list (this is equivalent of traversing repository)

Adjusting tests to utilize nested role directories
  • Loading branch information
Krzysztof Swietlicki authored and Krzysztof Swietlicki committed Feb 8, 2024
1 parent bb31afe commit 1dc0fed
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 35 deletions.
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,14 @@ def repo_data():
},
"changelog_file": None,
},
"foobar/baz": {
"last_changed": "2018-09-17 15:15:15",
"readme_file": {
"path": "roles/foobar/baz/README.rst",
"content": raw_file("repo_files/roles/foobar/baz/README.rst"),
},
"changelog_file": None,
},
}

return repo, tenants, job_files, role_files
Expand Down
53 changes: 39 additions & 14 deletions tests/scraper/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class MockContents:
DIR = "dir"

def __init__(self, path, type):
self.name = path.split("/")[-1]
self.path = path
self.type = type

Expand Down Expand Up @@ -101,9 +102,10 @@ class MockGitHubRepository(GitHubRepository):
"zuul.d/jobs.yaml": MOCKED_JOB_CONTENT,
"roles": {"docker-run": MockContents("roles/docker-run", MockContents.DIR)},
"roles/docker-run": {
"tasks": MockContents("roles/docker-run/tasks", MockContents.DIR),
"README.rst": MockContents(
"roles/docker-run/README.rst", MockContents.FILE
)
),
},
"roles/docker-run/README.rst": MOCKED_ROLE_DESCRIPTION,
"roles/ignored": MockContents("not a valid role", MockContents.FILE),
Expand All @@ -117,25 +119,40 @@ class MockGitHubRepository(GitHubRepository):
"bar": MockContents("roles/bar", MockContents.DIR),
"foobar": MockContents("roles/foobar", MockContents.DIR),
"empty-dir": MockContents("roles/empty-dir", MockContents.DIR),
"foobaz": MockContents("roles/foobaz", MockContents.DIR),
},
"roles/foo": {
"README.md": MockContents("roles/foo/README.md", MockContents.FILE)
"defaults": MockContents("roles/foo/defaults", MockContents.DIR),
"README.md": MockContents("roles/foo/README.md", MockContents.FILE),
},
"roles/bar": {
"README.txt": MockContents("roles/bar/README.txt", MockContents.FILE)
"library": MockContents("roles/bar/library", MockContents.DIR),
"README.txt": MockContents("roles/bar/README.txt", MockContents.FILE),
},
"roles/foobar": {
"README": MockContents("roles/foobar/README", MockContents.FILE)
"handlers": MockContents("roles/foobar/handlers", MockContents.DIR),
"README": MockContents("roles/foobar/README", MockContents.FILE),
},
"roles/empty-dir": {
"meta": MockContents("roles/empty-dir/meta", MockContents.DIR),
"README.whatever": MockContents(
"roles/empty-dir/README.whatever", MockContents.FILE
)
),
},
"roles/foobaz": {
"baz": MockContents("roles/foobaz/baz", MockContents.DIR),
},
"roles/foobaz/baz": {
"vars": MockContents("roles/foobaz/baz/vars", MockContents.DIR),
"README.rst": MockContents(
"roles/foobaz/baz/README.rst", MockContents.FILE
),
},
"roles/foo/README.md": "# Just some Markdown",
"roles/bar/README.txt": "Just some simple text\nwith a line break",
"roles/foobar/README": "Simple text in a file without extension",
"roles/empty-dir/REAMDE.whatever": "This file won't be checked out",
"roles/empty-dir/README.whatever": "This file won't be checked out",
"roles/foobaz/baz/README.rst": "Simple readme file from nested role",
},
"orga1/repo3": {
REPO_ROOT: {
Expand Down Expand Up @@ -215,19 +232,24 @@ def test_scrape():
"orga1/repo2": (
{},
{
"foo": {
"bar": {
"last_changed": "2018-09-17 15:15:15",
"readme_file": {
"path": "roles/foo/README.md",
"content": "# Just some Markdown",
"path": "roles/bar/README.txt",
"content": "Just some simple text\nwith a line break",
},
"changelog_file": None,
},
"bar": {
"empty-dir": {
"last_changed": "2018-09-17 15:15:15",
"readme_file": None,
"changelog_file": None,
},
"foo": {
"last_changed": "2018-09-17 15:15:15",
"readme_file": {
"path": "roles/bar/README.txt",
"content": "Just some simple text\nwith a line break",
"path": "roles/foo/README.md",
"content": "# Just some Markdown",
},
"changelog_file": None,
},
Expand All @@ -239,9 +261,12 @@ def test_scrape():
},
"changelog_file": None,
},
"empty-dir": {
"foobaz/baz": {
"last_changed": "2018-09-17 15:15:15",
"readme_file": None,
"readme_file": {
"path": "roles/foobaz/baz/README.rst",
"content": "Simple readme file from nested role",
},
"changelog_file": None,
},
},
Expand Down
35 changes: 35 additions & 0 deletions tests/scraper/test_repo_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def test_parse(repo_data):
job_5 = jobs[4]
role_1 = [r for r in roles if r["role_name"] == "foo"][0]
role_2 = [r for r in roles if r["role_name"] == "bar"][0]
role_3 = [r for r in roles if r["role_name"] == "foobar/baz"][0]

expected_job_1 = {
"job_name": "my-cool-new-job",
Expand Down Expand Up @@ -215,6 +216,39 @@ def test_parse(repo_data):
"last_updated": "2018-09-17 15:15:15",
}

expected_role_3 = {
"role_name": "foobar/baz",
"repo": "my/project",
"tenants": ["foo", "bar"],
"description": (
"Yet another simple description\n\n"
"**Role variables**\n\n"
".. zuul:rolevar:: my_mandatory_variable\n\n"
" This variable is mandatory.\n"
),
"description_html": (
"<p>Yet another simple description</p>\n"
"<p><strong>Role variables</strong></p>\n"
'<dl class="zuul rolevar">\n'
'<dt class="sig sig-object zuul" id="rolevar-my_mandatory_variable">\n'
'<span class="sig-name descname">'
'<span class="pre">my_mandatory_variable</span>'
"</span>"
'<a class="headerlink" href="#rolevar-my_mandatory_variable" '
'title="Link to this definition">'
"¶</a><br /></dt>\n"
"<dd><p>This variable is mandatory.</p>\n"
"</dd></dl>\n"
"\n"
),
"url": "https://github/my/project/tree/master/roles/foobar/baz",
"private": False,
"platforms": [],
"reusable": False,
"scrape_time": scrape_time,
"last_updated": "2018-09-17 15:15:15",
}

# NOTE (fschmidt): Without the skip_empty flag, empty (= None) keys will
# be stripped from the resulting dict.
assert job_1.to_dict(skip_empty=False) == expected_job_1
Expand All @@ -224,6 +258,7 @@ def test_parse(repo_data):
assert job_5.to_dict(skip_empty=False) == expected_job_5
assert role_1.to_dict(skip_empty=False) == expected_role_1
assert role_2.to_dict(skip_empty=False) == expected_role_2
assert role_3.to_dict(skip_empty=False) == expected_role_3


def test_parse_reusable_repo(repo_data):
Expand Down
7 changes: 7 additions & 0 deletions tests/testdata/repo_files/roles/foobar/baz/README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Yet another simple description

**Role variables**

.. zuul:rolevar:: my_mandatory_variable
This variable is mandatory.
72 changes: 51 additions & 21 deletions zubbi/scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import logging
from pathlib import Path

from zubbi.scraper.exceptions import CheckoutError

Expand All @@ -29,6 +30,19 @@

ROLES_DIRECTORY = "roles"

# Role needs to contains at least one of those directories to be considered as role
# https://docs.ansible.com/ansible/latest/playbook_guide/playbooks_reuse_roles.html#role-directory-structure
ROLE_MANDATORY_DIRS = [
"tasks",
"handlers",
"library",
"defaults",
"vars",
"files",
"templates",
"meta",
]

REPO_ROOT = "/"


Expand Down Expand Up @@ -146,29 +160,44 @@ def get_file_info(self, path):

def scrape_role_files(self):
role_files = {}
# We are only interested in the role name (=> directory name)
# and the README and CHANGELOG files. Thus, we don't need to
# iterate recursively over the roles directory as those files
# should be on the top-level per role.
# Roles might be grouped in some parent directory, so we need to
# recursively check nested directories.

# Listing repository files is based on the example in pyGithub doc:
# https://pygithub.readthedocs.io/en/v2.1.1/examples/Repository.html#get-all-of-the-contents-of-the-repository-recursively
try:
roles = self.repo.directory_contents(ROLES_DIRECTORY)
for role_name, remote_file in roles.items():
# Ansible requires roles to be defined in a certain directory structure.
# Thus, we are only interested in directories
dirs_to_search = [
item
for item in self.repo.directory_contents(ROLES_DIRECTORY).values()
if item.type == "dir"
]
while dirs_to_search:
try:
if remote_file.type != "dir":
# Ansible requires the role to be defined in a
# certain directory structure. Thus, we can
# ignore it in case it's not a directory.
continue
last_changed = self.repo.last_changed(remote_file.path)
existing_files = self.repo.directory_contents(remote_file.path)
# Skip empty directories or files
if not existing_files:
dir = dirs_to_search.pop(0)
dir_items = self.repo.directory_contents(dir.path)

subdirs = [
item for item in dir_items.values() if item.type == "dir"
]
# When directory does not contain one of role mandatory directories
# it is not a role and its subdirectories (if any) should be further
# scanned.
# This is done by appending those subdirectories to 'dirs_to_search'
# search list (this implements the recursive search)
if set([d.name for d in subdirs]).isdisjoint(ROLE_MANDATORY_DIRS):
dirs_to_search.extend(subdirs)
continue
readme_file = self.find_matching_file(README_FILES, existing_files)
changelog_file = self.find_matching_file(
CHANGELOG_FILES, existing_files
)
role_files[role_name] = {

# Once the role is found, we are only interested in the timestamp of
# the latest update (the last git change), README and CHANGELOG files
# Those files should be on the top-level per role.
last_changed = self.repo.last_changed(dir.path)
readme_file = self.find_matching_file(README_FILES, dir_items)
changelog_file = self.find_matching_file(CHANGELOG_FILES, dir_items)
# role name is the directory path relative to ROLES_DIRECTORY
role_files[str(Path(dir.path).relative_to(ROLES_DIRECTORY))] = {
"last_changed": last_changed,
"readme_file": readme_file,
"changelog_file": changelog_file,
Expand All @@ -178,7 +207,8 @@ def scrape_role_files(self):
except CheckoutError as e:
LOGGER.debug(e)

return role_files
# sort keys (role names) alphabetically
return {key: value for key, value in sorted(role_files.items())}

def find_matching_file(self, file_filter, existing_files):
for filename, file_content in existing_files.items():
Expand Down

0 comments on commit 1dc0fed

Please sign in to comment.