From 33b876186021f87a8650396c0d90d137f4db0e07 Mon Sep 17 00:00:00 2001 From: inishchith Date: Fri, 14 Jun 2019 10:56:01 +0530 Subject: [PATCH] [graal] Add support of Graal's CoCom Backend to ELK Add support of Graal's CoCom backend to ELK and its corresponding tests Signed-off-by: inishchith --- grimoire_elk/enriched/cocom.py | 171 +++++++++++++++++++++++++++++++++ grimoire_elk/raw/cocom.py | 71 ++++++++++++++ grimoire_elk/utils.py | 5 + requirements.txt | 1 + tests/data/cocom.json | 143 +++++++++++++++++++++++++++ tests/test_cocom.py | 111 +++++++++++++++++++++ 6 files changed, 502 insertions(+) create mode 100644 grimoire_elk/enriched/cocom.py create mode 100644 grimoire_elk/raw/cocom.py create mode 100644 tests/data/cocom.json create mode 100644 tests/test_cocom.py diff --git a/grimoire_elk/enriched/cocom.py b/grimoire_elk/enriched/cocom.py new file mode 100644 index 000000000..b44e8418f --- /dev/null +++ b/grimoire_elk/enriched/cocom.py @@ -0,0 +1,171 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2015-2019 Bitergia +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+#
+# Authors:
+#   Valerio Cosentino
+#   Nishchith Shetty
+#
+
+import logging
+
+from .enrich import Enrich, metadata
+from grimoirelab_toolkit.datetime import str_to_datetime
+
+MAX_SIZE_BULK_ENRICHED_ITEMS = 200
+
+# Fields copied as they are from each file analysis (None when missing)
+ANALYSIS_FIELDS = ('ccn', 'num_funs', 'tokens', 'loc', 'ext', 'in_commit',
+                   'blanks', 'comments', 'file_path')
+
+logger = logging.getLogger(__name__)
+
+
+class CocomEnrich(Enrich):
+    """Enrich CoCom raw items, producing one enriched item per analyzed file"""
+
+    def get_identities(self, item):
+        """ Return the identities from an item (always an empty list) """
+
+        return []
+
+    def has_identities(self):
+        """ Return whether the enriched items contains identities """
+
+        return False
+
+    def get_field_unique_id(self):
+        """ Return the name of the field used as unique id in bulk uploads """
+
+        return "id"
+
+    def extract_modules(self, file_path):
+        """ Extracts module path from the given file path
+
+        Every ancestor directory of the file is returned, shortest first:
+        "a/b/c.py" gives ["a", "a/b"].
+
+        :param file_path: path of the file, using '/' as separator; it can
+            be None or empty, since `get_rich_item` reads it with `.get`
+        :returns: list of module paths, empty when there is no directory
+        """
+        if not file_path:
+            return []
+
+        path_chunks = file_path.split('/')
+        prefixes = ('/'.join(path_chunks[:idx]) for idx in range(1, len(path_chunks)))
+
+        # A leading '/' produces an empty first prefix, which is skipped
+        return [prefix for prefix in prefixes if prefix]
+
+    @metadata
+    def get_rich_item(self, file_analysis):
+        """ Build the enriched fields of a single file analysis
+
+        :param file_analysis: dict with the metrics of one analyzed file
+        :returns: dict with the copied metrics (None for the missing ones),
+            the module paths and the derived ratios
+        """
+        eitem = {field: file_analysis.get(field) for field in ANALYSIS_FIELDS}
+        eitem['modules'] = self.extract_modules(eitem['file_path'])
+
+        return self.__add_derived_metrics(file_analysis, eitem)
+
+    def get_rich_items(self, item):
+        """ Return one enriched item per file analysis found in a raw item
+
+        :param item: raw item; its 'data' entry holds the commit attributes
+            and the list of file analyses under 'analysis'
+        :returns: list of enriched items
+        """
+        # The real data
+        entry = item['data']
+
+        enriched_items = []
+
+        for file_analysis in entry["analysis"]:
+            eitem = self.get_rich_item(file_analysis)
+
+            # Raw fields missing in the item are stored as None
+            for f in self.RAW_FIELDS_COPY:
+                eitem[f] = item.get(f)
+
+            # common attributes
+            eitem['commit_sha'] = entry['commit']
+            eitem['author'] = entry['Author']
+            eitem['committer'] = entry['Commit']
+            eitem['commit'] = entry['commit']
+            eitem['message'] = entry['message']
+            eitem['author_date'] = self.__fix_field_date(entry['AuthorDate'])
+            eitem['commit_date'] = self.__fix_field_date(entry['CommitDate'])
+
+            if self.prjs_map:
+                eitem.update(self.get_item_project(eitem))
+
+            # uuid: commit hash plus file path
+            eitem['id'] = "{}_{}".format(eitem['commit_sha'], eitem['file_path'])
+
+            eitem.update(self.get_grimoire_fields(entry["AuthorDate"], "file"))
+
+            self.add_repository_labels(eitem)
+            self.add_metadata_filter_raw(eitem)
+
+            enriched_items.append(eitem)
+
+        return enriched_items
+
+    def __add_derived_metrics(self, file_analysis, eitem):
+        """ Add derived metrics fields
+
+        `comments_ratio` and `blanks_ratio` are the comment and blank lines
+        divided by the total lines (loc + comments + blanks). Both are None
+        when any of the three counts is missing, and 0 when the total is 0.
+
+        :param file_analysis: analysis the item was built from (not used)
+        :param eitem: enriched item with 'loc', 'comments' and 'blanks' set
+        :returns: the same `eitem`, updated in place
+        """
+        comments = eitem['comments']
+        blanks = eitem['blanks']
+        counts = (eitem['loc'], comments, blanks)
+
+        if any(count is None for count in counts):
+            eitem["comments_ratio"] = None
+            eitem["blanks_ratio"] = None
+            return eitem
+
+        # Guard on the total and not on loc: a file with no code lines can
+        # still have comments or blanks, and its ratios must stay in [0, 1]
+        total_lines = sum(counts)
+        eitem["comments_ratio"] = comments / total_lines if total_lines else 0
+        eitem["blanks_ratio"] = blanks / total_lines if total_lines else 0
+
+        return eitem
+
+    def enrich_items(self, ocean_backend, events=False):
+        """ Enrich the raw items and upload them to Elasticsearch in bulks
+
+        :param ocean_backend: backend whose `fetch()` yields the raw items
+        :param events: not used by this method
+        :returns: number of enriched items passed to the bulk uploads
+        """
+        items_to_enrich = []
+        num_items = 0
+        ins_items = 0
+
+        for item in ocean_backend.fetch():
+            items_to_enrich.extend(self.get_rich_items(item))
+
+            # Keep buffering until there are enough items for a bulk
+            if len(items_to_enrich) >= MAX_SIZE_BULK_ENRICHED_ITEMS:
+                num_items += len(items_to_enrich)
+                ins_items += self.elastic.bulk_upload(items_to_enrich, self.get_field_unique_id())
+                items_to_enrich = []
+
+        # Upload the last, partially filled bulk
+        if items_to_enrich:
+            num_items += len(items_to_enrich)
+            ins_items += self.elastic.bulk_upload(items_to_enrich, self.get_field_unique_id())
+
+        if num_items != ins_items:
+            missing = num_items - ins_items
+            logger.error("%s/%s missing items for Cocom", str(missing), str(num_items))
+        else:
+            logger.info("%s items inserted for Cocom", str(num_items))
+
+        return num_items
+
+    def __fix_field_date(self, date_value):
+        """Fix possible errors in the field date
+
+        :param date_value: date as a string
+        :returns: the date in ISO 8601 format
+        """
+        field_date = str_to_datetime(date_value)
+
+        # The offset must start with a signed hour ("+02" in "+0200"). If it
+        # cannot be read as an integer (e.g. the empty string of a naive
+        # datetime), the timezone info is dropped
+        try:
+            _ = int(field_date.strftime("%z")[0:3])
+        except ValueError:
+            field_date = field_date.replace(tzinfo=None)
+
+        return field_date.isoformat()
diff --git a/grimoire_elk/raw/cocom.py b/grimoire_elk/raw/cocom.py
new file mode 100644
index 000000000..f32af8ebd
--- /dev/null
+++ b/grimoire_elk/raw/cocom.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2015-2019 Bitergia
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# Authors:
+#   Valerio Cosentino
+#   Nishchith Shetty
+#
+
+from .elastic import ElasticOcean
+from ..elastic_mapping import Mapping as BaseMapping
+
+
+class Mapping(BaseMapping):
+    """Elasticsearch mapping used by the CoCom Ocean feeder"""
+
+    @staticmethod
+    def get_elastic_mappings(es_major):
+        """Get Elasticsearch mapping.
+
+        Ensure data.message is mapped as text, since it can be very large
+
+        :param es_major: major version of Elasticsearch, as string
+        :returns: dictionary with a key, 'items', with the mapping
+        """
+        mapping = '''
+         {
+            "dynamic":true,
+            "properties": {
+                "data": {
+                    "properties": {
+                        "message": {
+                            "type": "text",
+                            "index": true
+                        }
+                    }
+                }
+            }
+        }
+        '''
+
+        return {"items": mapping}
+
+
+class CocomOcean(ElasticOcean):
+    """CoCom Ocean feeder"""
+
+    mapping = Mapping
+
+    @classmethod
+    def get_perceval_params_from_url(cls, url):
+        """Get the backend params from a URL, ignoring any filter after it"""
+
+        # Just split the URL not the filter
+        repo_url = url.split(' ', 1)[0]
+
+        return [repo_url]
diff --git a/grimoire_elk/utils.py b/grimoire_elk/utils.py index d4fa7dd16..c714b4b3a 100755 --- a/grimoire_elk/utils.py +++ b/grimoire_elk/utils.py @@ -29,6 +29,8 @@ from grimoire_elk.elastic import ElasticConnectException from grimoire_elk.elastic import ElasticSearch +# Connectors for Graal +from graal.backends.core.cocom import CoCom, CoComCommand # Connectors for Perceval from grimoire_elk.raw.hyperkitty import HyperKittyOcean from perceval.backends.core.askbot import Askbot, AskbotCommand @@ -68,6 +70,7 @@ from perceval.backends.mozilla.remo import ReMo, ReMoCommand from perceval.backends.opnfv.functest import Functest, FunctestCommand # Connectors for EnrichOcean +from .enriched.cocom import CocomEnrich from .enriched.askbot import AskbotEnrich from .enriched.bugzilla import BugzillaEnrich from .enriched.bugzillarest import BugzillaRESTEnrich @@ -105,6 +108,7 @@ from .enriched.telegram import TelegramEnrich from .enriched.twitter import TwitterEnrich # Connectors for Ocean +from .raw.cocom import CocomOcean from .raw.askbot import AskbotOcean from .raw.bugzilla import BugzillaOcean from .raw.bugzillarest import BugzillaRESTOcean @@ -200,6 +204,7 @@ def get_connectors(): return {"askbot": [Askbot, AskbotOcean, AskbotEnrich, AskbotCommand], "bugzilla": [Bugzilla, BugzillaOcean, BugzillaEnrich, 
BugzillaCommand], "bugzillarest": [BugzillaREST, BugzillaRESTOcean, BugzillaRESTEnrich, BugzillaRESTCommand], + "cocom": [CoCom, CocomOcean, CocomEnrich, CoComCommand], "confluence": [Confluence, ConfluenceOcean, ConfluenceEnrich, ConfluenceCommand], "crates": [Crates, CratesOcean, CratesEnrich, CratesCommand], "discourse": [Discourse, DiscourseOcean, DiscourseEnrich, DiscourseCommand], diff --git a/requirements.txt b/requirements.txt index 05c5b3b15..3a340b687 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,7 @@ urllib3==1.24.3 -e git+https://github.com/chaoss/grimoirelab-cereslib/#egg=grimoirelab-cereslib -e git+https://github.com/chaoss/grimoirelab-kingarthur/#egg=grimoirelab-kingarthur -e git+https://github.com/chaoss/grimoirelab-perceval/#egg=grimoirelab-perceval +-e git+https://github.com/chaoss/grimoirelab-graal/#egg=grimoirelab-graal -e git+https://github.com/chaoss/grimoirelab-perceval-mozilla/#egg=grimoirelab-perceval-mozilla -e git+https://github.com/chaoss/grimoirelab-perceval-opnfv/#egg=grimoirelab-perceval-opnfv -e git+https://github.com/chaoss/grimoirelab-perceval-puppet/#egg=grimoirelab-perceval-puppet diff --git a/tests/data/cocom.json b/tests/data/cocom.json new file mode 100644 index 000000000..49e8a744f --- /dev/null +++ b/tests/data/cocom.json @@ -0,0 +1,143 @@ +[{ + "backend_name": "CoCom", + "backend_version": "0.2.5", + "category": "code_complexity_lizard_file", + "data": { + "Author": "Valerio Cosentino ", + "AuthorDate": "Sun Jun 2 18:34:23 2019 +0200", + "Commit": "Valerio Cosentino ", + "CommitDate": "Sun Jun 2 18:34:23 2019 +0200", + "Signed-off-by": ["Valerio Cosentino "], + "analysis": [{ + "avg_ccn": 2.4193548387096775, + "avg_loc": 8.419354838709678, + "avg_tokens": 60.96774193548387, + "blanks": 158, + "ccn": 75, + "comments": 193, + "ext": "py", + "file_path": "graal/graal.py", + "loc": 372, + "num_funs": 31, + "tokens": 2207 + }], + "analyzer": "lizard_file", + "commit": "692ed86f888d2e7a5ce81a5b8a90f47d05cc5588", 
+ "message": "[graal] Derive `git_path` from `uri`\n \n This code derives the `git_path` of a target repository\n based on its `uri`. This change is needed to allow the\n execution from mordred/ELK, as done with Perceval.\n\n Signed-off-by: Valerio Cosentino " + }, + "graal_version": "0.2.1", + "origin": "https://github.com/chaoss/grimoirelab-graal", + "tag": "https://github.com/chaoss/grimoirelab-graal", + "timestamp": 1562053790.544543, + "updated_on": 1559493263.0, + "uuid": "f86b37d493386ec7467976ff5a707d9c72c54cf9" + }, + { + "backend_name": "CoCom", + "backend_version": "0.2.5", + "category": "code_complexity_lizard_file", + "data": { + "Author": "inishchith ", + "AuthorDate": "Mon Jun 3 22:44:15 2019 +0530", + "Commit": "inishchith ", + "CommitDate": "Mon Jun 3 22:47:27 2019 +0530", + "Signed-off-by": ["inishchith "], + "analysis": [{ + "avg_ccn": 1.3461538461538463, + "avg_loc": 8.826923076923077, + "avg_tokens": 81.92307692307692, + "blanks": 204, + "ccn": 70, + "comments": 77, + "ext": "py", + "file_path": "tests/test_graal.py", + "loc": 527, + "num_funs": 52, + "tokens": 4623 + }], + "analyzer": "lizard_file", + "commit": "41f207a9349ae497055ac03157d9915ae81031e0", + "message": "[tests] Add test for deriving `git_path` from `uri`\n \n Signed-off-by: inishchith " + }, + "graal_version": "0.2.1", + "origin": "https://github.com/chaoss/grimoirelab-graal", + "tag": "https://github.com/chaoss/grimoirelab-graal", + "timestamp": 1562053790.902134, + "updated_on": 1559582247.0, + "uuid": "fc17ad9f41767d66c4d2aed6d4b0ba5d072c9980" + }, + { + "backend_name": "CoCom", + "backend_version": "0.2.5", + "category": "code_complexity_lizard_file", + "data": { + "Author": "Valerio Cosentino ", + "AuthorDate": "Thu Jun 27 09:25:50 2019 +0200", + "Commit": "Valerio Cosentino ", + "CommitDate": "Thu Jun 27 09:25:50 2019 +0200", + "Merge": "5a526a6 26921fe", + "analysis": [{ + "avg_ccn": 2.6666666666666665, + "avg_loc": 19.333333333333332, + "avg_tokens": 129.66666666666666, 
+ "blanks": 26, + "ccn": 8, + "comments": 63, + "ext": "py", + "file_path": "graal/backends/core/analyzers/lizard.py", + "loc": 80, + "num_funs": 3, + "tokens": 421 + }, + { + "avg_ccn": 2.6363636363636362, + "avg_loc": 8.818181818181818, + "avg_tokens": 57.63636363636363, + "blanks": 58, + "ccn": 29, + "comments": 107, + "ext": "py", + "file_path": "graal/backends/core/cocom.py", + "loc": 178, + "num_funs": 11, + "tokens": 938 + }, + { + "avg_ccn": 1.4, + "avg_loc": 13.533333333333333, + "avg_tokens": 127.26666666666667, + "blanks": 71, + "ccn": 21, + "comments": 39, + "ext": "py", + "file_path": "tests/test_cocom.py", + "loc": 234, + "num_funs": 15, + "tokens": 2056 + }, + { + "avg_ccn": 1.3333333333333333, + "avg_loc": 26.666666666666668, + "avg_tokens": 269.3333333333333, + "blanks": 17, + "ccn": 4, + "comments": 25, + "ext": "py", + "file_path": "tests/test_lizard.py", + "loc": 89, + "num_funs": 3, + "tokens": 852 + } + ], + "analyzer": "lizard_file", + "commit": "bfe91c3f9ca046084143f15e117bdd691e0fe12f", + "message": "Merge branch repository_level_cocom_lizard of https: //github.com/inishchith/graal\n \nMerges #39" + }, + "graal_version": "0.2.1", + "origin": "https://github.com/chaoss/grimoirelab-graal", + "tag": "https://github.com/chaoss/grimoirelab-graal", + "timestamp": 1562053800.728394, + "updated_on": 1561620350.0, + "uuid": "49a416e4ab44e6f3b02eb96b08a026abdb6afa96" + } +] \ No newline at end of file diff --git a/tests/test_cocom.py b/tests/test_cocom.py new file mode 100644 index 000000000..62fab32c5 --- /dev/null +++ b/tests/test_cocom.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2015-2019 Bitergia +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. 
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# Authors:
+#     Nishchith Shetty
+#
+import logging
+import unittest
+
+from base import TestBaseBackend
+
+
+HEADER_JSON = {"Content-Type": "application/json"}
+
+# Expected fields of the first enriched file of each test item, in the
+# order of the items (the values correspond to tests/data/cocom.json)
+EXPECTED_FIRST_FILES = [
+    {
+        'ccn': 75,
+        'num_funs': 31,
+        'tokens': 2207,
+        'loc': 372,
+        'ext': "py",
+        'blanks': 158,
+        'comments': 193,
+        'file_path': "graal/graal.py",
+        'modules': ["graal"],
+        'comments_ratio': 0.2669432918395574,
+        'blanks_ratio': 0.21853388658367912
+    },
+    {
+        'ccn': 70,
+        'num_funs': 52,
+        'tokens': 4623,
+        'loc': 527,
+        'ext': "py",
+        'blanks': 204,
+        'comments': 77,
+        'file_path': "tests/test_graal.py",
+        'modules': ["tests"],
+        'comments_ratio': 0.0952970297029703,
+        'blanks_ratio': 0.2524752475247525
+    },
+    {
+        'ccn': 8,
+        'num_funs': 3,
+        'tokens': 421,
+        'loc': 80,
+        'ext': "py",
+        'blanks': 26,
+        'comments': 63,
+        'file_path': "graal/backends/core/analyzers/lizard.py",
+        'modules': ["graal", "graal/backends", "graal/backends/core", "graal/backends/core/analyzers"],
+        'comments_ratio': 0.3727810650887574,
+        'blanks_ratio': 0.15384615384615385
+    }
+]
+
+
+class TestCoCom(TestBaseBackend):
+    """Test CoCom backend"""
+
+    connector = "cocom"
+    ocean_index = "test_" + connector
+    enrich_index = "test_" + connector + "_enrich"
+
+    def _assert_enriched_file(self, eitem, expected):
+        """Check the fields of an enriched item against the expected values
+
+        Float values (the derived ratios) are compared with
+        `assertAlmostEqual`, so the test does not depend on the exact
+        rounding of the division; the other fields must be equal.
+
+        :param eitem: enriched item to check
+        :param expected: dict with the expected value of each field
+        """
+        for field, value in expected.items():
+            if isinstance(value, float):
+                self.assertAlmostEqual(eitem[field], value, msg=field)
+            else:
+                self.assertEqual(eitem[field], value, msg=field)
+
+    def test_has_identites(self):
+        """Test value of has_identities method"""
+
+        enrich_backend = self.connectors[self.connector][2]()
+        self.assertFalse(enrich_backend.has_identities())
+
+    def test_items_to_raw(self):
+        """Test whether JSON items are properly inserted into ES"""
+
+        result = self._test_items_to_raw()
+
+        self.assertGreater(result['items'], 0)
+        self.assertGreater(result['raw'], 0)
+        self.assertGreaterEqual(result['items'], result['raw'])
+
+    def test_raw_to_enrich(self):
+        """Test whether the raw index is properly enriched"""
+
+        result = self._test_raw_to_enrich()
+
+        self.assertGreater(result['raw'], 0)
+        self.assertGreater(result['enrich'], 0)
+        self.assertGreaterEqual(result['enrich'], result['raw'])
+
+        enrich_backend = self.connectors[self.connector][2]()
+
+        # Items are accessed by index, so a missing item fails the test
+        for idx, expected in enumerate(EXPECTED_FIRST_FILES):
+            eitem = enrich_backend.get_rich_items(self.items[idx])[0]
+            self._assert_enriched_file(eitem, expected)
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
+    logging.getLogger("urllib3").setLevel(logging.WARNING)
+    logging.getLogger("requests").setLevel(logging.WARNING)
+    unittest.main(warnings='ignore')