From 701fb4a71763f67891fa3e1e16db100fe9b50837 Mon Sep 17 00:00:00 2001 From: ajstanley Date: Tue, 28 May 2024 19:47:23 -0300 Subject: [PATCH 1/4] allowed i7 harvesting without checking certs --- i7Import/get_islandora_7_content.py | 4 +++- i7Import/i7ImportUtilities.py | 14 +++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/i7Import/get_islandora_7_content.py b/i7Import/get_islandora_7_content.py index aec95fa..5cbaeb9 100755 --- a/i7Import/get_islandora_7_content.py +++ b/i7Import/get_islandora_7_content.py @@ -22,7 +22,7 @@ parser = argparse.ArgumentParser() parser.add_argument("--config", required=True, help="Configuration file to use.") parser.add_argument( - "--metadata_solr_request", required=False, help="Option to solr metadata request." + "--metadata_solr_request", required=False, help="Option to supply solr metadata request." ) args = parser.parse_args() utils = i7ImportUtilities(args.config) @@ -44,6 +44,8 @@ metadata_solr_request = utils.get_metadata_solr_request(args.metadata_solr_request) else: metadata_solr_request = utils.get_default_metadata_solr_request() +if config['secure_ssl_only']: + requests.packages.urllib3.disable_warnings() if config["debug"]: pretty_print = metadata_solr_request.replace("&", "\n&") print(f"Solr request: {pretty_print}") diff --git a/i7Import/i7ImportUtilities.py b/i7Import/i7ImportUtilities.py index 8aab6f2..e60b4c1 100644 --- a/i7Import/i7ImportUtilities.py +++ b/i7Import/i7ImportUtilities.py @@ -55,6 +55,7 @@ def __init__(self, config_location): "solr_filters": False, "start": 0, "rows": 100000, + "secure_ssl_only": True, } def get_config(self): @@ -97,7 +98,9 @@ def parse_rels_ext(self, pid): print(f"\n{rels_ext_url}") try: rels_ext_download_response = requests.get( - url=rels_ext_url, allow_redirects=True + verify=self.config["secure_ssl_only"], + url=rels_ext_url, + allow_redirects=True ) if rels_ext_download_response.ok: rel_ext = {} @@ -136,7 +139,9 @@ def get_default_metadata_solr_request(self): # then used in another query to get the populated CSV data. try: field_list_response = requests.get( - url=fields_solr_url, allow_redirects=True + verify=self.config["secure_ssl_only"], + url=fields_solr_url, + allow_redirects=True ) raw_field_list = field_list_response.content.decode() except requests.exceptions.RequestException as e: @@ -196,7 +201,10 @@ def get_i7_asset(self, pid, datastream): if self.config["get_file_url"]: obj_download_response = requests.head(url=obj_url, allow_redirects=True) else: - obj_download_response = requests.get(url=obj_url, allow_redirects=True) + obj_download_response = requests.get( + verify=self.config["secure_ssl_only"], + url=obj_url, + allow_redirects=True) if obj_download_response.status_code == 200: # Get MIMETYPE from 'Content-Type' header obj_mimetype = obj_download_response.headers["content-type"] From 26b863988b464d391aad764884247cabfeabd321 Mon Sep 17 00:00:00 2001 From: ajstanley Date: Tue, 28 May 2024 19:56:54 -0300 Subject: [PATCH 2/4] formatting --- i7Import/get_islandora_7_content.py | 2 +- i7Import/i7ImportUtilities.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/i7Import/get_islandora_7_content.py b/i7Import/get_islandora_7_content.py index 5cbaeb9..aafd7bd 100755 --- a/i7Import/get_islandora_7_content.py +++ b/i7Import/get_islandora_7_content.py @@ -85,7 +85,7 @@ row_count = 0 pbar = InitBar() num_csv_rows = len(rows) -print(f"Processing {num_csv_rows -1}.") +print(f"Processing {num_csv_rows - 1}.") with open(config["csv_output_path"], "w", newline="") as csvfile: writer = csv.DictWriter(csvfile, fieldnames=headers) writer.writeheader() diff --git a/i7Import/i7ImportUtilities.py b/i7Import/i7ImportUtilities.py index e60b4c1..5477b92 100644 --- a/i7Import/i7ImportUtilities.py +++ b/i7Import/i7ImportUtilities.py @@ -112,11 +112,11 @@ def parse_rels_ext(self, pid): ".//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description" ) for x in description: - tag = x.tag[x.tag.find("}") + 1 :] + tag = x.tag[x.tag.find("}") + 1:] text = x.text if x.attrib.items(): text = next(iter(x.attrib.items()))[1] - text = text[text.find("/") + 1 :] + text = text[text.find("/") + 1:] rel_ext[tag] = text return rel_ext else: From 5862b07934e8d52f0635fd75a9839ac1ce143938 Mon Sep 17 00:00:00 2001 From: ajstanley Date: Tue, 28 May 2024 20:01:41 -0300 Subject: [PATCH 3/4] more formatting --- i7Import/get_islandora_7_content.py | 6 ++++-- i7Import/i7ImportUtilities.py | 11 ++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/i7Import/get_islandora_7_content.py b/i7Import/get_islandora_7_content.py index aafd7bd..65605cc 100755 --- a/i7Import/get_islandora_7_content.py +++ b/i7Import/get_islandora_7_content.py @@ -22,7 +22,9 @@ parser = argparse.ArgumentParser() parser.add_argument("--config", required=True, help="Configuration file to use.") parser.add_argument( - "--metadata_solr_request", required=False, help="Option to supply solr metadata request." + "--metadata_solr_request", + required=False, + help="Option to supply solr metadata request.", ) args = parser.parse_args() utils = i7ImportUtilities(args.config) @@ -44,7 +46,7 @@ metadata_solr_request = utils.get_metadata_solr_request(args.metadata_solr_request) else: metadata_solr_request = utils.get_default_metadata_solr_request() -if config['secure_ssl_only']: +if config["secure_ssl_only"]: requests.packages.urllib3.disable_warnings() if config["debug"]: pretty_print = metadata_solr_request.replace("&", "\n&") diff --git a/i7Import/i7ImportUtilities.py b/i7Import/i7ImportUtilities.py index 5477b92..e60b45e 100644 --- a/i7Import/i7ImportUtilities.py +++ b/i7Import/i7ImportUtilities.py @@ -100,7 +100,7 @@ def parse_rels_ext(self, pid): rels_ext_download_response = requests.get( verify=self.config["secure_ssl_only"], url=rels_ext_url, - allow_redirects=True + allow_redirects=True, ) if rels_ext_download_response.ok: rel_ext = {} @@ -112,11 +112,11 @@ def parse_rels_ext(self, pid): ".//{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description" ) for x in description: - tag = x.tag[x.tag.find("}") + 1:] + tag = x.tag[x.tag.find("}") + 1 :] text = x.text if x.attrib.items(): text = next(iter(x.attrib.items()))[1] - text = text[text.find("/") + 1:] + text = text[text.find("/") + 1 :] rel_ext[tag] = text return rel_ext else: @@ -141,7 +141,7 @@ def get_default_metadata_solr_request(self): field_list_response = requests.get( verify=self.config["secure_ssl_only"], url=fields_solr_url, - allow_redirects=True + allow_redirects=True, ) raw_field_list = field_list_response.content.decode() except requests.exceptions.RequestException as e: @@ -204,7 +204,8 @@ def get_i7_asset(self, pid, datastream): obj_download_response = requests.get( verify=self.config["secure_ssl_only"], url=obj_url, - allow_redirects=True) + allow_redirects=True, + ) if obj_download_response.status_code == 200: # Get MIMETYPE from 'Content-Type' header obj_mimetype = obj_download_response.headers["content-type"] From 7cdc8e69447801d588e98680b19d01aabfaec39d Mon Sep 17 00:00:00 2001 From: ajstanley Date: Wed, 29 May 2024 00:01:20 -0300 Subject: [PATCH 4/4] noobie logic error --- i7Import/get_islandora_7_content.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/i7Import/get_islandora_7_content.py b/i7Import/get_islandora_7_content.py index 65605cc..c2ae749 100755 --- a/i7Import/get_islandora_7_content.py +++ b/i7Import/get_islandora_7_content.py @@ -46,7 +46,7 @@ metadata_solr_request = utils.get_metadata_solr_request(args.metadata_solr_request) else: metadata_solr_request = utils.get_default_metadata_solr_request() -if config["secure_ssl_only"]: +if config["secure_ssl_only"] is False: requests.packages.urllib3.disable_warnings() if config["debug"]: pretty_print = metadata_solr_request.replace("&", "\n&")