diff --git a/oc_meta/core/curator.py b/oc_meta/core/curator.py index 9a9e4b7d..7ec26e91 100644 --- a/oc_meta/core/curator.py +++ b/oc_meta/core/curator.py @@ -116,9 +116,9 @@ def split_identifiers(self, field_value): else: return re.split(one_or_more_spaces, re.sub(colon_and_spaces, ':', field_value)) - def curator(self, filename:str=None, path_csv:str=None, path_index:str=None, worker_number: int = None): - identifiers, metavals, vvis = self.collect_identifiers(valid_dois_cache=self.valid_dois_cache) - self.finder.get_everything_about_res(identifiers, metavals, vvis, worker_number=worker_number) + def curator(self, filename:str=None, path_csv:str=None, path_index:str=None): + metavals, identifiers, vvis = self.collect_identifiers(valid_dois_cache=self.valid_dois_cache) + self.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis) for row in self.data: self.log[self.rowcnt] = { 'id': {}, diff --git a/oc_meta/lib/finder.py b/oc_meta/lib/finder.py index 94adedd3..9a7447bc 100644 --- a/oc_meta/lib/finder.py +++ b/oc_meta/lib/finder.py @@ -685,7 +685,7 @@ def retrieve_publisher_from_br_metaid(self, metaid:str): publishers_output.append(pub_full) return '; '.join(publishers_output) - def get_everything_about_res(self, metavals: set, identifiers: set, vvis: set, worker_number: int = None, max_depth: int = 4) -> None: + def get_everything_about_res(self, metavals: set, identifiers: set, vvis: set, max_depth: int = 4) -> None: BATCH_SIZE = None use_text_search = self.blazegraph_full_text_search def batch_process(input_set, batch_size): diff --git a/oc_meta/plugins/multiprocess/resp_agents_curator.py b/oc_meta/plugins/multiprocess/resp_agents_curator.py index 6b2776d9..1d0a74de 100644 --- a/oc_meta/plugins/multiprocess/resp_agents_curator.py +++ b/oc_meta/plugins/multiprocess/resp_agents_curator.py @@ -48,8 +48,8 @@ def __init__(self, data:List[dict], ts:str, prov_config:str, info_dir:str, base_ self.preexisting_entities = set() def curator(self, filename:str=None, path_csv:str=None, path_index:str=None): - identifiers, metavals, vvis = self.collect_identifiers(valid_dois_cache=dict()) - self.finder.get_everything_about_res(identifiers, metavals, vvis) + metavals, identifiers, vvis = self.collect_identifiers(valid_dois_cache=dict()) + self.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis) for row in self.data: self.log[self.rowcnt] = { 'id': {}, diff --git a/oc_meta/run/meta_process.py b/oc_meta/run/meta_process.py index e9a808ef..9e759c9b 100644 --- a/oc_meta/run/meta_process.py +++ b/oc_meta/run/meta_process.py @@ -135,7 +135,7 @@ def curate_and_create(self, filename:str, cache_path:str, errors_path:str, worke silencer=self.silencer, meta_config_path=meta_config_path) name = f"{filename.replace('.csv', '')}_{datetime.now().strftime('%Y-%m-%dT%H-%M-%S')}" - curator_obj.curator(filename=name, path_csv=self.output_csv_dir, path_index=self.indexes_dir, worker_number=int(worker_number) if worker_number else None) + curator_obj.curator(filename=name, path_csv=self.output_csv_dir, path_index=self.indexes_dir) # Creator creator_info_dir = os.path.join(self.info_dir, 'creator' + os.sep) if resp_agents_only: diff --git a/test/creator_test.py b/test/creator_test.py index abfa2f1f..a64c78ce 100644 --- a/test/creator_test.py +++ b/test/creator_test.py @@ -1,7 +1,6 @@ import json import os import unittest -from test.curator_test import reset_server from rdflib import XSD, Graph, compare from rdflib.term import _toPythonMapping @@ -9,8 +8,16 @@ from oc_meta.core.creator import * from oc_meta.lib.file_manager import get_csv_data from oc_meta.plugins.multiprocess.resp_agents_creator import RespAgentsCreator +from SPARQLWrapper import POST, SPARQLWrapper SERVER = 'http://127.0.0.1:9999/blazegraph/sparql' +SERVER = 'http://127.0.0.1:9999/blazegraph/sparql' + +def reset_server(server:str=SERVER) -> None: + ts = SPARQLWrapper(server) + ts.setQuery('delete{?x ?y ?z} where{?x ?y ?z}') + ts.setMethod(POST) + ts.query() # The following function has been added for handling gYear and gYearMonth correctly. # Source: https://github.com/opencitations/script/blob/master/ocdm/storer.py diff --git a/test/curator_test.py b/test/curator_test.py index d54a2a42..b9761de2 100644 --- a/test/curator_test.py +++ b/test/curator_test.py @@ -284,7 +284,6 @@ def test_clean_vvi_new_volume_and_issue(self): } } } - print(curator.vvi) self.assertEqual(curator.vvi, expected_output) def test_clean_ra_overlapping_surnames(self): diff --git a/test/editor_test.py b/test/editor_test.py index 33931a98..313ad1e8 100644 --- a/test/editor_test.py +++ b/test/editor_test.py @@ -18,7 +18,6 @@ import os import unittest from shutil import rmtree -from test.curator_test import reset_server import yaml from oc_ocdm import Storer @@ -28,11 +27,19 @@ from rdflib import URIRef from oc_meta.plugins.editor import MetaEditor -from oc_meta.run.meta_process import MetaProcess, run_meta_process +from oc_meta.run.meta_process import run_meta_process +from SPARQLWrapper import POST, SPARQLWrapper BASE = os.path.join('test', 'editor') OUTPUT = os.path.join(BASE, 'output') META_CONFIG = os.path.join(BASE, 'meta_config.yaml') +SERVER = 'http://127.0.0.1:9999/blazegraph/sparql' + +def reset_server(server:str=SERVER) -> None: + ts = SPARQLWrapper(server) + ts.setQuery('delete{?x ?y ?z} where{?x ?y ?z}') + ts.setMethod(POST) + ts.query() class TestEditor(unittest.TestCase): def setUp(self): diff --git a/test/endgame_test.py b/test/endgame_test.py index 7c555a37..91efdc0f 100644 --- a/test/endgame_test.py +++ b/test/endgame_test.py @@ -14,16 +14,18 @@ # SOFTWARE. import os -import subprocess import unittest -from sys import platform -from test.curator_test import reset_server +from SPARQLWrapper import POST, SPARQLWrapper -import yaml - -from oc_meta.run.meta_process import MetaProcess, run_meta_process BASE_DIR = os.path.join('test', 'endgame') +SERVER = 'http://127.0.0.1:9999/blazegraph/sparql' + +def reset_server(server:str=SERVER) -> None: + ts = SPARQLWrapper(server) + ts.setQuery('delete{?x ?y ?z} where{?x ?y ?z}') + ts.setMethod(POST) + ts.query() # class test_Endgame(unittest.TestCase): # def test_endgame(self): diff --git a/test/meta_process_test.py b/test/meta_process_test.py index b9dafd13..c36eb6d7 100644 --- a/test/meta_process_test.py +++ b/test/meta_process_test.py @@ -5,18 +5,24 @@ import sys import unittest from datetime import datetime -from test.curator_test import reset_server from zipfile import ZipFile import rdflib import yaml from rdflib import ConjunctiveGraph, Graph, Literal, Namespace, URIRef -from SPARQLWrapper import JSON, SPARQLWrapper +from SPARQLWrapper import JSON, POST, SPARQLWrapper from oc_meta.lib.file_manager import get_csv_data from oc_meta.run.meta_process import merge_rdf_files, run_meta_process BASE_DIR = os.path.join('test', 'meta_process') +SERVER = 'http://127.0.0.1:9999/blazegraph/sparql' + +def reset_server(server:str=SERVER) -> None: + ts = SPARQLWrapper(server) + ts.setQuery('delete{?x ?y ?z} where{?x ?y ?z}') + ts.setMethod(POST) + ts.query() def delete_output_zip(base_dir:str, start_time:datetime) -> None: for file in os.listdir(base_dir):