Skip to content

Commit

Permalink
respagentcurator fix
Browse files Browse the repository at this point in the history
  • Loading branch information
arcangelo7 committed May 16, 2024
1 parent 886c43e commit 8588bdb
Show file tree
Hide file tree
Showing 9 changed files with 40 additions and 19 deletions.
6 changes: 3 additions & 3 deletions oc_meta/core/curator.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@ def split_identifiers(self, field_value):
else:
return re.split(one_or_more_spaces, re.sub(colon_and_spaces, ':', field_value))

def curator(self, filename:str=None, path_csv:str=None, path_index:str=None, worker_number: int = None):
identifiers, metavals, vvis = self.collect_identifiers(valid_dois_cache=self.valid_dois_cache)
self.finder.get_everything_about_res(identifiers, metavals, vvis, worker_number=worker_number)
def curator(self, filename:str=None, path_csv:str=None, path_index:str=None):
metavals, identifiers, vvis = self.collect_identifiers(valid_dois_cache=self.valid_dois_cache)
self.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)
for row in self.data:
self.log[self.rowcnt] = {
'id': {},
Expand Down
2 changes: 1 addition & 1 deletion oc_meta/lib/finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,7 @@ def retrieve_publisher_from_br_metaid(self, metaid:str):
publishers_output.append(pub_full)
return '; '.join(publishers_output)

def get_everything_about_res(self, metavals: set, identifiers: set, vvis: set, worker_number: int = None, max_depth: int = 4) -> None:
def get_everything_about_res(self, metavals: set, identifiers: set, vvis: set, max_depth: int = 4) -> None:
BATCH_SIZE = None
use_text_search = self.blazegraph_full_text_search
def batch_process(input_set, batch_size):
Expand Down
4 changes: 2 additions & 2 deletions oc_meta/plugins/multiprocess/resp_agents_curator.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ def __init__(self, data:List[dict], ts:str, prov_config:str, info_dir:str, base_
self.preexisting_entities = set()

def curator(self, filename:str=None, path_csv:str=None, path_index:str=None):
identifiers, metavals, vvis = self.collect_identifiers(valid_dois_cache=dict())
self.finder.get_everything_about_res(identifiers, metavals, vvis)
metavals, identifiers, vvis = self.collect_identifiers(valid_dois_cache=dict())
self.finder.get_everything_about_res(metavals=metavals, identifiers=identifiers, vvis=vvis)
for row in self.data:
self.log[self.rowcnt] = {
'id': {},
Expand Down
2 changes: 1 addition & 1 deletion oc_meta/run/meta_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def curate_and_create(self, filename:str, cache_path:str, errors_path:str, worke
silencer=self.silencer,
meta_config_path=meta_config_path)
name = f"{filename.replace('.csv', '')}_{datetime.now().strftime('%Y-%m-%dT%H-%M-%S')}"
curator_obj.curator(filename=name, path_csv=self.output_csv_dir, path_index=self.indexes_dir, worker_number=int(worker_number) if worker_number else None)
curator_obj.curator(filename=name, path_csv=self.output_csv_dir, path_index=self.indexes_dir)
# Creator
creator_info_dir = os.path.join(self.info_dir, 'creator' + os.sep)
if resp_agents_only:
Expand Down
9 changes: 8 additions & 1 deletion test/creator_test.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
import json
import os
import unittest
from test.curator_test import reset_server

from rdflib import XSD, Graph, compare
from rdflib.term import _toPythonMapping

from oc_meta.core.creator import *
from oc_meta.lib.file_manager import get_csv_data
from oc_meta.plugins.multiprocess.resp_agents_creator import RespAgentsCreator
from SPARQLWrapper import POST, SPARQLWrapper

SERVER = 'http://127.0.0.1:9999/blazegraph/sparql'
SERVER = 'http://127.0.0.1:9999/blazegraph/sparql'

def reset_server(server:str=SERVER) -> None:
    """Delete every triple matched by the catch-all pattern on *server*.

    Builds a SPARQLWrapper client for the given endpoint URL, loads the
    update ``delete{?x ?y ?z} where{?x ?y ?z}``, sets the request method
    to POST and executes it. The result of the query is discarded.

    :param server: URL of the SPARQL endpoint; defaults to the
        module-level ``SERVER`` constant.
    """
    endpoint = SPARQLWrapper(server)
    endpoint.setQuery('delete{?x ?y ?z} where{?x ?y ?z}')
    endpoint.setMethod(POST)
    endpoint.query()

# The following function has been added for handling gYear and gYearMonth correctly.
# Source: https://github.com/opencitations/script/blob/master/ocdm/storer.py
Expand Down
1 change: 0 additions & 1 deletion test/curator_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,6 @@ def test_clean_vvi_new_volume_and_issue(self):
}
}
}
print(curator.vvi)
self.assertEqual(curator.vvi, expected_output)

def test_clean_ra_overlapping_surnames(self):
Expand Down
11 changes: 9 additions & 2 deletions test/editor_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import os
import unittest
from shutil import rmtree
from test.curator_test import reset_server

import yaml
from oc_ocdm import Storer
Expand All @@ -28,11 +27,19 @@
from rdflib import URIRef

from oc_meta.plugins.editor import MetaEditor
from oc_meta.run.meta_process import MetaProcess, run_meta_process
from oc_meta.run.meta_process import run_meta_process
from SPARQLWrapper import POST, SPARQLWrapper

BASE = os.path.join('test', 'editor')
OUTPUT = os.path.join(BASE, 'output')
META_CONFIG = os.path.join(BASE, 'meta_config.yaml')
SERVER = 'http://127.0.0.1:9999/blazegraph/sparql'

def reset_server(server:str=SERVER) -> None:
    """Delete every triple matched by the catch-all pattern on *server*.

    Builds a SPARQLWrapper client for the given endpoint URL, loads the
    update ``delete{?x ?y ?z} where{?x ?y ?z}``, sets the request method
    to POST and executes it. The result of the query is discarded.

    :param server: URL of the SPARQL endpoint; defaults to the
        module-level ``SERVER`` constant.
    """
    endpoint = SPARQLWrapper(server)
    endpoint.setQuery('delete{?x ?y ?z} where{?x ?y ?z}')
    endpoint.setMethod(POST)
    endpoint.query()

class TestEditor(unittest.TestCase):
def setUp(self):
Expand Down
14 changes: 8 additions & 6 deletions test/endgame_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,18 @@
# SOFTWARE.

import os
import subprocess
import unittest
from sys import platform
from test.curator_test import reset_server
from SPARQLWrapper import POST, SPARQLWrapper

import yaml

from oc_meta.run.meta_process import MetaProcess, run_meta_process

BASE_DIR = os.path.join('test', 'endgame')
SERVER = 'http://127.0.0.1:9999/blazegraph/sparql'

def reset_server(server:str=SERVER) -> None:
    """Delete every triple matched by the catch-all pattern on *server*.

    Builds a SPARQLWrapper client for the given endpoint URL, loads the
    update ``delete{?x ?y ?z} where{?x ?y ?z}``, sets the request method
    to POST and executes it. The result of the query is discarded.

    :param server: URL of the SPARQL endpoint; defaults to the
        module-level ``SERVER`` constant.
    """
    endpoint = SPARQLWrapper(server)
    endpoint.setQuery('delete{?x ?y ?z} where{?x ?y ?z}')
    endpoint.setMethod(POST)
    endpoint.query()

# class test_Endgame(unittest.TestCase):
# def test_endgame(self):
Expand Down
10 changes: 8 additions & 2 deletions test/meta_process_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,24 @@
import sys
import unittest
from datetime import datetime
from test.curator_test import reset_server
from zipfile import ZipFile

import rdflib
import yaml
from rdflib import ConjunctiveGraph, Graph, Literal, Namespace, URIRef
from SPARQLWrapper import JSON, SPARQLWrapper
from SPARQLWrapper import JSON, POST, SPARQLWrapper

from oc_meta.lib.file_manager import get_csv_data
from oc_meta.run.meta_process import merge_rdf_files, run_meta_process

BASE_DIR = os.path.join('test', 'meta_process')
SERVER = 'http://127.0.0.1:9999/blazegraph/sparql'

def reset_server(server:str=SERVER) -> None:
    """Delete every triple matched by the catch-all pattern on *server*.

    Builds a SPARQLWrapper client for the given endpoint URL, loads the
    update ``delete{?x ?y ?z} where{?x ?y ?z}``, sets the request method
    to POST and executes it. The result of the query is discarded.

    :param server: URL of the SPARQL endpoint; defaults to the
        module-level ``SERVER`` constant.
    """
    endpoint = SPARQLWrapper(server)
    endpoint.setQuery('delete{?x ?y ?z} where{?x ?y ?z}')
    endpoint.setMethod(POST)
    endpoint.query()

def delete_output_zip(base_dir:str, start_time:datetime) -> None:
for file in os.listdir(base_dir):
Expand Down

0 comments on commit 8588bdb

Please sign in to comment.