Skip to content

Commit

Permalink
adjustments merge duplicates #74
Browse files Browse the repository at this point in the history
  • Loading branch information
FedorSteeman committed Dec 9, 2022
1 parent 881fb10 commit 158fd03
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 29 deletions.
39 changes: 21 additions & 18 deletions MassDigitizer/GBIF_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,25 +83,28 @@ def matchName(self, object_name, taxon_name, collection_id, kingdom=''):
# Fetch possible alternatives with matching taxon names
urlString = self.baseURL + f'{object_name}/match?verbose=true&kingdom={kingdom}&name={taxon_name}'
print(urlString)
response = self.spSession.get(urlString)
try:
response = self.spSession.get(urlString)
# If succesful, load response into json object
if response.status_code < 299:
result = json.loads(response.text)

# If succesful, load response into json object
if response.status_code < 299:
result = json.loads(response.text)

# Also add main entry
if 'usageKey' in result:
mainSpecies = self.fetchSpecies(int(result['usageKey']))
acceptedNames.append(mainSpecies)

# Check for suggested alternatives and add to accepted names list, thereby removing synonyms
if 'alternatives' in result:
matches = result['alternatives']
for m in matches:
#print(m['usageKey'])
if 'matchtype' in m and 'status' in m:
if m['matchType'] == 'EXACT' and (m['status'] == 'ACCEPTED' or m['status'] == 'DOUBTFUL'):
acceptedNames.append(self.getSpecies(int(m['usageKey'])))
# Also add main entry
if 'usageKey' in result:
mainSpecies = self.fetchSpecies(int(result['usageKey']))
acceptedNames.append(mainSpecies)

# Check for suggested alternatives and add to accepted names list, thereby removing synonyms
if 'alternatives' in result:
matches = result['alternatives']
for m in matches:
#print(m['usageKey'])
if 'matchtype' in m and 'status' in m:
if m['matchType'] == 'EXACT' and (m['status'] == 'ACCEPTED' or m['status'] == 'DOUBTFUL'):
acceptedNames.append(self.getSpecies(int(m['usageKey'])))
except:
print("Error occurred fetching accepting names at GBIF API!")
pass

return acceptedNames

Expand Down
7 changes: 6 additions & 1 deletion MassDigitizer/models/taxon.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,12 @@ def loadPredefinedData(self):

def getParent(self, specify_interface):
    """Fetch this taxon's parent record from the Specify API and wrap it in a Taxon.

    Args:
        specify_interface: interface object exposing getSpecifyObject(sptype, id).

    Returns:
        Taxon: the parent taxon instance. If the API call or the fill step
        fails, the instance is returned unfilled (best-effort behavior kept
        from the original code).
    """
    self.parent = Taxon(self.collectionId)
    try:
        parentTaxonObj = specify_interface.getSpecifyObject(self.sptype, self.parentId)
        self.parent.fill(parentTaxonObj)
    except Exception as e:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit still
        # propagate; include the cause so failures are diagnosable. The
        # redundant `pass` after print was removed.
        print(f"ERROR: Failed to retrieve parent taxon. ({e})")
    return self.parent

def getParentage(self, specify_interface):
Expand Down
2 changes: 1 addition & 1 deletion MassDigitizer/specify_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def getSpecifyObjects(self, objectName, limit=100, offset=0, filters={}):
for key in filters:
filterString += '&' + key + '=' + filters[key]
apiCallString = f'{gs.baseURL}api/specify/{objectName}/?limit={limit}&offset={offset}{filterString}'
#print(" -> " + apiCallString)
#if limit == 1000: print("" + apiCallString)

response = self.spSession.get(apiCallString, headers=headers)
#print(' - Response: %s %s' %(str(response.status_code), response.reason))
Expand Down
18 changes: 9 additions & 9 deletions MassDigitizer/specify_merge_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@
from models import collection as col
from models import discipline as dsc

gs.baseURL = 'https://specify-snm.science.ku.dk/'

class MergeDuplicates():

def __init__(self) -> None:
Expand All @@ -49,16 +47,16 @@ def __init__(self) -> None:
# Set up logging
self.logger = logging.getLogger('MergeDuplicates')
self.logger.setLevel(logging.DEBUG)
logStreamFormatter = logging.Formatter(fmt=f"%(message)s", datefmt="%Y-%m-%d %H:%M:%S")
consoleHandler = logging.StreamHandler(stream=sys.stdout)
consoleHandler.setFormatter(logStreamFormatter)
consoleHandler.setLevel(level=logging.DEBUG)
#logStreamFormatter = logging.Formatter(fmt=f"%(message)s", datefmt="%Y-%m-%d %H:%M:%S")
#consoleHandler = logging.StreamHandler(stream=sys.stdout)
#consoleHandler.setFormatter(logStreamFormatter)
#consoleHandler.setLevel(level=logging.DEBUG)
#self.logger.addHandler(consoleHandler)
logFileFormatter = logging.Formatter(fmt=f"%(message)s", datefmt="%Y-%m-%d %H:%M:%S")
fileHandler = logging.FileHandler(filename=f'log/MergeDuplicates_{time.time()}.log')
fileHandler.setFormatter(logFileFormatter)
fileHandler.setLevel(level=logging.INFO)
self.logger.addHandler(fileHandler)
self.logger.addHandler(consoleHandler)

def main(self):

Expand Down Expand Up @@ -104,6 +102,7 @@ def main(self):
max_tries = max_tries - 1
self.logger.info('Attempts left: %i' % max_tries)
if input('Try again? (y/n)') == 'n' : break

self.logger.info('done')

def scan(self):
Expand All @@ -121,7 +120,7 @@ def scan(self):
self.logger.info(f'RANK "{rankName}" ({rankId})')

# Only look at rank genera and below
if rankId >= 180:
if rankId >= 181:
offset = 0
resultCount = -1
while resultCount != 0:
Expand Down Expand Up @@ -237,7 +236,7 @@ def scan(self):
# Merge taxa
if target is not None and source is not None:
# Stop latch for user interaction
if input(f'Do you want to merge {source.spid} with {target.spid} (y/n)?') == 'y':
if True: # input(f'Do you want to merge {source.spid} with {target.spid} (y/n)?') == 'y':
# Do the actual merging
start = time.time()
response = self.sp.mergeTaxa(source.spid, target.spid)
Expand Down Expand Up @@ -308,5 +307,6 @@ def handleQualifiedTaxa(self):

pass

# Script entry point: run the duplicate-merge job only when executed directly,
# not when this module is imported (the original ran these statements at
# import time, which is a side-effect hazard for any importer).
if __name__ == "__main__":
    # Configure the Specify API endpoint before constructing the job.
    gs.baseURL = 'https://specify-snm.science.ku.dk/'
    md = MergeDuplicates()
    md.main()

0 comments on commit 158fd03

Please sign in to comment.