Skip to content

Commit

Permalink
adjustments merge duplicates #74
Browse files Browse the repository at this point in the history
  • Loading branch information
FedorSteeman committed Dec 9, 2022
1 parent 881fb10 commit 158fd03
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 29 deletions.
39 changes: 21 additions & 18 deletions MassDigitizer/GBIF_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,25 +83,28 @@ def matchName(self, object_name, taxon_name, collection_id, kingdom=''):
# Fetch possible alternatives with matching taxon names
urlString = self.baseURL + f'{object_name}/match?verbose=true&kingdom={kingdom}&name={taxon_name}'
print(urlString)
response = self.spSession.get(urlString)
try:
response = self.spSession.get(urlString)
# If succesful, load response into json object
if response.status_code < 299:
result = json.loads(response.text)

# If succesful, load response into json object
if response.status_code < 299:
result = json.loads(response.text)

# Also add main entry
if 'usageKey' in result:
mainSpecies = self.fetchSpecies(int(result['usageKey']))
acceptedNames.append(mainSpecies)

# Check for suggested alternatives and add to accepted names list, thereby removing synonyms
if 'alternatives' in result:
matches = result['alternatives']
for m in matches:
#print(m['usageKey'])
if 'matchtype' in m and 'status' in m:
if m['matchType'] == 'EXACT' and (m['status'] == 'ACCEPTED' or m['status'] == 'DOUBTFUL'):
acceptedNames.append(self.getSpecies(int(m['usageKey'])))
# Also add main entry
if 'usageKey' in result:
mainSpecies = self.fetchSpecies(int(result['usageKey']))
acceptedNames.append(mainSpecies)

# Check for suggested alternatives and add to accepted names list, thereby removing synonyms
if 'alternatives' in result:
matches = result['alternatives']
for m in matches:
#print(m['usageKey'])
if 'matchtype' in m and 'status' in m:
if m['matchType'] == 'EXACT' and (m['status'] == 'ACCEPTED' or m['status'] == 'DOUBTFUL'):
acceptedNames.append(self.getSpecies(int(m['usageKey'])))
except:
print("Error occurred fetching accepting names at GBIF API!")
pass

return acceptedNames

Expand Down
7 changes: 6 additions & 1 deletion MassDigitizer/models/taxon.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,12 @@ def loadPredefinedData(self):

def getParent(self, specify_interface):
    """Fetch this taxon's parent record from the Specify API and wrap it in a Taxon.

    Args:
        specify_interface: interface object exposing getSpecifyObject(sptype, id).

    Returns:
        Taxon: the parent taxon instance. If the API call or the fill step
        fails, the instance is returned unfilled (best-effort behavior kept
        from the original code).
    """
    self.parent = Taxon(self.collectionId)
    try:
        parentTaxonObj = specify_interface.getSpecifyObject(self.sptype, self.parentId)
        self.parent.fill(parentTaxonObj)
    except Exception as e:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit still
        # propagate; include the cause so failures are diagnosable. The
        # redundant `pass` after print was removed.
        print(f"ERROR: Failed to retrieve parent taxon. ({e})")
    return self.parent

def getParentage(self, specify_interface):
Expand Down
2 changes: 1 addition & 1 deletion MassDigitizer/specify_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def getSpecifyObjects(self, objectName, limit=100, offset=0, filters={}):
for key in filters:
filterString += '&' + key + '=' + filters[key]
apiCallString = f'{gs.baseURL}api/specify/{objectName}/?limit={limit}&offset={offset}{filterString}'
#print(" -> " + apiCallString)
#if limit == 1000: print("" + apiCallString)

response = self.spSession.get(apiCallString, headers=headers)
#print(' - Response: %s %s' %(str(response.status_code), response.reason))
Expand Down
18 changes: 9 additions & 9 deletions MassDigitizer/specify_merge_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@
from models import collection as col
from models import discipline as dsc

gs.baseURL = 'https://specify-snm.science.ku.dk/'

class MergeDuplicates():

def __init__(self) -> None:
Expand All @@ -49,16 +47,16 @@ def __init__(self) -> None:
# Set up logging
self.logger = logging.getLogger('MergeDuplicates')
self.logger.setLevel(logging.DEBUG)
logStreamFormatter = logging.Formatter(fmt=f"%(message)s", datefmt="%Y-%m-%d %H:%M:%S")
consoleHandler = logging.StreamHandler(stream=sys.stdout)
consoleHandler.setFormatter(logStreamFormatter)
consoleHandler.setLevel(level=logging.DEBUG)
#logStreamFormatter = logging.Formatter(fmt=f"%(message)s", datefmt="%Y-%m-%d %H:%M:%S")
#consoleHandler = logging.StreamHandler(stream=sys.stdout)
#consoleHandler.setFormatter(logStreamFormatter)
#consoleHandler.setLevel(level=logging.DEBUG)
#self.logger.addHandler(consoleHandler)
logFileFormatter = logging.Formatter(fmt=f"%(message)s", datefmt="%Y-%m-%d %H:%M:%S")
fileHandler = logging.FileHandler(filename=f'log/MergeDuplicates_{time.time()}.log')
fileHandler.setFormatter(logFileFormatter)
fileHandler.setLevel(level=logging.INFO)
self.logger.addHandler(fileHandler)
self.logger.addHandler(consoleHandler)

def main(self):

Expand Down Expand Up @@ -104,6 +102,7 @@ def main(self):
max_tries = max_tries - 1
self.logger.info('Attempts left: %i' % max_tries)
if input('Try again? (y/n)') == 'n' : break

self.logger.info('done')

def scan(self):
Expand All @@ -121,7 +120,7 @@ def scan(self):
self.logger.info(f'RANK "{rankName}" ({rankId})')

# Only look at rank genera and below
if rankId >= 180:
if rankId >= 181:
offset = 0
resultCount = -1
while resultCount != 0:
Expand Down Expand Up @@ -237,7 +236,7 @@ def scan(self):
# Merge taxa
if target is not None and source is not None:
# Stop latch for user interaction
if input(f'Do you want to merge {source.spid} with {target.spid} (y/n)?') == 'y':
if True: # input(f'Do you want to merge {source.spid} with {target.spid} (y/n)?') == 'y':
# Do the actual merging
start = time.time()
response = self.sp.mergeTaxa(source.spid, target.spid)
Expand Down Expand Up @@ -308,5 +307,6 @@ def handleQualifiedTaxa(self):

pass

# Script entry point: run the duplicate-merge job only when executed directly,
# not when this module is imported (the original ran these statements at
# import time, which is a side-effect hazard for any importer).
if __name__ == "__main__":
    # Configure the Specify API endpoint before constructing the job.
    gs.baseURL = 'https://specify-snm.science.ku.dk/'
    md = MergeDuplicates()
    md.main()

0 comments on commit 158fd03

Please sign in to comment.