From 8f0fcab34e43a491b8948d8b51b7b37efbcb1f01 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Fri, 19 Jan 2024 23:26:48 -0500
Subject: [PATCH 01/11] First, awkward, incomplete steps.

---
 src/reports/index_wide_compendia_tests.py | 58 +++++++++++++++++++++++
 src/reports/index_wide_synonym_tests.py   | 58 +++++++++++++++++++++++
 2 files changed, 116 insertions(+)
 create mode 100644 src/reports/index_wide_compendia_tests.py
 create mode 100644 src/reports/index_wide_synonym_tests.py

diff --git a/src/reports/index_wide_compendia_tests.py b/src/reports/index_wide_compendia_tests.py
new file mode 100644
index 00000000..9987a0fc
--- /dev/null
+++ b/src/reports/index_wide_compendia_tests.py
@@ -0,0 +1,58 @@
+"""
+There are some tests we would like to do that apply to the entire Babel compendia.
+
+To do this, our current strategy is to go through the entire Babel compendia and
+add the relevant information into a SQLite database. We can then query this
+database to look for CURIEs that are mapped to more than one preferred CURIE.
+"""
+import json
+import logging
+import sqlite3
+
+
+def report_on_index_wide_compendia_tests(compendia_files, report_file):
+    # Open the SQLite file that we will use to keep track of duplicates.
+    # Connect to the SQLite database
+    conn = sqlite3.connect('compendia.sqlite3')
+    c = conn.cursor()
+
+    # Create a compendia table if it doesn't exist
+    c.execute('''CREATE TABLE IF NOT EXISTS compendia (
+                        curie TEXT NOT NULL,
+                        label TEXT,
+                        preferred_curie TEXT NOT NULL,
+                    ) STRICT''')
+
+    c.execute('''CREATE INDEX index_preferred_curie ON ''')
+
+    # Start writing the report file.
+    with open(report_file, 'w') as reportfile:
+        # Go through all the compendia files in the order provided.
+        for compendia_file_index, compendia_file in enumerate(compendia_files):
+            # Go through every entry in each compendia_file
+            logging.info(f"Reading {compendia_file} ({compendia_file_index + 1}/{len(compendia_files)})")
+            with open(compendia_file, 'r') as compendiafile:
+                for line in compendiafile:
+                    entry = json.loads(line)
+
+                    # For each entry, we insert
+
+
+            # Write the content into the report file
+            reportfile.write(content)
+
+
+
+        # Insert test data into the table
+        c.execute("INSERT INTO compendia (name, description, data) VALUES (?, ?, ?)",
+                  ('Test Compendium', 'This is a test compendium', 'Some test data'))
+
+        # Query the table to check if the data was inserted correctly
+        c.execute("SELECT * FROM compendia")
+        result = c.fetchone()
+
+        # Close the database connection
+        conn.close()
+
+        # Assert that the data was inserted correctly
+        assert result == (1, 'Test Compendium', 'This is a test compendium', 'Some test data')
diff --git a/src/reports/index_wide_synonym_tests.py b/src/reports/index_wide_synonym_tests.py
new file mode 100644
index 00000000..18e6e79d
--- /dev/null
+++ b/src/reports/index_wide_synonym_tests.py
@@ -0,0 +1,58 @@
+"""
+There are some tests we would like to do that apply to the entire Babel synonyms.
+
+To do this, our current strategy is to go through the entire Babel synonyms and
+add the relevant information into a SQLite database. We can then query this
+database to look for preferred names (ignoring case) shared by multiple CURIEs.
+"""
+import json
+import logging
+import sqlite3
+
+
+def report_on_index_wide_compendia_tests(synonym_files, report_file):
+    
+    # Open the SQLite file that we will use to keep track of duplicates.
+    # Connect to the SQLite database
+    conn = sqlite3.connect('synonyms.sqlite3')
+    c = conn.cursor()
+
+    # Create a compendia table if it doesn't exist
+    c.execute('''CREATE TABLE IF NOT EXISTS synonyms (
+                        curie TEXT NOT NULL PRIMARY KEY UNIQUE,
+                        preferred_name TEXT,
+                        preferred_name_lc TEXT
+                    ) STRICT''')
+
+    # Go through all the compendia files in the order provided.
+    for synonyms_file_index, synonyms_file in enumerate(synonym_files):
+        # Go through every entry in each synonyms_file
+        logging.info(f"Reading synonyms file {synonyms_file} ({synonyms_file_index + 1}/{len(synonym_files)})")
+        with open(synonyms_file, 'r') as compendiafile:
+            for line in compendiafile:
+                entry = json.loads(line)
+
+                curie = entry['curie']
+                preferred_name = entry['preferred_name']
+                preferred_name_lc = preferred_name.lower()
+
+                # This should give us an error if we see the same CURIE in multiple files.
+                c.execute("INSERT INTO synonyms (curie, preferred_name, preferred_name_lc) VALUES (?, ?, ?)",
+                (curie, preferred_name, preferred_name_lc))
+
+
+
+
+        # Insert test data into the table
+        c.execute("INSERT INTO compendia (name, description, data) VALUES (?, ?, ?)",
+                  ('Test Compendium', 'This is a test compendium', 'Some test data'))
+
+        # Query the table to check if the data was inserted correctly
+        c.execute("SELECT * FROM compendia")
+        result = c.fetchone()
+
+        # Close the database connection
+        conn.close()
+
+        # Assert that the data was inserted correctly
+        assert result == (1, 'Test Compendium', 'This is a test compendium', 'Some test data')

From 6d9b536955f9d9179620f77eb930b162c5927b7d Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Fri, 19 Jan 2024 23:48:30 -0500
Subject: [PATCH 02/11] First stab at an index-wide synonym test.

---
 src/reports/index_wide_synonym_tests.py | 43 ++++++++++++++++---------
 src/snakefiles/reports.snakefile        | 11 +++++++
 2 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/src/reports/index_wide_synonym_tests.py b/src/reports/index_wide_synonym_tests.py
index 18e6e79d..115cf153 100644
--- a/src/reports/index_wide_synonym_tests.py
+++ b/src/reports/index_wide_synonym_tests.py
@@ -8,13 +8,17 @@
 import json
 import logging
 import sqlite3
+from pathlib import Path
 
 
-def report_on_index_wide_compendia_tests(synonym_files, report_file):
-    
+def report_on_index_wide_synonym_tests(synonym_files, sqlite_file, report_file):
+    # Start writing to the report file so Snakemake knows we're working.
+    Path(report_file).touch()
+    Path(sqlite_file).touch()
+
     # Open the SQLite file that we will use to keep track of duplicates.
     # Connect to the SQLite database
-    conn = sqlite3.connect('synonyms.sqlite3')
+    conn = sqlite3.connect(sqlite_file + '.db')
     c = conn.cursor()
 
     # Create a compendia table if it doesn't exist
@@ -28,9 +32,12 @@ def report_on_index_wide_compendia_tests(synonym_files, report_file):
     for synonyms_file_index, synonyms_file in enumerate(synonym_files):
         # Go through every entry in each synonyms_file
         logging.info(f"Reading synonyms file {synonyms_file} ({synonyms_file_index + 1}/{len(synonym_files)})")
-        with open(synonyms_file, 'r') as compendiafile:
-            for line in compendiafile:
+
+        count_entries = 0
+        with open(synonyms_file, 'r') as synonymsfile:
+            for line in synonymsfile:
                 entry = json.loads(line)
+                count_entries += 1
 
                 curie = entry['curie']
                 preferred_name = entry['preferred_name']
@@ -40,19 +47,23 @@ def report_on_index_wide_compendia_tests(synonym_files, report_file):
                 c.execute("INSERT INTO synonyms (curie, preferred_name, preferred_name_lc) VALUES (?, ?, ?)",
                 (curie, preferred_name, preferred_name_lc))
 
+        logging.info(f"Read {count_entries} entries from {synonyms_file}.")
+        conn.commit()
 
+        # Count the number of curie values in the synonyms table in SQLite.
+        c.execute("SELECT COUNT(curie) FROM synonyms")
+        curie_count = c.fetchone()
 
+        logging.info(f"{curie_count} CURIEs loaded into {sqlite_file}")
 
-        # Insert test data into the table
-        c.execute("INSERT INTO compendia (name, description, data) VALUES (?, ?, ?)",
-                  ('Test Compendium', 'This is a test compendium', 'Some test data'))
-
-        # Query the table to check if the data was inserted correctly
-        c.execute("SELECT * FROM compendia")
-        result = c.fetchone()
+    with open(report_file, 'w') as reportfile:
+        # TODO: actually check for duplicate labels here.
+        c.execute("SELECT COUNT(curie) FROM synonyms")
+        curie_count = c.fetchone()
 
-        # Close the database connection
-        conn.close()
+        json.dump({
+            'curie_count': curie_count,
+        }, reportfile)
 
-        # Assert that the data was inserted correctly
-        assert result == (1, 'Test Compendium', 'This is a test compendium', 'Some test data')
+    # Close the database connection
+    conn.close()
diff --git a/src/snakefiles/reports.snakefile b/src/snakefiles/reports.snakefile
index 13d58438..b380aeef 100644
--- a/src/snakefiles/reports.snakefile
+++ b/src/snakefiles/reports.snakefile
@@ -2,6 +2,7 @@ import os
 
 from src.reports.compendia_per_file_reports import assert_files_in_directory, \
     generate_content_report_for_compendium, summarize_content_report_for_compendia
+from src.reports.index_wide_synonym_tests import report_on_index_wide_synonym_tests
 
 # Some paths we will use at multiple times in these reports.
 compendia_path = config['output_directory'] + '/compendia'
@@ -91,6 +92,16 @@ rule generate_summary_content_report_for_compendia:
         summarize_content_report_for_compendia(input.expected_content_reports, output.report_path)
 
 
+rule test_synonyms_for_duplication:
+    input:
+        synonyms_files = synonyms_files,
+    output:
+        sqlite_file = config['output_directory']+'/reports/duplication/synonyms.sqlite3',
+        report_path = config['output_directory']+'/reports/duplication/synonym_duplication_report.json',
+    run:
+        report_on_index_wide_synonym_tests(input.synonyms_file, output.sqlite_file, output.report_path)
+
+
 # Check that all the reports were built correctly.
 rule all_reports:
     input:

From 1827247161cfe7392c4ca37daaafcfcec13e4ebb Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Fri, 19 Jan 2024 23:50:25 -0500
Subject: [PATCH 03/11] Added biolink type.

---
 src/reports/index_wide_synonym_tests.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/reports/index_wide_synonym_tests.py b/src/reports/index_wide_synonym_tests.py
index 115cf153..a7b204b5 100644
--- a/src/reports/index_wide_synonym_tests.py
+++ b/src/reports/index_wide_synonym_tests.py
@@ -24,6 +24,7 @@ def report_on_index_wide_synonym_tests(synonym_files, sqlite_file, report_file):
     # Create a compendia table if it doesn't exist
     c.execute('''CREATE TABLE IF NOT EXISTS synonyms (
                         curie TEXT NOT NULL PRIMARY KEY UNIQUE,
+                        biolink_type TEXT,
                         preferred_name TEXT,
                         preferred_name_lc TEXT
                     ) STRICT''')
@@ -40,12 +41,14 @@ def report_on_index_wide_synonym_tests(synonym_files, sqlite_file, report_file):
                 count_entries += 1
 
                 curie = entry['curie']
+                if len(entry['type']) > 0:
+                    biolink_type = 'biolink:' + entry['type'][0]
                 preferred_name = entry['preferred_name']
                 preferred_name_lc = preferred_name.lower()
 
                 # This should give us an error if we see the same CURIE in multiple files.
-                c.execute("INSERT INTO synonyms (curie, preferred_name, preferred_name_lc) VALUES (?, ?, ?)",
-                (curie, preferred_name, preferred_name_lc))
+                c.execute("INSERT INTO synonyms (curie, biolink_type, preferred_name, preferred_name_lc) VALUES (?, ?, ?, ?)",
+                (curie, biolink_type, preferred_name, preferred_name_lc))
 
         logging.info(f"Read {count_entries} entries from {synonyms_file}.")
         conn.commit()

From da44be92c52dcabcdc53b92a8fda5bbcb2e5ec40 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Fri, 19 Jan 2024 23:54:17 -0500
Subject: [PATCH 04/11] Fixed synonyms path.

---
 src/snakefiles/reports.snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/snakefiles/reports.snakefile b/src/snakefiles/reports.snakefile
index b380aeef..5a86b97c 100644
--- a/src/snakefiles/reports.snakefile
+++ b/src/snakefiles/reports.snakefile
@@ -94,7 +94,7 @@ rule generate_summary_content_report_for_compendia:
 
 rule test_synonyms_for_duplication:
     input:
-        synonyms_files = synonyms_files,
+        synonyms_files = expand("{synonyms_path}/{synonym_file}", synonyms_path=synonyms_path, synonym_file=synonyms_files),
     output:
         sqlite_file = config['output_directory']+'/reports/duplication/synonyms.sqlite3',
         report_path = config['output_directory']+'/reports/duplication/synonym_duplication_report.json',

From 792ca1c13d19984538d54293a64146c21cddc5b7 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Fri, 19 Jan 2024 23:55:14 -0500
Subject: [PATCH 05/11] Fixed typo.

---
 src/snakefiles/reports.snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/snakefiles/reports.snakefile b/src/snakefiles/reports.snakefile
index 5a86b97c..bb19c19f 100644
--- a/src/snakefiles/reports.snakefile
+++ b/src/snakefiles/reports.snakefile
@@ -99,7 +99,7 @@ rule test_synonyms_for_duplication:
         sqlite_file = config['output_directory']+'/reports/duplication/synonyms.sqlite3',
         report_path = config['output_directory']+'/reports/duplication/synonym_duplication_report.json',
     run:
-        report_on_index_wide_synonym_tests(input.synonyms_file, output.sqlite_file, output.report_path)
+        report_on_index_wide_synonym_tests(input.synonyms_files, output.sqlite_file, output.report_path)
 
 
 # Check that all the reports were built correctly.

From af4e7fd08c9c91e12afeed81dbc6312cdee47bb0 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Fri, 19 Jan 2024 23:57:14 -0500
Subject: [PATCH 06/11] Removed STRICT.

---
 src/reports/index_wide_synonym_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/reports/index_wide_synonym_tests.py b/src/reports/index_wide_synonym_tests.py
index a7b204b5..1eda4802 100644
--- a/src/reports/index_wide_synonym_tests.py
+++ b/src/reports/index_wide_synonym_tests.py
@@ -27,7 +27,7 @@ def report_on_index_wide_synonym_tests(synonym_files, sqlite_file, report_file):
                         biolink_type TEXT,
                         preferred_name TEXT,
                         preferred_name_lc TEXT
-                    ) STRICT''')
+                    )''')
 
     # Go through all the compendia files in the order provided.
     for synonyms_file_index, synonyms_file in enumerate(synonym_files):

From de1742f8d2992edab9fa049f054859a67a1e0d80 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Fri, 19 Jan 2024 23:58:28 -0500
Subject: [PATCH 07/11] Fixed typo.

---
 src/reports/index_wide_synonym_tests.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/reports/index_wide_synonym_tests.py b/src/reports/index_wide_synonym_tests.py
index 1eda4802..588ba2ba 100644
--- a/src/reports/index_wide_synonym_tests.py
+++ b/src/reports/index_wide_synonym_tests.py
@@ -41,8 +41,8 @@ def report_on_index_wide_synonym_tests(synonym_files, sqlite_file, report_file):
                 count_entries += 1
 
                 curie = entry['curie']
-                if len(entry['type']) > 0:
-                    biolink_type = 'biolink:' + entry['type'][0]
+                if len(entry['types']) > 0:
+                    biolink_type = 'biolink:' + entry['types'][0]
                 preferred_name = entry['preferred_name']
                 preferred_name_lc = preferred_name.lower()
 

From b588ff58618f168a31d85d4d34ed4ceadc3d04ad Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Sat, 20 Jan 2024 00:12:14 -0500
Subject: [PATCH 08/11] Get identical labels.

---
 src/reports/index_wide_synonym_tests.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/reports/index_wide_synonym_tests.py b/src/reports/index_wide_synonym_tests.py
index 588ba2ba..84ec8f37 100644
--- a/src/reports/index_wide_synonym_tests.py
+++ b/src/reports/index_wide_synonym_tests.py
@@ -60,12 +60,17 @@ def report_on_index_wide_synonym_tests(synonym_files, sqlite_file, report_file):
         logging.info(f"{curie_count} CURIEs loaded into {sqlite_file}")
 
     with open(report_file, 'w') as reportfile:
-        # TODO: actually check for duplicate labels here.
         c.execute("SELECT COUNT(curie) FROM synonyms")
         curie_count = c.fetchone()
 
+        # Look for identical preferred_name_lc values.
+        c.execute("SELECT preferred_name_lc, COUNT(preferred_name_lc), GROUP_CONCAT(DISTINCT curie) FROM synonyms GROUP BY preferred_name_lc HAVING COUNT(preferred_name_lc) > 1 ORDER BY COUNT(preferred_name_lc) DESC;")
+        results = c.fetchall()
+        duplicates = [{'preferred_name_lc': duplicate[0], 'count': duplicate[1], 'curies': duplicate[2].split(',')} for duplicate in results]
+
         json.dump({
             'curie_count': curie_count,
+            'duplicates': duplicates
         }, reportfile)
 
     # Close the database connection

From 73bcd960a21dc96c15fae9fd837c8e1e1ee882d9 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Sat, 20 Jan 2024 00:36:34 -0500
Subject: [PATCH 09/11] Added an index-wide duplication test for compendia.

---
 src/reports/index_wide_compendia_tests.py | 73 +++++++++++++----------
 src/snakefiles/reports.snakefile          |  9 +++
 2 files changed, 52 insertions(+), 30 deletions(-)

diff --git a/src/reports/index_wide_compendia_tests.py b/src/reports/index_wide_compendia_tests.py
index 9987a0fc..6de2c304 100644
--- a/src/reports/index_wide_compendia_tests.py
+++ b/src/reports/index_wide_compendia_tests.py
@@ -8,51 +8,64 @@
 import json
 import logging
 import sqlite3
+from pathlib import Path
 
 
-def report_on_index_wide_compendia_tests(compendia_files, report_file):
+def report_on_index_wide_compendia_tests(compendia_files, sqlite_file, report_file):
+    Path(sqlite_file).touch()
+    Path(report_file).touch()
+
     # Open the SQLite file that we will use to keep track of duplicates.
     # Connect to the SQLite database
-    conn = sqlite3.connect('compendia.sqlite3')
+    conn = sqlite3.connect(sqlite_file + '.db')
     c = conn.cursor()
 
     # Create a compendia table if it doesn't exist
     c.execute('''CREATE TABLE IF NOT EXISTS compendia (
-                        curie TEXT NOT NULL,
-                        label TEXT,
-                        preferred_curie TEXT NOT NULL,
-                    ) STRICT''')
-
-    c.execute('''CREATE INDEX index_preferred_curie ON ''')
+                        preferred_curie TEXT NOT NULL PRIMARY KEY,
+                        curie TEXT NOT NULL
+                    )''')
 
-    # Start writing the report file.
-    with open(report_file, 'w') as reportfile:
-        # Go through all the compendia files in the order provided.
-        for compendia_file_index, compendia_file in enumerate(compendia_files):
-            # Go through every entry in each compendia_file
-            logging.info(f"Reading {compendia_file} ({compendia_file_index + 1}/{len(compendia_files)})")
-            with open(compendia_file, 'r') as compendiafile:
-                for line in compendiafile:
-                    entry = json.loads(line)
+    # Go through all the compendia files in the order provided.
+    for compendia_file_index, compendia_file in enumerate(compendia_files):
+        # Go through every entry in each compendia_file
+        logging.info(f"Reading {compendia_file} ({compendia_file_index + 1}/{len(compendia_files)})")
 
-                    # For each entry, we insert
+        count_curies = 0
+        with open(compendia_file, 'r') as compendiafile:
+            for line in compendiafile:
+                entry = json.loads(line)
+                identifiers = entry['identifiers']
 
+                if len(identifiers) > 0:
+                    preferred_curie = identifiers[0]['i']
+                    for identifier in identifiers:
+                        curie = identifier['i']
+                        count_curies += 1
+                        c.execute("INSERT INTO compendia (preferred_curie, curie) VALUES (?, ?)", (preferred_curie, curie))
 
-            # Write the content into the report file
-            reportfile.write(content)
+        logging.info(f"Read {count_curies} into SQLite database {sqlite_file}.")
 
+        # Query the table to check if the data was inserted correctly
+        c.execute("SELECT COUNT(*) FROM compendia")
+        record_count = c.fetchone()
 
+        logging.info(f"SQLite database contains {record_count} records.")
 
-        # Insert test data into the table
-        c.execute("INSERT INTO compendia (name, description, data) VALUES (?, ?, ?)",
-                  ('Test Compendium', 'This is a test compendium', 'Some test data'))
+    # Start writing the report file.
+    with open(report_file, 'w') as reportfile:
+        c.execute("SELECT COUNT(curie) FROM compendia")
+        curie_count = c.fetchone()
 
-        # Query the table to check if the data was inserted correctly
-        c.execute("SELECT * FROM compendia")
-        result = c.fetchone()
+        # Look for curies mapped to multiple preferred_curies.
+        c.execute("SELECT curie, COUNT(DISTINCT preferred_curie), GROUP_CONCAT(DISTINCT preferred_curie) FROM compendia GROUP BY curie HAVING COUNT(DISTINCT preferred_curie) > 1 ORDER BY COUNT(DISTINCT preferred_curie) DESC;")
+        results = c.fetchall()
+        duplicates = [{'curie': duplicate[0], 'count': duplicate[1], 'preferred_curies': duplicate[2].split(',')} for duplicate in results]
 
-        # Close the database connection
-        conn.close()
+        json.dump({
+            'curie_count': curie_count,
+            'duplicates': duplicates
+        }, reportfile)
 
-        # Assert that the data was inserted correctly
-        assert result == (1, 'Test Compendium', 'This is a test compendium', 'Some test data')
+    # Close the database connection
+    conn.close()
diff --git a/src/snakefiles/reports.snakefile b/src/snakefiles/reports.snakefile
index bb19c19f..e440c26f 100644
--- a/src/snakefiles/reports.snakefile
+++ b/src/snakefiles/reports.snakefile
@@ -91,6 +91,15 @@ rule generate_summary_content_report_for_compendia:
     run:
         summarize_content_report_for_compendia(input.expected_content_reports, output.report_path)
 
+rule test_compendia_for_duplication:
+    input:
+        compendia_files = expand("{compendia_path}/{compendium_file}", compendia_path=compendia_path, compendium_file=compendia_files),
+   output:
+        sqlite_file = config['output_directory']+'/reports/duplication/synonyms.sqlite3',
+        report_path = config['output_directory']+'/reports/duplication/synonym_duplication_report.json',
+   run:
+
+
 
 rule test_synonyms_for_duplication:
     input:

From 24d6462cbd4b6b2fff4981a76a64de58112c48ec Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Sat, 20 Jan 2024 00:38:34 -0500
Subject: [PATCH 10/11] Added report_on_index_wide_compendia_tests to reports.

---
 src/snakefiles/reports.snakefile | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/snakefiles/reports.snakefile b/src/snakefiles/reports.snakefile
index e440c26f..4dc1c078 100644
--- a/src/snakefiles/reports.snakefile
+++ b/src/snakefiles/reports.snakefile
@@ -3,6 +3,7 @@ import os
 from src.reports.compendia_per_file_reports import assert_files_in_directory, \
     generate_content_report_for_compendium, summarize_content_report_for_compendia
 from src.reports.index_wide_synonym_tests import report_on_index_wide_synonym_tests
+from src.reports.index_wide_compendia_tests import report_on_index_wide_compendia_tests
 
 # Some paths we will use at multiple times in these reports.
 compendia_path = config['output_directory'] + '/compendia'
@@ -98,8 +99,7 @@ rule test_compendia_for_duplication:
         sqlite_file = config['output_directory']+'/reports/duplication/synonyms.sqlite3',
         report_path = config['output_directory']+'/reports/duplication/synonym_duplication_report.json',
    run:
-
-
+        report_on_index_wide_compendia_tests(input.compendia_files, output.sqlite_file, output.report_path)
 
 rule test_synonyms_for_duplication:
     input:
@@ -110,7 +110,6 @@ rule test_synonyms_for_duplication:
     run:
         report_on_index_wide_synonym_tests(input.synonyms_files, output.sqlite_file, output.report_path)
 
-
 # Check that all the reports were built correctly.
 rule all_reports:
     input:

From 9f859dd392d74e95f31278b6b08ccf09846bfb25 Mon Sep 17 00:00:00 2001
From: Gaurav Vaidya <gaurav@ggvaidya.com>
Date: Sat, 20 Jan 2024 00:42:23 -0500
Subject: [PATCH 11/11] Added a commit() after every file.

---
 src/reports/index_wide_compendia_tests.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/reports/index_wide_compendia_tests.py b/src/reports/index_wide_compendia_tests.py
index 6de2c304..b360193a 100644
--- a/src/reports/index_wide_compendia_tests.py
+++ b/src/reports/index_wide_compendia_tests.py
@@ -47,6 +47,7 @@ def report_on_index_wide_compendia_tests(compendia_files, sqlite_file, report_fi
         logging.info(f"Read {count_curies} into SQLite database {sqlite_file}.")
 
         # Query the table to check if the data was inserted correctly
+        conn.commit()
         c.execute("SELECT COUNT(*) FROM compendia")
         record_count = c.fetchone()