From 8f0fcab34e43a491b8948d8b51b7b37efbcb1f01 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 19 Jan 2024 23:26:48 -0500 Subject: [PATCH 01/11] First, awkward, incomplete steps. --- src/reports/index_wide_compendia_tests.py | 58 +++++++++++++++++++++++ src/reports/index_wide_synonym_tests.py | 58 +++++++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 src/reports/index_wide_compendia_tests.py create mode 100644 src/reports/index_wide_synonym_tests.py diff --git a/src/reports/index_wide_compendia_tests.py b/src/reports/index_wide_compendia_tests.py new file mode 100644 index 00000000..9987a0fc --- /dev/null +++ b/src/reports/index_wide_compendia_tests.py @@ -0,0 +1,58 @@ +""" +There are some tests we would like to do that apply to the entire Babel compendia. + +To do this, our current strategy is to go through the entire Babel compendia and +add the relevant information into a SQLite database. We can then check with this +database to look for relevant duplication. +""" +import json +import logging +import sqlite3 + + +def report_on_index_wide_compendia_tests(compendia_files, report_file): + # Open the SQLite file that we will use to keep track of duplicates. + # Connect to the SQLite database + conn = sqlite3.connect('compendia.sqlite3') + c = conn.cursor() + + # Create a compendia table if it doesn't exist + c.execute('''CREATE TABLE IF NOT EXISTS compendia ( + curie TEXT NOT NULL, + label TEXT, + preferred_curie TEXT NOT NULL, + ) STRICT''') + + c.execute('''CREATE INDEX index_preferred_curie ON ''') + + # Start writing the report file. + with open(report_file, 'w') as reportfile: + # Go through all the compendia files in the order provided. + for compendia_file_index, compendia_file in enumerate(compendia_files): + # Go through every entry in each compendia_file + logging.info(f"Reading {compendia_file} ({compendia_file_index + 1}/{len(compendia_files)})") + with open(compendia_file, 'r') as compendiafile: + for line in compendiafile: + entry = json.loads(line) + + # For each entry, we insert + + + # Write the content into the report file + reportfile.write(content) + + + + # Insert test data into the table + c.execute("INSERT INTO compendia (name, description, data) VALUES (?, ?, ?)", + ('Test Compendium', 'This is a test compendium', 'Some test data')) + + # Query the table to check if the data was inserted correctly + c.execute("SELECT * FROM compendia") + result = c.fetchone() + + # Close the database connection + conn.close() + + # Assert that the data was inserted correctly + assert result == (1, 'Test Compendium', 'This is a test compendium', 'Some test data') diff --git a/src/reports/index_wide_synonym_tests.py b/src/reports/index_wide_synonym_tests.py new file mode 100644 index 00000000..18e6e79d --- /dev/null +++ b/src/reports/index_wide_synonym_tests.py @@ -0,0 +1,58 @@ +""" +There are some tests we would like to do that apply to the entire Babel synonyms. + +To do this, our current strategy is to go through the entire Babel synonyms and +add the relevant information into a SQLite database. We can then check with this +database to look for relevant duplication. +""" +import json +import logging +import sqlite3 + + +def report_on_index_wide_compendia_tests(synonym_files, report_file): + + # Open the SQLite file that we will use to keep track of duplicates. + # Connect to the SQLite database + conn = sqlite3.connect('synonyms.sqlite3') + c = conn.cursor() + + # Create a compendia table if it doesn't exist + c.execute('''CREATE TABLE IF NOT EXISTS synonyms ( + curie TEXT NOT NULL PRIMARY KEY UNIQUE, + preferred_name TEXT, + preferred_name_lc TEXT + ) STRICT''') + + # Go through all the compendia files in the order provided. + for synonyms_file_index, synonyms_file in enumerate(synonym_files): + # Go through every entry in each synonyms_file + logging.info(f"Reading synonyms file {synonyms_file} ({synonyms_file_index + 1}/{len(synonym_files)})") + with open(synonyms_file, 'r') as compendiafile: + for line in compendiafile: + entry = json.loads(line) + + curie = entry['curie'] + preferred_name = entry['preferred_name'] + preferred_name_lc = preferred_name.lower() + + # This should give us an error if we see the same CURIE in multiple files. + c.execute("INSERT INTO synonyms (curie, preferred_name, preferred_name_lc) VALUES (?, ?, ?)", + (curie, preferred_name, preferred_name_lc)) + + + + + # Insert test data into the table + c.execute("INSERT INTO compendia (name, description, data) VALUES (?, ?, ?)", + ('Test Compendium', 'This is a test compendium', 'Some test data')) + + # Query the table to check if the data was inserted correctly + c.execute("SELECT * FROM compendia") + result = c.fetchone() + + # Close the database connection + conn.close() + + # Assert that the data was inserted correctly + assert result == (1, 'Test Compendium', 'This is a test compendium', 'Some test data') From 6d9b536955f9d9179620f77eb930b162c5927b7d Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 19 Jan 2024 23:48:30 -0500 Subject: [PATCH 02/11] First stab at an index-wide synonym test. --- src/reports/index_wide_synonym_tests.py | 43 ++++++++++++++++--------- src/snakefiles/reports.snakefile | 11 +++++++ 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/src/reports/index_wide_synonym_tests.py b/src/reports/index_wide_synonym_tests.py index 18e6e79d..115cf153 100644 --- a/src/reports/index_wide_synonym_tests.py +++ b/src/reports/index_wide_synonym_tests.py @@ -8,13 +8,17 @@ import json import logging import sqlite3 +from pathlib import Path -def report_on_index_wide_compendia_tests(synonym_files, report_file): - +def report_on_index_wide_synonym_tests(synonym_files, sqlite_file, report_file): + # Start writing to the report file so Snakemake knows we're working. + Path(report_file).touch() + Path(sqlite_file).touch() + # Open the SQLite file that we will use to keep track of duplicates. # Connect to the SQLite database - conn = sqlite3.connect('synonyms.sqlite3') + conn = sqlite3.connect(sqlite_file + '.db') c = conn.cursor() # Create a compendia table if it doesn't exist @@ -28,9 +32,12 @@ def report_on_index_wide_compendia_tests(synonym_files, report_file): for synonyms_file_index, synonyms_file in enumerate(synonym_files): # Go through every entry in each synonyms_file logging.info(f"Reading synonyms file {synonyms_file} ({synonyms_file_index + 1}/{len(synonym_files)})") - with open(synonyms_file, 'r') as compendiafile: - for line in compendiafile: + + count_entries = 0 + with open(synonyms_file, 'r') as synonymsfile: + for line in synonymsfile: entry = json.loads(line) + count_entries += 1 curie = entry['curie'] preferred_name = entry['preferred_name'] @@ -40,19 +47,23 @@ def report_on_index_wide_compendia_tests(synonym_files, report_file): c.execute("INSERT INTO synonyms (curie, preferred_name, preferred_name_lc) VALUES (?, ?, ?)", (curie, preferred_name, preferred_name_lc)) + logging.info(f"Read {count_entries} entries from {synonyms_file}.") + conn.commit() + # Count the number of curie values in the synonyms table in SQLite. + c.execute("SELECT COUNT(curie) FROM synonyms") + curie_count = c.fetchone() + logging.info(f"{curie_count} CURIEs loaded into {sqlite_file}") - # Insert test data into the table - c.execute("INSERT INTO compendia (name, description, data) VALUES (?, ?, ?)", - ('Test Compendium', 'This is a test compendium', 'Some test data')) - - # Query the table to check if the data was inserted correctly - c.execute("SELECT * FROM compendia") - result = c.fetchone() + with open(report_file, 'w') as reportfile: + # TODO: actually check for duplicate labels here. + c.execute("SELECT COUNT(curie) FROM synonyms") + curie_count = c.fetchone() - # Close the database connection - conn.close() + json.dump({ + 'curie_count': curie_count, + }, reportfile) - # Assert that the data was inserted correctly - assert result == (1, 'Test Compendium', 'This is a test compendium', 'Some test data') + # Close the database connection + conn.close() diff --git a/src/snakefiles/reports.snakefile b/src/snakefiles/reports.snakefile index 13d58438..b380aeef 100644 --- a/src/snakefiles/reports.snakefile +++ b/src/snakefiles/reports.snakefile @@ -2,6 +2,7 @@ import os from src.reports.compendia_per_file_reports import assert_files_in_directory, \ generate_content_report_for_compendium, summarize_content_report_for_compendia +from src.reports.index_wide_synonym_tests import report_on_index_wide_synonym_tests # Some paths we will use at multiple times in these reports. compendia_path = config['output_directory'] + '/compendia' @@ -91,6 +92,16 @@ rule generate_summary_content_report_for_compendia: summarize_content_report_for_compendia(input.expected_content_reports, output.report_path) +rule test_synonyms_for_duplication: + input: + synonyms_files = synonyms_files, + output: + sqlite_file = config['output_directory']+'/reports/duplication/synonyms.sqlite3', + report_path = config['output_directory']+'/reports/duplication/synonym_duplication_report.json', + run: + report_on_index_wide_synonym_tests(input.synonyms_file, output.sqlite_file, output.report_path) + + # Check that all the reports were built correctly. rule all_reports: input: From 1827247161cfe7392c4ca37daaafcfcec13e4ebb Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 19 Jan 2024 23:50:25 -0500 Subject: [PATCH 03/11] Added biolink type. --- src/reports/index_wide_synonym_tests.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/reports/index_wide_synonym_tests.py b/src/reports/index_wide_synonym_tests.py index 115cf153..a7b204b5 100644 --- a/src/reports/index_wide_synonym_tests.py +++ b/src/reports/index_wide_synonym_tests.py @@ -24,6 +24,7 @@ def report_on_index_wide_synonym_tests(synonym_files, sqlite_file, report_file): # Create a compendia table if it doesn't exist c.execute('''CREATE TABLE IF NOT EXISTS synonyms ( curie TEXT NOT NULL PRIMARY KEY UNIQUE, + biolink_type TEXT, preferred_name TEXT, preferred_name_lc TEXT ) STRICT''') @@ -40,12 +41,14 @@ def report_on_index_wide_synonym_tests(synonym_files, sqlite_file, report_file): count_entries += 1 curie = entry['curie'] + if len(entry['type']) > 0: + biolink_type = 'biolink:' + entry['type'][0] preferred_name = entry['preferred_name'] preferred_name_lc = preferred_name.lower() # This should give us an error if we see the same CURIE in multiple files. - c.execute("INSERT INTO synonyms (curie, preferred_name, preferred_name_lc) VALUES (?, ?, ?)", - (curie, preferred_name, preferred_name_lc)) + c.execute("INSERT INTO synonyms (curie, biolink_type, preferred_name, preferred_name_lc) VALUES (?, ?, ?, ?)", + (curie, biolink_type, preferred_name, preferred_name_lc)) logging.info(f"Read {count_entries} entries from {synonyms_file}.") conn.commit() From da44be92c52dcabcdc53b92a8fda5bbcb2e5ec40 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 19 Jan 2024 23:54:17 -0500 Subject: [PATCH 04/11] Fixed synonyms path. --- src/snakefiles/reports.snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/snakefiles/reports.snakefile b/src/snakefiles/reports.snakefile index b380aeef..5a86b97c 100644 --- a/src/snakefiles/reports.snakefile +++ b/src/snakefiles/reports.snakefile @@ -94,7 +94,7 @@ rule generate_summary_content_report_for_compendia: rule test_synonyms_for_duplication: input: - synonyms_files = synonyms_files, + synonyms_files = expand("{synonyms_path}/{synonym_file}", synonyms_path=synonyms_path, synonym_file=synonyms_files), output: sqlite_file = config['output_directory']+'/reports/duplication/synonyms.sqlite3', report_path = config['output_directory']+'/reports/duplication/synonym_duplication_report.json', From 792ca1c13d19984538d54293a64146c21cddc5b7 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 19 Jan 2024 23:55:14 -0500 Subject: [PATCH 05/11] Fixed typo. --- src/snakefiles/reports.snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/snakefiles/reports.snakefile b/src/snakefiles/reports.snakefile index 5a86b97c..bb19c19f 100644 --- a/src/snakefiles/reports.snakefile +++ b/src/snakefiles/reports.snakefile @@ -99,7 +99,7 @@ rule test_synonyms_for_duplication: sqlite_file = config['output_directory']+'/reports/duplication/synonyms.sqlite3', report_path = config['output_directory']+'/reports/duplication/synonym_duplication_report.json', run: - report_on_index_wide_synonym_tests(input.synonyms_file, output.sqlite_file, output.report_path) + report_on_index_wide_synonym_tests(input.synonyms_files, output.sqlite_file, output.report_path) # Check that all the reports were built correctly. From af4e7fd08c9c91e12afeed81dbc6312cdee47bb0 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 19 Jan 2024 23:57:14 -0500 Subject: [PATCH 06/11] Removed STRICT. --- src/reports/index_wide_synonym_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reports/index_wide_synonym_tests.py b/src/reports/index_wide_synonym_tests.py index a7b204b5..1eda4802 100644 --- a/src/reports/index_wide_synonym_tests.py +++ b/src/reports/index_wide_synonym_tests.py @@ -27,7 +27,7 @@ def report_on_index_wide_synonym_tests(synonym_files, sqlite_file, report_file): biolink_type TEXT, preferred_name TEXT, preferred_name_lc TEXT - ) STRICT''') + )''') # Go through all the compendia files in the order provided. for synonyms_file_index, synonyms_file in enumerate(synonym_files): From de1742f8d2992edab9fa049f054859a67a1e0d80 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Fri, 19 Jan 2024 23:58:28 -0500 Subject: [PATCH 07/11] Fixed typo. --- src/reports/index_wide_synonym_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reports/index_wide_synonym_tests.py b/src/reports/index_wide_synonym_tests.py index 1eda4802..588ba2ba 100644 --- a/src/reports/index_wide_synonym_tests.py +++ b/src/reports/index_wide_synonym_tests.py @@ -41,8 +41,8 @@ def report_on_index_wide_synonym_tests(synonym_files, sqlite_file, report_file): count_entries += 1 curie = entry['curie'] - if len(entry['type']) > 0: - biolink_type = 'biolink:' + entry['type'][0] + if len(entry['types']) > 0: + biolink_type = 'biolink:' + entry['types'][0] preferred_name = entry['preferred_name'] preferred_name_lc = preferred_name.lower() From b588ff58618f168a31d85d4d34ed4ceadc3d04ad Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Sat, 20 Jan 2024 00:12:14 -0500 Subject: [PATCH 08/11] Get identical labels. --- src/reports/index_wide_synonym_tests.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/reports/index_wide_synonym_tests.py b/src/reports/index_wide_synonym_tests.py index 588ba2ba..84ec8f37 100644 --- a/src/reports/index_wide_synonym_tests.py +++ b/src/reports/index_wide_synonym_tests.py @@ -60,12 +60,17 @@ def report_on_index_wide_synonym_tests(synonym_files, sqlite_file, report_file): logging.info(f"{curie_count} CURIEs loaded into {sqlite_file}") with open(report_file, 'w') as reportfile: - # TODO: actually check for duplicate labels here. c.execute("SELECT COUNT(curie) FROM synonyms") curie_count = c.fetchone() + # Look for identical preferred_name_lc values. + c.execute("SELECT preferred_name_lc, COUNT(preferred_name_lc), GROUP_CONCAT(DISTINCT curie) FROM synonyms GROUP BY preferred_name_lc HAVING COUNT(preferred_name_lc) > 1 ORDER BY COUNT(preferred_name_lc) DESC;") + results = c.fetchall() + duplicates = [{'preferred_name_lc': duplicate[0], 'count': duplicate[1], 'curies': duplicate[2].split(',')} for duplicate in results] + json.dump({ 'curie_count': curie_count, + 'duplicates': duplicates }, reportfile) # Close the database connection From 73bcd960a21dc96c15fae9fd837c8e1e1ee882d9 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Sat, 20 Jan 2024 00:36:34 -0500 Subject: [PATCH 09/11] Added an index-wide test for compendia tests. --- src/reports/index_wide_compendia_tests.py | 73 +++++++++++++---------- src/snakefiles/reports.snakefile | 9 +++ 2 files changed, 52 insertions(+), 30 deletions(-) diff --git a/src/reports/index_wide_compendia_tests.py b/src/reports/index_wide_compendia_tests.py index 9987a0fc..6de2c304 100644 --- a/src/reports/index_wide_compendia_tests.py +++ b/src/reports/index_wide_compendia_tests.py @@ -8,51 +8,64 @@ import json import logging import sqlite3 +from pathlib import Path -def report_on_index_wide_compendia_tests(compendia_files, report_file): +def report_on_index_wide_compendia_tests(compendia_files, sqlite_file, report_file): + Path(sqlite_file).touch() + Path(report_file).touch() + # Open the SQLite file that we will use to keep track of duplicates. # Connect to the SQLite database - conn = sqlite3.connect('compendia.sqlite3') + conn = sqlite3.connect(sqlite_file + '.db') c = conn.cursor() # Create a compendia table if it doesn't exist c.execute('''CREATE TABLE IF NOT EXISTS compendia ( - curie TEXT NOT NULL, - label TEXT, - preferred_curie TEXT NOT NULL, - ) STRICT''') - - c.execute('''CREATE INDEX index_preferred_curie ON ''') + preferred_curie TEXT NOT NULL PRIMARY KEY, + curie TEXT NOT NULL + )''') - # Start writing the report file. - with open(report_file, 'w') as reportfile: - # Go through all the compendia files in the order provided. - for compendia_file_index, compendia_file in enumerate(compendia_files): - # Go through every entry in each compendia_file - logging.info(f"Reading {compendia_file} ({compendia_file_index + 1}/{len(compendia_files)})") - with open(compendia_file, 'r') as compendiafile: - for line in compendiafile: - entry = json.loads(line) + # Go through all the compendia files in the order provided. + for compendia_file_index, compendia_file in enumerate(compendia_files): + # Go through every entry in each compendia_file + logging.info(f"Reading {compendia_file} ({compendia_file_index + 1}/{len(compendia_files)})") - # For each entry, we insert + count_curies = 0 + with open(compendia_file, 'r') as compendiafile: + for line in compendiafile: + entry = json.loads(line) + identifiers = entry['identifiers'] + if len(identifiers) > 0: + preferred_curie = identifiers[0]['i'] + for identifier in identifiers: + curie = identifier['i'] + count_curies += 1 + c.execute("INSERT INTO compendia (preferred_curie, curie) VALUES (?, ?)", (preferred_curie, curie)) - # Write the content into the report file - reportfile.write(content) + logging.info(f"Read {count_curies} into SQLite database {sqlite_file}.") + # Query the table to check if the data was inserted correctly + c.execute("SELECT COUNT(*) FROM compendia") + record_count = c.fetchone() + logging.info(f"SQLite database contains {record_count} records.") - # Insert test data into the table - c.execute("INSERT INTO compendia (name, description, data) VALUES (?, ?, ?)", - ('Test Compendium', 'This is a test compendium', 'Some test data')) + # Start writing the report file. + with open(report_file, 'w') as reportfile: + c.execute("SELECT COUNT(curie) FROM compendia") + curie_count = c.fetchone() - # Query the table to check if the data was inserted correctly - c.execute("SELECT * FROM compendia") - result = c.fetchone() + # Look for curies mapped to multiple preferred_curies. + c.execute("SELECT curie, COUNT(DISTINCT preferred_curie), GROUP_CONCAT(DISTINCT preferred_curie) FROM compendia GROUP BY curie HAVING COUNT(DISTINCT preferred_curie) > 1 ORDER BY COUNT(DISTINCT preferred_curie) DESC;") + results = c.fetchall() + duplicates = [{'curie': duplicate[0], 'count': duplicate[1], 'preferred_curies': duplicate[2].split(',')} for duplicate in results] - # Close the database connection - conn.close() + json.dump({ + 'curie_count': curie_count, + 'duplicates': duplicates + }, reportfile) - # Assert that the data was inserted correctly - assert result == (1, 'Test Compendium', 'This is a test compendium', 'Some test data') + # Close the database connection + conn.close() diff --git a/src/snakefiles/reports.snakefile b/src/snakefiles/reports.snakefile index bb19c19f..e440c26f 100644 --- a/src/snakefiles/reports.snakefile +++ b/src/snakefiles/reports.snakefile @@ -91,6 +91,15 @@ rule generate_summary_content_report_for_compendia: run: summarize_content_report_for_compendia(input.expected_content_reports, output.report_path) +rule test_compendia_for_duplication: + input: + compendia_files = expand("{compendia_path}/{compendium_file}", compendia_path=compendia_path, compendium_file=compendia_files), + output: + sqlite_file = config['output_directory']+'/reports/duplication/synonyms.sqlite3', + report_path = config['output_directory']+'/reports/duplication/synonym_duplication_report.json', + run: + + rule test_synonyms_for_duplication: input: From 24d6462cbd4b6b2fff4981a76a64de58112c48ec Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Sat, 20 Jan 2024 00:38:34 -0500 Subject: [PATCH 10/11] Added report_on_index_wide_compendia_tests to reports. --- src/snakefiles/reports.snakefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/snakefiles/reports.snakefile b/src/snakefiles/reports.snakefile index e440c26f..4dc1c078 100644 --- a/src/snakefiles/reports.snakefile +++ b/src/snakefiles/reports.snakefile @@ -3,6 +3,7 @@ import os from src.reports.compendia_per_file_reports import assert_files_in_directory, \ generate_content_report_for_compendium, summarize_content_report_for_compendia from src.reports.index_wide_synonym_tests import report_on_index_wide_synonym_tests +from src.reports.index_wide_compendia_tests import report_on_index_wide_compendia_tests # Some paths we will use at multiple times in these reports. compendia_path = config['output_directory'] + '/compendia' @@ -98,8 +99,7 @@ rule test_compendia_for_duplication: sqlite_file = config['output_directory']+'/reports/duplication/synonyms.sqlite3', report_path = config['output_directory']+'/reports/duplication/synonym_duplication_report.json', run: - - + report_on_index_wide_compendia_tests(input.compendia_files, output.sqlite_file, output.report_path) rule test_synonyms_for_duplication: input: @@ -110,7 +110,6 @@ rule test_synonyms_for_duplication: run: report_on_index_wide_synonym_tests(input.synonyms_files, output.sqlite_file, output.report_path) - # Check that all the reports were built correctly. rule all_reports: input: From 9f859dd392d74e95f31278b6b08ccf09846bfb25 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Sat, 20 Jan 2024 00:42:23 -0500 Subject: [PATCH 11/11] Added a commit() after every file. --- src/reports/index_wide_compendia_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/reports/index_wide_compendia_tests.py b/src/reports/index_wide_compendia_tests.py index 6de2c304..b360193a 100644 --- a/src/reports/index_wide_compendia_tests.py +++ b/src/reports/index_wide_compendia_tests.py @@ -47,6 +47,7 @@ def report_on_index_wide_compendia_tests(compendia_files, sqlite_file, report_fi logging.info(f"Read {count_curies} into SQLite database {sqlite_file}.") # Query the table to check if the data was inserted correctly + conn.commit() c.execute("SELECT COUNT(*) FROM compendia") record_count = c.fetchone()