diff --git a/src/dnaapler/utils/all.py b/src/dnaapler/utils/all.py index 4f4ba24..c14543e 100644 --- a/src/dnaapler/utils/all.py +++ b/src/dnaapler/utils/all.py @@ -62,7 +62,11 @@ def all_process_MMseqs2_output_and_reorient( # read in the dataframe from MMseqs2 try: MMseqs2_df = pd.read_csv( - MMseqs2_file, delimiter="\t", index_col=False, names=col_list + MMseqs2_file, + delimiter="\t", + index_col=False, + names=col_list, + dtype={"qseqid": "object"}, ) except Exception: diff --git a/src/dnaapler/utils/bulk.py b/src/dnaapler/utils/bulk.py index d6e783a..b33b7f0 100644 --- a/src/dnaapler/utils/bulk.py +++ b/src/dnaapler/utils/bulk.py @@ -153,7 +153,11 @@ def bulk_process_MMseqs2_output_and_reorient( # read in the dataframe from MMseqs2 try: MMseqs2_df = pd.read_csv( - MMseqs2_file, delimiter="\t", index_col=False, names=col_list + MMseqs2_file, + delimiter="\t", + index_col=False, + names=col_list, + dtype={"qseqid": "object"}, ) except Exception: diff --git a/src/dnaapler/utils/processing.py b/src/dnaapler/utils/processing.py index 86b7a42..6c84089 100644 --- a/src/dnaapler/utils/processing.py +++ b/src/dnaapler/utils/processing.py @@ -50,7 +50,11 @@ def process_MMseqs2_output_and_reorient( # read in the dataframe from MMseqs2 try: MMseqs2_df = pd.read_csv( - MMseqs2_file, delimiter="\t", index_col=False, names=col_list + MMseqs2_file, + delimiter="\t", + index_col=False, + names=col_list, + dtype={"qseqid": "object"}, ) except Exception: logger.error("There was an issue with parsing the MMseqs2 output file.") diff --git a/src/dnaapler/utils/util.py b/src/dnaapler/utils/util.py index a02263f..b61e7ea 100644 --- a/src/dnaapler/utils/util.py +++ b/src/dnaapler/utils/util.py @@ -129,17 +129,23 @@ def check_mmseqs2_version(): else: raise ValueError("MMseqs2 version not found") - mmseqs_major_version = int(mmseqs_version.split(".")[0]) - mmseqs_minor_version = mmseqs_version.split(".")[1] + # The pre-built binary on GitHub reports its version using the commit hash instead of + # a version number. + if mmseqs_version.startswith("45111b6"): + logger.info(f"MMseqs2 version found is {mmseqs_version}") - logger.info( - f"MMseqs2 version found is v{mmseqs_major_version}.{mmseqs_minor_version}" - ) + else: + mmseqs_major_version = int(mmseqs_version.split(".")[0]) + mmseqs_minor_version = mmseqs_version.split(".")[1] + + logger.info( + f"MMseqs2 version found is v{mmseqs_major_version}.{mmseqs_minor_version}" + ) - if mmseqs_major_version != 13: - logger.error("MMseqs2 is the wrong version. Please install v13.45111") - if mmseqs_minor_version != "45111": - logger.error("MMseqs2 is the wrong version. Please install v13.45111") + if mmseqs_major_version != 13: + logger.error("MMseqs2 is the wrong version. Please install v13.45111") + if mmseqs_minor_version != "45111": + logger.error("MMseqs2 is the wrong version. Please install v13.45111") logger.info("MMseqs2 version is ok.")