Skip to content

Commit

Permalink
modified saturation function to use uncorrected distances — calculate…
Browse files Browse the repository at this point in the history
…d as one minus the pairwise identity — rather than pairwise identity for the linear regression analysis
  • Loading branch information
JLSteenwyk committed Apr 29, 2024
1 parent ba7393e commit 680eaca
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 86 deletions.
3 changes: 3 additions & 0 deletions change_log.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
Major changes to PhyKIT are summarized here.

1.19.3
- Saturation function now uses uncorrected distances instead of pairwise identities

1.19.2
- Verbose pairwise identity reporting separates pairwise identities by tabs and not a dash

Expand Down
3 changes: 3 additions & 0 deletions docs/change_log/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ Change log

Major changes to PhyKIT are summarized here.

**1.19.3**:
Saturation function now uses uncorrected distances instead of pairwise identities

**1.19.2**:
Verbose pairwise identity reporting separates pairwise identities by tabs and not a dash

Expand Down
31 changes: 16 additions & 15 deletions phykit/services/tree/saturation.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,17 @@ def run(self):
# distances and pairwise identities
(
patristic_distances,
pairwise_identities,
uncorrected_distances,
) = self.loop_through_combos_and_calculate_pds_and_pis(combos, alignment, tree)

# calculate linear regression
_, _, r_value, _, _ = scipy.stats.linregress(
pairwise_identities, patristic_distances
uncorrected_distances, patristic_distances
)

# report res
self.print_res(
self.verbose, combos, pairwise_identities, patristic_distances, r_value
self.verbose, combos, uncorrected_distances, patristic_distances, r_value
)

def process_args(self, args):
Expand All @@ -68,19 +68,20 @@ def loop_through_combos_and_calculate_pds_and_pis(
their patristic distance and pairwise identity
"""
patristic_distances = []
pairwise_identities = []
uncorrected_distances = []
aln_len = alignment.get_alignment_length()
for combo in combos:
# calculate pd
patristic_distances.append(tree.distance(combo[0], combo[1]))
# calculate pairwise identity
pairwise_identities = self.calculate_pairwise_identities(
alignment, pairwise_identities, aln_len, combo
uncorrected_distances = self.calculate_uncorrected_distances(
alignment, uncorrected_distances, aln_len, combo
)
return patristic_distances, pairwise_identities

def calculate_pairwise_identities(
self, alignment, pairwise_identities: list, aln_len: int, combo: tuple
return patristic_distances, uncorrected_distances

def calculate_uncorrected_distances(
self, alignment, uncorrected_distances: list, aln_len: int, combo: tuple
) -> list:
"""
calculate pairwise identities for a given combo
Expand All @@ -96,15 +97,15 @@ def calculate_pairwise_identities(
for idx in range(0, aln_len):
if seq_one[idx] == seq_two[idx]:
identities += 1
pairwise_identities.append(identities / aln_len)
uncorrected_distances.append(1-(identities / aln_len))

return pairwise_identities
return uncorrected_distances

def print_res(
self,
verbose: bool,
combos: list,
pairwise_identities: list,
uncorrected_distances: list,
patristic_distances: list,
r_value: float,
) -> None:
Expand All @@ -113,11 +114,11 @@ def print_res(
"""
try:
if verbose:
for combo, pairwise_identity, patristic_distance in zip(
combos, pairwise_identities, patristic_distances
for combo, dist, patristic_distance in zip(
combos, uncorrected_distances, patristic_distances
):
print(
f"{combo[0]}-{combo[1]}\t{round(pairwise_identity,4)}\t{round(patristic_distance, 4)}"
f"{combo[0]}-{combo[1]}\t{round(dist,4)}\t{round(patristic_distance, 4)}"
)
else:
print(round(r_value**2, 4))
Expand Down
2 changes: 1 addition & 1 deletion phykit/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.19.2"
__version__ = "1.19.3"
139 changes: 69 additions & 70 deletions tests/integration/tree/test_saturation_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_saturation_incorrect_tree_path(self, mocked_print):
"phykit",
"saturation",
"-t",
f"{here.parent.parent.parent}/sample_files/12_YPR191W_Anc_7.548_codon_aln.fasta.clipkit.treefi",
f"{here.parent.parent.parent}/sample_files/does_not_exist",
"-a",
f"{here.parent.parent.parent}/sample_files/12_YPR191W_Anc_7.548_codon_aln.fasta.clipkit",
]
Expand All @@ -67,7 +67,7 @@ def test_saturation_incorrect_alignment_path(self, mocked_print):
"-t",
f"{here.parent.parent.parent}/sample_files/12_YPR191W_Anc_7.548_codon_aln.fasta.clipkit.treefile",
"-a",
f"{here.parent.parent.parent}/sample_files/12_YPR191W_Anc_7.548_codon_aln.fasta.clipki",
f"{here.parent.parent.parent}/sample_files/does_not_exist",
]

with pytest.raises(SystemExit) as pytest_wrapped_e:
Expand All @@ -78,7 +78,6 @@ def test_saturation_incorrect_alignment_path(self, mocked_print):

@patch("builtins.print")
def test_saturation_verbose(self, mocked_print):
expected_result = 0.8451
testargs = [
"phykit",
"sat",
Expand All @@ -91,70 +90,70 @@ def test_saturation_verbose(self, mocked_print):
with patch.object(sys, "argv", testargs):
Phykit()
assert mocked_print.mock_calls == [
call("Kpol-Kpha\t0.6136\t0.6176"),
call("Kpol-Snag\t0.5654\t0.7482"),
call("Kpol-Suva\t0.5948\t0.6945"),
call("Kpol-Skud\t0.5906\t0.7341"),
call("Kpol-Smik\t0.6157\t0.7734"),
call("Kpol-Scer\t0.6105\t0.7634"),
call("Kpol-Kbla\t0.599\t0.7287"),
call("Kpol-Kafr\t0.5696\t0.7509"),
call("Kpol-Sdai\t0.5916\t0.7297"),
call("Kpol-Scas\t0.5822\t0.7958"),
call("Kpol-Cgla\t0.5979\t0.653"),
call("Kpha-Snag\t0.5393\t0.8254"),
call("Kpha-Suva\t0.5424\t0.7717"),
call("Kpha-Skud\t0.5257\t0.8113"),
call("Kpha-Smik\t0.5466\t0.8505"),
call("Kpha-Scer\t0.5445\t0.8406"),
call("Kpha-Kbla\t0.5435\t0.9228"),
call("Kpha-Kafr\t0.5581\t0.945"),
call("Kpha-Sdai\t0.5424\t0.9238"),
call("Kpha-Scas\t0.5382\t0.9899"),
call("Kpha-Cgla\t0.5738\t0.8472"),
call("Snag-Suva\t0.5518\t0.7281"),
call("Snag-Skud\t0.5508\t0.7678"),
call("Snag-Smik\t0.5539\t0.807"),
call("Snag-Scer\t0.555\t0.797"),
call("Snag-Kbla\t0.5215\t1.0535"),
call("Snag-Kafr\t0.5539\t1.0757"),
call("Snag-Sdai\t0.5225\t1.0544"),
call("Snag-Scas\t0.4984\t1.1206"),
call("Snag-Cgla\t0.5131\t0.9778"),
call("Suva-Skud\t0.8105\t0.229"),
call("Suva-Smik\t0.7969\t0.2682"),
call("Suva-Scer\t0.7958\t0.2583"),
call("Suva-Kbla\t0.5361\t0.9997"),
call("Suva-Kafr\t0.5162\t1.0219"),
call("Suva-Sdai\t0.5309\t1.0007"),
call("Suva-Scas\t0.5162\t1.0668"),
call("Suva-Cgla\t0.5707\t0.9241"),
call("Skud-Smik\t0.8199\t0.2171"),
call("Skud-Scer\t0.8325\t0.2071"),
call("Skud-Kbla\t0.5183\t1.0394"),
call("Skud-Kafr\t0.5288\t1.0616"),
call("Skud-Sdai\t0.5089\t1.0403"),
call("Skud-Scas\t0.5005\t1.1065"),
call("Skud-Cgla\t0.5602\t0.9637"),
call("Smik-Scer\t0.8314\t0.1998"),
call("Smik-Kbla\t0.5382\t1.0786"),
call("Smik-Kafr\t0.5225\t1.1008"),
call("Smik-Sdai\t0.534\t1.0796"),
call("Smik-Scas\t0.5257\t1.1457"),
call("Smik-Cgla\t0.5529\t1.003"),
call("Scer-Kbla\t0.5372\t1.0686"),
call("Scer-Kafr\t0.5162\t1.0908"),
call("Scer-Sdai\t0.5298\t1.0696"),
call("Scer-Scas\t0.5236\t1.1357"),
call("Scer-Cgla\t0.5508\t0.993"),
call("Kbla-Kafr\t0.5686\t0.67"),
call("Kbla-Sdai\t0.5613\t0.8756"),
call("Kbla-Scas\t0.5393\t0.9418"),
call("Kbla-Cgla\t0.5508\t0.799"),
call("Kafr-Sdai\t0.534\t0.8978"),
call("Kafr-Scas\t0.5152\t0.964"),
call("Kafr-Cgla\t0.555\t0.8212"),
call("Sdai-Scas\t0.6356\t0.5357"),
call("Sdai-Cgla\t0.5675\t0.7045"),
call("Scas-Cgla\t0.5518\t0.7706"),
]
call("Kpol-Kpha\t0.3864\t0.6176"),
call("Kpol-Snag\t0.4346\t0.7482"),
call("Kpol-Suva\t0.4052\t0.6945"),
call("Kpol-Skud\t0.4094\t0.7341"),
call("Kpol-Smik\t0.3843\t0.7734"),
call("Kpol-Scer\t0.3895\t0.7634"),
call("Kpol-Kbla\t0.401\t0.7287"),
call("Kpol-Kafr\t0.4304\t0.7509"),
call("Kpol-Sdai\t0.4084\t0.7297"),
call("Kpol-Scas\t0.4178\t0.7958"),
call("Kpol-Cgla\t0.4021\t0.653"),
call("Kpha-Snag\t0.4607\t0.8254"),
call("Kpha-Suva\t0.4576\t0.7717"),
call("Kpha-Skud\t0.4743\t0.8113"),
call("Kpha-Smik\t0.4534\t0.8505"),
call("Kpha-Scer\t0.4555\t0.8406"),
call("Kpha-Kbla\t0.4565\t0.9228"),
call("Kpha-Kafr\t0.4419\t0.945"),
call("Kpha-Sdai\t0.4576\t0.9238"),
call("Kpha-Scas\t0.4618\t0.9899"),
call("Kpha-Cgla\t0.4262\t0.8472"),
call("Snag-Suva\t0.4482\t0.7281"),
call("Snag-Skud\t0.4492\t0.7678"),
call("Snag-Smik\t0.4461\t0.807"),
call("Snag-Scer\t0.445\t0.797"),
call("Snag-Kbla\t0.4785\t1.0535"),
call("Snag-Kafr\t0.4461\t1.0757"),
call("Snag-Sdai\t0.4775\t1.0544"),
call("Snag-Scas\t0.5016\t1.1206"),
call("Snag-Cgla\t0.4869\t0.9778"),
call("Suva-Skud\t0.1895\t0.229"),
call("Suva-Smik\t0.2031\t0.2682"),
call("Suva-Scer\t0.2042\t0.2583"),
call("Suva-Kbla\t0.4639\t0.9997"),
call("Suva-Kafr\t0.4838\t1.0219"),
call("Suva-Sdai\t0.4691\t1.0007"),
call("Suva-Scas\t0.4838\t1.0668"),
call("Suva-Cgla\t0.4293\t0.9241"),
call("Skud-Smik\t0.1801\t0.2171"),
call("Skud-Scer\t0.1675\t0.2071"),
call("Skud-Kbla\t0.4817\t1.0394"),
call("Skud-Kafr\t0.4712\t1.0616"),
call("Skud-Sdai\t0.4911\t1.0403"),
call("Skud-Scas\t0.4995\t1.1065"),
call("Skud-Cgla\t0.4398\t0.9637"),
call("Smik-Scer\t0.1686\t0.1998"),
call("Smik-Kbla\t0.4618\t1.0786"),
call("Smik-Kafr\t0.4775\t1.1008"),
call("Smik-Sdai\t0.466\t1.0796"),
call("Smik-Scas\t0.4743\t1.1457"),
call("Smik-Cgla\t0.4471\t1.003"),
call("Scer-Kbla\t0.4628\t1.0686"),
call("Scer-Kafr\t0.4838\t1.0908"),
call("Scer-Sdai\t0.4702\t1.0696"),
call("Scer-Scas\t0.4764\t1.1357"),
call("Scer-Cgla\t0.4492\t0.993"),
call("Kbla-Kafr\t0.4314\t0.67"),
call("Kbla-Sdai\t0.4387\t0.8756"),
call("Kbla-Scas\t0.4607\t0.9418"),
call("Kbla-Cgla\t0.4492\t0.799"),
call("Kafr-Sdai\t0.466\t0.8978"),
call("Kafr-Scas\t0.4848\t0.964"),
call("Kafr-Cgla\t0.445\t0.8212"),
call("Sdai-Scas\t0.3644\t0.5357"),
call("Sdai-Cgla\t0.4325\t0.7045"),
call("Scas-Cgla\t0.4482\t0.7706"),
]

0 comments on commit 680eaca

Please sign in to comment.