Skip to content

Commit

Permalink
Updated the saturation function to force the y-intercept to be zero when calculating slope. scikit-learn is a new dependency.
Browse files Browse the repository at this point in the history
  • Loading branch information
JLSteenwyk committed May 3, 2024
1 parent b503734 commit 0bc4800
Show file tree
Hide file tree
Showing 8 changed files with 26 additions and 11 deletions.
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ verify_ssl = true
[packages]
biopython = "==1.76"
scipy = "==1.4.1"
scikit-learn = "==1.4.2"

[requires]
python_version = "3.9"
3 changes: 3 additions & 0 deletions change_log.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
Major changes to PhyKIT are summarized here.

1.19.4
- Saturation function now reports the slope of a regression forced through the origin (y-intercept fixed at zero) instead of r-squared

1.19.3
- Saturation function now uses uncorrected distances instead of pairwise identities

Expand Down
3 changes: 3 additions & 0 deletions docs/change_log/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ Change log

Major changes to PhyKIT are summarized here.

**1.19.4**:
Saturation function now reports the slope of a regression forced through the origin (y-intercept fixed at zero) instead of r-squared

**1.19.3**:
Saturation function now uses uncorrected distances instead of pairwise identities

Expand Down
22 changes: 14 additions & 8 deletions phykit/services/tree/saturation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
import itertools
from typing import Tuple

import scipy
import numpy as np
from sklearn.linear_model import LinearRegression

from .base import Tree
from ...helpers.files import get_alignment_and_format as get_alignment_and_format_helper
Expand Down Expand Up @@ -43,14 +44,19 @@ def run(self):
uncorrected_distances,
) = self.loop_through_combos_and_calculate_pds_and_pis(combos, alignment, tree)

# calculate linear regression
_, _, r_value, _, _ = scipy.stats.linregress(
uncorrected_distances, patristic_distances
)


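# Regress uncorrected distances (y) on patristic distances (x) with the
# y-intercept fixed at zero; the fitted slope is the value that is reported.
# Fixing the y-intercept at zero follows Jeffroy et al.
# See Fig. 2: https://www.cell.com/trends/genetics/fulltext/S0168-9525(06)00051-5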
model = LinearRegression(fit_intercept=False)
model.fit(
np.array(patristic_distances).reshape(-1, 1),
np.array(uncorrected_distances)
)
# report res
self.print_res(
self.verbose, combos, uncorrected_distances, patristic_distances, r_value
self.verbose, combos, uncorrected_distances, patristic_distances, model.coef_[0]
)

def process_args(self, args):
Expand Down Expand Up @@ -107,7 +113,7 @@ def print_res(
combos: list,
uncorrected_distances: list,
patristic_distances: list,
r_value: float,
slope: float,
) -> None:
"""
print results to stdout
Expand All @@ -121,6 +127,6 @@ def print_res(
f"{combo[0]}-{combo[1]}\t{round(dist,4)}\t{round(patristic_distance, 4)}"
)
else:
print(round(r_value**2, 4))
print(round(slope, 4))
except BrokenPipeError:
pass
2 changes: 1 addition & 1 deletion phykit/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.19.3"
__version__ = "1.19.4"
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
biopython>=1.81
numpy>=1.24.0
scipy>=1.11.3
scikit-learn>=1.4.2
cython
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"biopython>=1.81",
"numpy>=1.24.0",
"scipy>=1.11.3",
"scikit-learn>=1.4.2",
"cython"
]

Expand Down
4 changes: 2 additions & 2 deletions tests/integration/tree/test_saturation_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
class TestSaturation(object):
@patch("builtins.print")
def test_saturation(self, mocked_print):
expected_result = 0.8451
expected_result = 0.4919
testargs = [
"phykit",
"saturation",
Expand All @@ -29,7 +29,7 @@ def test_saturation(self, mocked_print):

@patch("builtins.print")
def test_saturation_alias(self, mocked_print):
expected_result = 0.8451
expected_result = 0.4919
testargs = [
"phykit",
"sat",
Expand Down

0 comments on commit 0bc4800

Please sign in to comment.