Mglob prior #485

Merged: 16 commits, Nov 12, 2024. The diff below shows the changes from all commits.

multiego.py (7 changes: 3 additions & 4 deletions)
@@ -93,7 +93,7 @@ def meGO_parsing():

# mego_topology = pmd.load_file(f"{args.root_dir}/inputs/{args.system}/topol.top")
# topol_names = [m for m in mego_topology.molecules]

custom_dict = {}
if args.custom_dict:
custom_dict = parse_json(args.custom_dict)
@@ -102,7 +102,7 @@ def meGO_parsing():
sys.exit()

mego_ensemble = ensemble.init_meGO_ensemble(args, custom_dict)
topol_names = [ m for m in mego_ensemble['topology'].molecules ]
topol_names = [m for m in mego_ensemble["topology"].molecules]

args.names = []
for name in args.multi_epsilon_intra.keys():
@@ -188,7 +188,6 @@ def main():
generate_face.print_welcome()
print(f"Multi-eGO: {args.egos}\n")


print("- Checking for input files and folders")
io.check_files_existence(args)
if args.egos == "production":
@@ -268,4 +267,4 @@ def main():


if __name__ == "__main__":
main()
main()

src/multiego/ensemble.py (122 changes: 71 additions & 51 deletions)
@@ -225,20 +225,19 @@ def initialize_molecular_contacts(contact_matrix, prior_matrix, args):
contact_matrix["md_threshold"] = md_threshold
contact_matrix.loc[~(contact_matrix["intra_domain"]), "md_threshold"] = md_threshold_id
contact_matrix["rc_threshold"] = contact_matrix["md_threshold"] ** (
contact_matrix["epsilon_0"] / ( prior_matrix["epsilon_prior"] + contact_matrix["epsilon_0"] - args.epsilon_min)
contact_matrix["epsilon_0"] / (prior_matrix["epsilon_prior"] + contact_matrix["epsilon_0"] - args.epsilon_min)
)
contact_matrix["limit_rc_att"] = (
# 1.0
# / contact_matrix["rc_threshold"] ** (args.epsilon_min / contact_matrix["epsilon_0"])
# * contact_matrix["zf"] ** (1 - (args.epsilon_min / contact_matrix["epsilon_0"]))
contact_matrix["rc_threshold"] ** (- (args.epsilon_min - prior_matrix["epsilon_prior"]) / contact_matrix["epsilon_0"])
* contact_matrix["zf"] ** (1 - (- (args.epsilon_min - prior_matrix["epsilon_prior"]) / contact_matrix["epsilon_0"]))
contact_matrix["rc_threshold"] ** (-(args.epsilon_min - prior_matrix["epsilon_prior"]) / contact_matrix["epsilon_0"])
* contact_matrix["zf"] ** (1 - (-(args.epsilon_min - prior_matrix["epsilon_prior"]) / contact_matrix["epsilon_0"]))
# contact_matrix["rc_threshold"] ** (- args.epsilon_min / contact_matrix["epsilon_0"]) * contact_matrix["zf"] ** (1 - (args.epsilon_min / contact_matrix["epsilon_0"]))
)
contact_matrix["limit_rc_rep"] = (
contact_matrix["rc_threshold"] ** (prior_matrix["epsilon_prior"] / contact_matrix["epsilon_0"])
* contact_matrix["zf"] ** (1 + (prior_matrix["epsilon_prior"] / contact_matrix["epsilon_0"]))
)
contact_matrix["limit_rc_rep"] = contact_matrix["rc_threshold"] ** (
prior_matrix["epsilon_prior"] / contact_matrix["epsilon_0"]
) * contact_matrix["zf"] ** (1 + (prior_matrix["epsilon_prior"] / contact_matrix["epsilon_0"]))

# TODO think on the limits of f (should be those for which all repulsive/attractive interactions are removed)
f_min = md_threshold
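
To make the prior-dependent thresholds above easier to follow, here is a minimal scalar sketch of the same relations; all numeric inputs are assumptions for illustration, while the PR evaluates them column-wise on contact_matrix and prior_matrix.

# Scalar sketch of the prior-aware thresholds (illustrative values only).
md_threshold = 1e-4      # assumed md_threshold for this contact
epsilon_0 = 0.30         # assumed epsilon_0
epsilon_min = 0.07       # assumed args.epsilon_min
epsilon_prior = 0.11     # epsilon of the prior (mg) potential for this pair, assumed
zf = 1.0                 # assumed zf factor

# the prior epsilon enters the exponent, so a stronger prior lowers rc_threshold
rc_threshold = md_threshold ** (epsilon_0 / (epsilon_prior + epsilon_0 - epsilon_min))

# probability-ratio limits beyond which a contact is treated as attractive or repulsive
limit_rc_att = rc_threshold ** (-(epsilon_min - epsilon_prior) / epsilon_0) * zf ** (
    1 - (-(epsilon_min - epsilon_prior) / epsilon_0)
)
limit_rc_rep = rc_threshold ** (epsilon_prior / epsilon_0) * zf ** (1 + epsilon_prior / epsilon_0)
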
@@ -364,8 +363,10 @@ def init_meGO_matrices(ensemble, args, custom_dict):
lj_data = topology.get_lj_params(topol)
lj_pairs = topology.get_lj_pairs(topol)
lj_data_dict = {key: val for key, val in zip(lj_data["ai"], lj_data[["c6", "c12"]].values)}
lj_pairs_dict = {(ai, aj): (epsilon, sigma) for ai, aj, epsilon, sigma in lj_pairs[['ai', 'aj', 'epsilon', 'sigma']].to_numpy() }

lj_pairs_dict = {
(ai, aj): (epsilon, sigma) for ai, aj, epsilon, sigma in lj_pairs[["ai", "aj", "epsilon", "sigma"]].to_numpy()
}

matrix_paths = glob.glob(f"{reference_path}/int??mat_?_?.ndx")
matrix_paths = matrix_paths + glob.glob(f"{reference_path}/int??mat_?_?.ndx.gz")
matrix_paths = matrix_paths + glob.glob(f"{reference_path}/int??mat_?_?.ndx.h5")
@@ -383,29 +384,41 @@ def init_meGO_matrices(ensemble, args, custom_dict):
path, ensemble["molecules_idx_sbtype_dictionary"], reference, path.endswith(".h5")
)
reference_contact_matrices[name] = reference_contact_matrices[name].add_prefix("rc_")
reference_contact_matrices[name]['c6_i'] = [ lj_data_dict[x][0] for x in reference_contact_matrices[name]['rc_ai'].str.split(r'_.+_').str[0] ]
reference_contact_matrices[name]['c6_j'] = [ lj_data_dict[x][0] for x in reference_contact_matrices[name]['rc_aj'].str.split(r'_.+_').str[0] ]
reference_contact_matrices[name]['c6'] = np.sqrt(reference_contact_matrices[name]['c6_i'] * reference_contact_matrices[name]['c6_j'])
reference_contact_matrices[name]['c12_i'] = [ lj_data_dict[x][1] for x in reference_contact_matrices[name]['rc_ai'].str.split(r'_.+_').str[0] ]
reference_contact_matrices[name]['c12_j'] = [ lj_data_dict[x][1] for x in reference_contact_matrices[name]['rc_aj'].str.split(r'_.+_').str[0] ]
reference_contact_matrices[name]['c12'] = np.sqrt(reference_contact_matrices[name]['c12_i'] * reference_contact_matrices[name]['c12_j'])
reference_contact_matrices[name]['sigma_prior'] = np.where(
reference_contact_matrices[name]['c6'] > 0,
( reference_contact_matrices[name]['c12'] / reference_contact_matrices[name]['c6'] ) ** (1/6),
reference_contact_matrices[name]['rc_cutoff'] / (2.0 ** (1.0 / 6.0))
reference_contact_matrices[name]["c6_i"] = [
lj_data_dict[x][0] for x in reference_contact_matrices[name]["rc_ai"].str.split(r"_.+_").str[0]
]
reference_contact_matrices[name]["c6_j"] = [
lj_data_dict[x][0] for x in reference_contact_matrices[name]["rc_aj"].str.split(r"_.+_").str[0]
]
reference_contact_matrices[name]["c6"] = np.sqrt(
reference_contact_matrices[name]["c6_i"] * reference_contact_matrices[name]["c6_j"]
)
reference_contact_matrices[name]['epsilon_prior'] = np.where(
reference_contact_matrices[name]['c6'] > 0,
reference_contact_matrices[name]['c6'] ** 2 / ( 4 * reference_contact_matrices[name]['c12']),
0
reference_contact_matrices[name]["c12_i"] = [
lj_data_dict[x][1] for x in reference_contact_matrices[name]["rc_ai"].str.split(r"_.+_").str[0]
]
reference_contact_matrices[name]["c12_j"] = [
lj_data_dict[x][1] for x in reference_contact_matrices[name]["rc_aj"].str.split(r"_.+_").str[0]
]
reference_contact_matrices[name]["c12"] = np.sqrt(
reference_contact_matrices[name]["c12_i"] * reference_contact_matrices[name]["c12_j"]
)
reference_contact_matrices[name]["sigma_prior"] = np.where(
reference_contact_matrices[name]["c6"] > 0,
(reference_contact_matrices[name]["c12"] / reference_contact_matrices[name]["c6"]) ** (1 / 6),
reference_contact_matrices[name]["rc_cutoff"] / (2.0 ** (1.0 / 6.0)),
)
reference_contact_matrices[name]["epsilon_prior"] = np.where(
reference_contact_matrices[name]["c6"] > 0,
reference_contact_matrices[name]["c6"] ** 2 / (4 * reference_contact_matrices[name]["c12"]),
0,
)

# apply the LJ pairs
for i, row in reference_contact_matrices[name].iterrows():
if (row['rc_ai'], row['rc_aj']) in lj_pairs_dict.keys():
reference_contact_matrices[name].loc[i, 'epsilon_prior'] = lj_pairs_dict[(row['rc_ai'], row['rc_aj'])][0]
reference_contact_matrices[name].loc[i, 'sigma_prior'] = lj_pairs_dict[(row['rc_ai'], row['rc_aj'])][1]
reference_contact_matrices[name].drop(columns=['c6_i', 'c6_j', 'c12_i', 'c12_j', 'c6', 'c12'], inplace=True)
if (row["rc_ai"], row["rc_aj"]) in lj_pairs_dict.keys():
reference_contact_matrices[name].loc[i, "epsilon_prior"] = lj_pairs_dict[(row["rc_ai"], row["rc_aj"])][0]
reference_contact_matrices[name].loc[i, "sigma_prior"] = lj_pairs_dict[(row["rc_ai"], row["rc_aj"])][1]
reference_contact_matrices[name].drop(columns=["c6_i", "c6_j", "c12_i", "c12_j", "c6", "c12"], inplace=True)

et = time.time()
elapsed_time = et - st
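
As a reading aid for the block above: the prior sigma and epsilon come from single-atom C6/C12 parameters through a geometric combination rule and the standard Lennard-Jones relations, and explicit pair (NBFIX-style) entries then override the combined values. A condensed sketch with hypothetical atom names and numbers:

import math

# Hypothetical per-atom (C6, C12) parameters, mirroring lj_data_dict (kJ/mol nm^6, kJ/mol nm^12).
lj_data_dict = {"CA": (2.44e-03, 4.94e-06), "OA": (2.26e-03, 1.51e-06)}
lj_pairs_dict = {("CA", "OA"): (0.12, 0.33)}  # hypothetical explicit pair: (epsilon, sigma)
cutoff = 0.75                                 # assumed rc_cutoff in nm

c6_i, c12_i = lj_data_dict["CA"]
c6_j, c12_j = lj_data_dict["OA"]

# geometric combination of the single-atom parameters (the c6/c12 columns above)
c6 = math.sqrt(c6_i * c6_j)
c12 = math.sqrt(c12_i * c12_j)

# C6/C12 -> (sigma, epsilon); purely repulsive pairs (c6 == 0) fall back to the cutoff and epsilon 0
sigma_prior = (c12 / c6) ** (1 / 6) if c6 > 0 else cutoff / 2 ** (1 / 6)
epsilon_prior = c6**2 / (4 * c12) if c6 > 0 else 0.0

# explicit LJ pairs win over the combined estimate
if ("CA", "OA") in lj_pairs_dict:
    epsilon_prior, sigma_prior = lj_pairs_dict[("CA", "OA")]
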
@@ -818,34 +831,34 @@ def generate_mg_LJ(meGO_ensemble):
rc_LJ["ai"].map(meGO_ensemble["sbtype_c12_dict"]) * rc_LJ["aj"].map(meGO_ensemble["sbtype_c12_dict"])
)

#N_sbtype = [
# N_sbtype = [
# sbtype for sbtype, atomtype in meGO_ensemble["sbtype_type_dict"].items() if atomtype == "N"
#]
# ]

# Generate all combinations of N_sbtype and O_OM_sbtype
#ai_aj_combinations = [(ai, aj) for ai, aj in itertools.product(N_sbtype, O_OM_sbtype)]
# ai_aj_combinations = [(ai, aj) for ai, aj in itertools.product(N_sbtype, O_OM_sbtype)]

# Create the DataFrame
#df = pd.DataFrame(ai_aj_combinations, columns=["ai", "aj"])
#df["type"] = 1
#df["c6"] = 0.11 * np.sqrt((0.0024364096 / 0.63980)*(0.0022619536 / 1.27911))
#df["c12"] = 0.11 * np.sqrt((2.319529e-06 / 0.63980)*(1e-06 / 1.27911))
# df = pd.DataFrame(ai_aj_combinations, columns=["ai", "aj"])
# df["type"] = 1
# df["c6"] = 0.11 * np.sqrt((0.0024364096 / 0.63980)*(0.0022619536 / 1.27911))
# df["c12"] = 0.11 * np.sqrt((2.319529e-06 / 0.63980)*(1e-06 / 1.27911))

#CH1a_sbtype = [sbtype for sbtype, atomtype in meGO_ensemble["sbtype_type_dict"].items() if atomtype == "CH1a"]
# CH1a_sbtype = [sbtype for sbtype, atomtype in meGO_ensemble["sbtype_type_dict"].items() if atomtype == "CH1a"]

#all_sbtypes = list(meGO_ensemble["sbtype_type_dict"].keys())
# all_sbtypes = list(meGO_ensemble["sbtype_type_dict"].keys())
## Step 3: Create a list for 'aj' by excluding 'CH1a_sbtype'
#aj_sbtype = [sbtype for sbtype in all_sbtypes if sbtype not in CH1a_sbtype]
# aj_sbtype = [sbtype for sbtype in all_sbtypes if sbtype not in CH1a_sbtype]
## Step 4: Build the DataFrame with all combinations of 'ai' and 'aj'
#ai_aj_combinations = [(ai, aj) for ai in CH1a_sbtype for aj in aj_sbtype]
# ai_aj_combinations = [(ai, aj) for ai in CH1a_sbtype for aj in aj_sbtype]
## Create the DataFrame
#df = pd.DataFrame(ai_aj_combinations, columns=["ai", "aj"])
# df = pd.DataFrame(ai_aj_combinations, columns=["ai", "aj"])

#df["type"] = 1
#df["c6"] = 0.0
#df["c12"] = np.sqrt(df["ai"].map(meGO_ensemble["sbtype_c12_dict"]) * df["aj"].map(meGO_ensemble["sbtype_c12_dict"]))
# df["type"] = 1
# df["c6"] = 0.0
# df["c12"] = np.sqrt(df["ai"].map(meGO_ensemble["sbtype_c12_dict"]) * df["aj"].map(meGO_ensemble["sbtype_c12_dict"]))

#mg_LJ = pd.concat([rc_LJ, df])
# mg_LJ = pd.concat([rc_LJ, df])

return rc_LJ

@@ -973,7 +986,9 @@ def set_sig_epsilon(meGO_LJ, needed_fields):
"""
# when distance estimates are poor we use the cutoff value
meGO_LJ.loc[(meGO_LJ["probability"] <= meGO_LJ["md_threshold"]), "distance"] = meGO_LJ["sigma_prior"] * 2.0 ** (1.0 / 6.0)
meGO_LJ.loc[(meGO_LJ["rc_probability"] <= meGO_LJ["md_threshold"]), "rc_distance"] = meGO_LJ["sigma_prior"] * 2.0 ** (1.0 / 6.0)
meGO_LJ.loc[(meGO_LJ["rc_probability"] <= meGO_LJ["md_threshold"]), "rc_distance"] = meGO_LJ["sigma_prior"] * 2.0 ** (
1.0 / 6.0
)

# by default epsilon is set to the prior epsilon
meGO_LJ["epsilon"] = meGO_LJ["epsilon_prior"]
@@ -984,7 +999,9 @@ def set_sig_epsilon(meGO_LJ, needed_fields):
# This is always correct because distance is always well defined by either training data
# or using default C12 values
# negative epsilon are used to identify non-attractive interactions
meGO_LJ.loc[meGO_LJ["probability"] > meGO_LJ["md_threshold"], "epsilon"] = -meGO_LJ["rep"] * (meGO_LJ["distance"] / meGO_LJ["rc_distance"]) ** 12
meGO_LJ.loc[meGO_LJ["probability"] > meGO_LJ["md_threshold"], "epsilon"] = (
-meGO_LJ["rep"] * (meGO_LJ["distance"] / meGO_LJ["rc_distance"]) ** 12
)

# Attractive interactions
# These are defined only if the training probability is greater than MD_threshold and
@@ -998,20 +1015,23 @@ def set_sig_epsilon(meGO_LJ, needed_fields):
)

# General repulsive term
# this is used only when MD_p < RC_p eventually corrected by the ZF
# this is used only when MD_th < MD_p < RC_p eventually corrected by the ZF
# negative epsilon are used to identify non-attractive interactions
meGO_LJ.loc[
(
(np.maximum(meGO_LJ["probability"], meGO_LJ["rc_threshold"])
< meGO_LJ["limit_rc_rep"] * np.maximum(meGO_LJ["rc_probability"], meGO_LJ["rc_threshold"]))
(
np.maximum(meGO_LJ["probability"], meGO_LJ["rc_threshold"])
< meGO_LJ["limit_rc_rep"] * np.maximum(meGO_LJ["rc_probability"], meGO_LJ["rc_threshold"])
)
& (meGO_LJ["probability"] > meGO_LJ["md_threshold"])
),
"epsilon",
] = -(meGO_LJ["epsilon_0"] / (np.log(meGO_LJ["zf"] * meGO_LJ["rc_threshold"]))) * meGO_LJ["distance"] ** 12 * (
np.log(
np.maximum(meGO_LJ["probability"], meGO_LJ["rc_threshold"])
/ (meGO_LJ["zf"] * np.maximum(meGO_LJ["rc_probability"], meGO_LJ["rc_threshold"]))
) - meGO_LJ["epsilon_prior"]
)
- meGO_LJ["epsilon_prior"]
) - (
meGO_LJ["rep"] * (meGO_LJ["distance"] / meGO_LJ["rc_distance"]) ** 12
)
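
A scalar reading of the epsilon cascade visible in this hunk may help: the prior value is the default, any contact sampled above md_threshold first gets the repulsive default, and the general repulsive correction then applies when the training probability stays below limit_rc_rep times the reference one. The attractive branch is not fully shown in this hunk and is omitted here; all numbers in the example call are assumptions.

import numpy as np

def scalar_epsilon(p, rc_p, d, rc_d, rep, eps_0, eps_prior, md_th, rc_th, zf, limit_rc_rep):
    """Sketch of the epsilon logic shown in set_sig_epsilon, for a single contact (illustrative)."""
    eps = eps_prior                      # default: keep the prior epsilon
    if p > md_th:
        eps = -rep * (d / rc_d) ** 12    # non-attractive default, distance-rescaled (negative = repulsive)
        # general repulsive term, transcribed from the expression above
        if max(p, rc_th) < limit_rc_rep * max(rc_p, rc_th):
            eps = (
                -(eps_0 / np.log(zf * rc_th))
                * d**12
                * (np.log(max(p, rc_th) / (zf * max(rc_p, rc_th))) - eps_prior)
                - rep * (d / rc_d) ** 12
            )
    return eps

# hypothetical numbers: a contact sampled less often in training than in the reference
print(scalar_epsilon(p=2e-3, rc_p=1e-2, d=0.35, rc_d=0.33, rep=1.5e-06, eps_0=0.30,
                     eps_prior=0.11, md_th=1e-4, rc_th=1e-3, zf=1.0, limit_rc_rep=0.4))
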
@@ -1512,7 +1532,7 @@ def make_pairs_exclusion_topology(meGO_ensemble, meGO_LJ_14, args):
sbtype_with_residue = [
(sbtype, resnum_type_dict[sbtype])
for sbtype in reduced_topology["sb_type"]
#if meGO_ensemble["sbtype_type_dict"][sbtype] != "CH1a"
# if meGO_ensemble["sbtype_type_dict"][sbtype] != "CH1a"
]
# Sort the list by residue numbers
sbtype_with_residue.sort(key=lambda x: x[1])
@@ -1641,4 +1661,4 @@ def make_pairs_exclusion_topology(meGO_ensemble, meGO_LJ_14, args):

pairs_molecule_dict[molecule] = pairs

return pairs_molecule_dict
return pairs_molecule_dict

src/multiego/io.py (2 changes: 1 addition & 1 deletion)
@@ -847,4 +847,4 @@ def read_inter_file(file_path):


def read_custom_c12_parameters(file):
return pd.read_csv(file, names=["name", "at.num", "c12"], usecols=[0, 1, 6], header=0)
return pd.read_csv(file, names=["name", "at.num", "c12"], usecols=[0, 1, 6], header=0)
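
For context, read_custom_c12_parameters keeps only columns 0, 1 and 6 of the input table as name, at.num and c12. A hypothetical comma-separated input in a GROMACS atomtypes-like layout (the exact file format is an assumption here):

import io as stdlib_io
import pandas as pd

# Hypothetical custom-c12 table; only columns 0, 1 and 6 are read (name, at.num, c12).
text = """name,at.num,mass,charge,ptype,c6,c12
OA,8,15.9994,0.0,A,0.0022619536,1.505529e-06
NL,7,14.0067,0.0,A,0.0024364096,2.319529e-06
"""
df = pd.read_csv(stdlib_io.StringIO(text), names=["name", "at.num", "c12"], usecols=[0, 1, 6], header=0)
print(df)  # three columns: name, at.num, c12
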

src/multiego/resources/type_definitions.py (10 changes: 4 additions & 6 deletions)
@@ -4,8 +4,8 @@


mg_eps = 0.110
mg_eps_ch2 = 0.10
mg_eps_ch1 = 0.09
mg_eps_ch2 = 0.100
mg_eps_ch1 = 0.090
# Dataframe with GROMOS atom types and associated parameters
gromos_atp = pd.DataFrame(
{
@@ -70,9 +70,8 @@
2.319529e-06 / 0.63980 * mg_eps, # "NE",
4.937284e-06 / 0.27741 * mg_eps, # "C",
4.937284e-06 / 0.27741 * mg_eps, # "CH"
9.70225e-05 / 0.09489 * mg_eps_ch1, # "CH1"
9.70225e-05 / 0.09489 * mg_eps_ch1, # "CH1"
9.70225e-05 / 0.09489 * mg_eps_ch1, # "CH1a"
#6.555574e-05, # "CH1a"
3.3965584e-05 / 0.4105 * mg_eps_ch2, # "CH2"
2.6646244e-05 / 0.8671 * mg_eps, # "CH3"
2.8058209e-05 / 0.4792 * mg_eps_ch2, # "CH2r"
@@ -97,7 +96,6 @@
0.0023406244 / 0.27741 * mg_eps, # "CH"
0.00606841 / 0.09489 * mg_eps_ch1, # "CH1"
0.00606841 / 0.09489 * mg_eps_ch1, # "CH1a"
#0.0, # "CH1a"
0.0074684164 / 0.41054 * mg_eps_ch2, # "CH2"
0.0096138025 / 0.86715 * mg_eps, # "CH3"
0.0073342096 / 0.47928 * mg_eps_ch2, # "CH2r"
@@ -206,4 +204,4 @@ def parse_json(file_path):
print(f"Error in reading the custom dictionary: {e}")
sys.exit()
else:
return {}
return {}
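
A quick sanity check on the rescaled tables above: multiplying both C6 and C12 of a type by the same factor k scales epsilon = C6^2 / (4 * C12) by k and leaves sigma = (C12 / C6)^(1/6) untouched. Taking the NE numbers that appear in this PR (C6 = 0.0024364096, C12 = 2.319529e-06), C6^2 / (4 * C12) is about 0.6398, which matches the 0.63980 divisor, so dividing by that native well depth and multiplying by mg_eps pins the rescaled well depth at mg_eps. A minimal check:

def lj_eps_sigma(c6, c12):
    # standard Lennard-Jones relations for the C6/C12 form
    return c6**2 / (4 * c12), (c12 / c6) ** (1 / 6)

mg_eps = 0.110
c6_native, c12_native = 0.0024364096, 2.319529e-06             # NE values as they appear in this PR
eps_native, sigma_native = lj_eps_sigma(c6_native, c12_native)  # eps_native ~ 0.6398

k = mg_eps / eps_native
eps_scaled, sigma_scaled = lj_eps_sigma(k * c6_native, k * c12_native)

assert abs(eps_scaled - mg_eps) < 1e-12         # well depth rescaled to mg_eps
assert abs(sigma_scaled - sigma_native) < 1e-9  # sigma unchanged by the common factor
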

src/multiego/topology.py (48 changes: 29 additions & 19 deletions)
@@ -329,14 +329,19 @@ def get_lj_params(topology):
c6, c12 = atom.sigma * 0.1, atom.epsilon * 4.184
# epsilon = c6 ** 2 / (4 * c12) if c6 > 0 else c12
# sigma = (c12 / c6) ** (1 / 6) if c6 > 0 else 0
lj_params = pd.concat([lj_params, pd.DataFrame(
{
"ai": atom.name,
"c6": c6,
"c12": c12,
},
index=[0],
)])
lj_params = pd.concat(
[
lj_params,
pd.DataFrame(
{
"ai": atom.name,
"c6": c6,
"c12": c12,
},
index=[0],
),
]
)
lj_params.drop_duplicates(inplace=True)
lj_params = lj_params.reset_index()
return lj_params
@@ -360,17 +365,22 @@ def get_lj_pairs(topology):
for sbtype_i, sbtype_j in topology.parameterset.nbfix_types:
key = (sbtype_i, sbtype_j)
c12, c6 = topology.parameterset.nbfix_types[key][0] * 4.184, topology.parameterset.nbfix_types[key][1] * 4.184
epsilon = c6 ** 2 / (4 * c12) if c6 > 0 else c12
epsilon = c6**2 / (4 * c12) if c6 > 0 else c12
sigma = (c12 / c6) ** (1 / 6) if c6 > 0 else 0
lj_pairs = pd.concat([lj_pairs, pd.DataFrame(
{
"ai": sbtype_i,
"aj": sbtype_j,
"epsilon": epsilon,
"sigma": sigma,
},
index=[0],
)])
lj_pairs = pd.concat(
[
lj_pairs,
pd.DataFrame(
{
"ai": sbtype_i,
"aj": sbtype_j,
"epsilon": epsilon,
"sigma": sigma,
},
index=[0],
),
]
)
lj_pairs = lj_pairs.reset_index()

return lj_pairs
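
The NBFIX-style pairs above are converted to (epsilon, sigma) from the (c12, c6) values read out of parmed's nbfix_types; the 4.184 factors presumably convert kcal-based units to kJ. A small round-trip check of the conversion formulas (arbitrary illustrative values):

# round-trip check of the C6/C12 <-> (sigma, epsilon) relations used in get_lj_pairs
epsilon, sigma = 0.12, 0.33          # hypothetical pair parameters (kJ/mol, nm)
c6 = 4.0 * epsilon * sigma**6
c12 = 4.0 * epsilon * sigma**12

assert abs(c6**2 / (4 * c12) - epsilon) < 1e-12    # epsilon recovered
assert abs((c12 / c6) ** (1 / 6) - sigma) < 1e-12  # sigma recovered
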
@@ -616,4 +626,4 @@ def protein_LJ14(reduced_topology):
pairs["ai"] = pairs["ai"].astype(str)
pairs["aj"] = pairs["aj"].astype(str)

return pairs
return pairs