Skip to content

Commit

Permalink
improved management of default interactions
Browse files Browse the repository at this point in the history
this actually fixes a bug for default cross interactions
  • Loading branch information
carlocamilloni committed Dec 6, 2024
1 parent 5c0149f commit c009760
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 98 deletions.
8 changes: 3 additions & 5 deletions multiego.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,6 @@ def main():
print("- Done in:", elapsed_time, "seconds")
print("- Initializing LJ dataset")
train_dataset = ensemble.init_LJ_datasets(meGO_ensembles, matrices, pairs14, exclusion_bonds14, args)
basic_LJ = ensemble.generate_basic_LJ(meGO_ensembles, args, matrices)
# force memory cleaning to decrease footprint in case of large dataset
del matrices
gc.collect()
Expand All @@ -218,26 +217,25 @@ def main():
st = et
print("- Done in:", elapsed_time, "seconds")
print("- Generate LJ dataset")
meGO_LJ, meGO_LJ_14 = ensemble.generate_LJ(meGO_ensembles, train_dataset, basic_LJ, args)
meGO_LJ, meGO_LJ_14 = ensemble.generate_LJ(meGO_ensembles, train_dataset, args)
# force memory cleaning to decrease footprint in case of large dataset
del train_dataset
del basic_LJ
gc.collect()
et = time.time()
elapsed_time = et - st
st = et
print("- Done in:", elapsed_time, "seconds")
elif args.egos == "mg":
print("- Generate the LJ dataset")
meGO_LJ = ensemble.generate_mg_LJ(meGO_ensembles)
meGO_LJ = ensemble.generate_OO_LJ(meGO_ensembles)
meGO_LJ_14 = pairs14
et = time.time()
elapsed_time = et - st
st = et
print("- Done in:", elapsed_time, "seconds")
else:
print("- Generate the LJ dataset")
meGO_LJ = ensemble.generate_rc_LJ(meGO_ensembles)
meGO_LJ = ensemble.generate_OO_LJ(meGO_ensembles)
meGO_LJ_14 = pairs14
et = time.time()
elapsed_time = et - st
Expand Down
105 changes: 12 additions & 93 deletions src/multiego/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,7 @@ def get_residue_number(s):
return int(s.split("_")[-1])


def generate_rc_LJ(meGO_ensemble):
def generate_OO_LJ(meGO_ensemble):
"""
The multi-eGO random coil force-field includes a special repulsive interaction only for oxygen-oxygen pairs;
these are generated in the following
Expand All @@ -846,30 +846,7 @@ def generate_rc_LJ(meGO_ensemble):
return rc_LJ


def generate_mg_LJ(meGO_ensemble):
"""
The multi-eGO random coil force-field includes special repulsive interaction for oxygen-oxygen pairs and for the
ch1a with all other atoms, these are generate in the following
"""
O_OM_sbtype = [
sbtype for sbtype, atomtype in meGO_ensemble["sbtype_type_dict"].items() if atomtype == "O" or atomtype == "OM"
]

# Generate all possible combinations
combinations = list(itertools.combinations_with_replacement(O_OM_sbtype, 2))

# Create a DataFrame from the combinations
rc_LJ = pd.DataFrame(combinations, columns=["ai", "aj"])
rc_LJ["type"] = 1
rc_LJ["c6"] = 0.0
rc_LJ["c12"] = 11.4 * np.sqrt(
rc_LJ["ai"].map(meGO_ensemble["sbtype_c12_dict"]) * rc_LJ["aj"].map(meGO_ensemble["sbtype_c12_dict"])
)

return rc_LJ


def generate_basic_LJ(meGO_ensemble, args, matrices=None):
def generate_basic_LJ(meGO_ensemble):
"""
Generates basic LJ (Lennard-Jones) interactions DataFrame within a molecular ensemble.
Expand All @@ -888,69 +865,15 @@ def generate_basic_LJ(meGO_ensemble, args, matrices=None):
types, c6, c12, sigma, epsilon, probability, rc_probability, molecule names, source, and thresholds.
- The generated DataFrame provides basic LJ interactions for further analysis or processing within the ensemble.
"""
columns = [
"ai",
"aj",
"type",
"c6",
"c12",
"sigma",
"epsilon",
"probability",
"rc_probability",
"distance",
"molecule_name_ai",
"molecule_name_aj",
"same_chain",
"source",
"md_threshold",
"rc_threshold",
"number_ai",
"number_aj",
"cutoff",
]

basic_LJ = pd.DataFrame()
topol_df = meGO_ensemble["topology_dataframe"]

name_to_c12 = {key: val for key, val in zip(type_definitions.gromos_atp.name, type_definitions.gromos_atp.rc_c12)}
if args.custom_c12 is not None:
custom_c12_dict = io.read_custom_c12_parameters(args.custom_c12)
name_to_c12_appo = {key: val for key, val in zip(custom_c12_dict.name, custom_c12_dict.c12)}
name_to_c12.update(name_to_c12_appo)

for name in matrices["reference_matrices"].keys():
temp_basic_LJ = pd.DataFrame(columns=columns)
mol_num_i = str(name.split("_")[-2])
mol_num_j = str(name.split("_")[-1])
ensemble = matrices["reference_matrices"][name]
temp_basic_LJ["ai"] = ensemble["rc_ai"]
temp_basic_LJ["aj"] = ensemble["rc_aj"]
temp_basic_LJ["type"] = 1
temp_basic_LJ["c6"] = 0.0
temp_basic_LJ["c12"] = 0.0
temp_basic_LJ["same_chain"] = ensemble["rc_same_chain"]
temp_basic_LJ["molecule_name_ai"] = ensemble["rc_molecule_name_ai"]
temp_basic_LJ["molecule_name_aj"] = ensemble["rc_molecule_name_aj"]
temp_basic_LJ["source"] = "basic"

atom_set_i = topol_df[topol_df["molecule_number"] == mol_num_i]["type"]
atom_set_j = topol_df[topol_df["molecule_number"] == mol_num_j]["type"]
c12_list_i = atom_set_i.map(name_to_c12).to_numpy(dtype=np.float64)
c12_list_j = atom_set_j.map(name_to_c12).to_numpy(dtype=np.float64)
ai_name = atom_set_i.to_numpy(dtype=str)
aj_name = atom_set_j.to_numpy(dtype=str)
oxygen_mask = masking.create_array_mask(ai_name, aj_name, [("O", "OM"), ("O", "O"), ("OM", "OM")], symmetrize=True)
temp_basic_LJ["c12"] = 11.4 * np.sqrt(c12_list_i * c12_list_j[:, np.newaxis]).flatten()
temp_basic_LJ["rep"] = temp_basic_LJ["c12"]
temp_basic_LJ["mg_sigma"] = temp_basic_LJ["c12"] ** (1 / 12)
temp_basic_LJ["mg_epsilon"] = -temp_basic_LJ["c12"]
temp_basic_LJ = temp_basic_LJ[oxygen_mask]
temp_basic_LJ = temp_basic_LJ.dropna(axis=1, how="all")
temp_basic_LJ = temp_basic_LJ.drop_duplicates(subset=["ai", "aj", "same_chain"], keep="first")

basic_LJ = pd.concat([basic_LJ, temp_basic_LJ])

basic_LJ = generate_OO_LJ(meGO_ensemble)
basic_LJ["same_chain"] = False
basic_LJ["source"] = "basic"
basic_LJ["rep"] = basic_LJ["c12"]
basic_LJ["mg_sigma"] = basic_LJ["c12"] ** (1 / 12)
basic_LJ["mg_epsilon"] = -basic_LJ["c12"]
basic_LJ["molecule_name_ai"] = basic_LJ["ai"].apply(lambda x: "_".join(x.split("_")[1:-1]))
basic_LJ["molecule_name_aj"] = basic_LJ["aj"].apply(lambda x: "_".join(x.split("_")[1:-1]))
basic_LJ["probability"] = 1.0
basic_LJ["rc_probability"] = 1.0
basic_LJ["rc_threshold"] = 1.0
Expand All @@ -961,10 +884,6 @@ def generate_basic_LJ(meGO_ensemble, args, matrices=None):
basic_LJ["distance"] = basic_LJ["cutoff"]
basic_LJ["learned"] = 0
basic_LJ["1-4"] = "1>4"
# Sorting the pairs prioritising intermolecular interactions
basic_LJ.sort_values(by=["ai", "aj", "same_chain"], ascending=[True, True, True], inplace=True)
# Cleaning the duplicates
basic_LJ = basic_LJ.drop_duplicates(subset=["ai", "aj"], keep="first")

return basic_LJ

Expand Down Expand Up @@ -1200,7 +1119,7 @@ def apply_symmetries(meGO_ensemble, meGO_input, symmetry):
return tmp_df


def generate_LJ(meGO_ensemble, train_dataset, basic_LJ, parameters):
def generate_LJ(meGO_ensemble, train_dataset, parameters):
"""
Generates LJ (Lennard-Jones) interactions and associated atomic contacts within a molecular ensemble.
Expand Down Expand Up @@ -1405,7 +1324,7 @@ def generate_LJ(meGO_ensemble, train_dataset, basic_LJ, parameters):

# Now is time to add masked default interactions for pairs
# that have not been learned in any other way
basic_LJ = basic_LJ[needed_fields]
basic_LJ = generate_basic_LJ(meGO_ensemble)[needed_fields]
meGO_LJ = pd.concat([meGO_LJ, basic_LJ])

# make meGO_LJ fully symmetric
Expand Down

0 comments on commit c009760

Please sign in to comment.