Skip to content

Commit

Permalink
Merge branch 'main' into mglob_prior
Browse files Browse the repository at this point in the history
  • Loading branch information
carlocamilloni committed Dec 7, 2023
2 parents 6618190 + 8f4b52a commit adcc805
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 40 deletions.
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Multi-*e*GO: a multi-ensemble Gō model
[![Version](https://img.shields.io/badge/Version-beta1-blue)](https://github.com/multi-ego/multi-eGO/releases)
![Generic badge](https://img.shields.io/badge/Codename-Vanessa-<COLOR>.svg)
[![Version](https://img.shields.io/badge/Version-beta.1-blue)](https://github.com/multi-ego/multi-eGO/releases)
[![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg)](http://www.gnu.org/licenses/gpl-3.0)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![CI](https://github.com/plumed/plumed2/workflows/CI/badge.svg?branch=master)](https://github.com/plumed/plumed2/actions)
Expand Down Expand Up @@ -55,7 +54,7 @@ gmx pdb2gmx -f file.pdb -ignh
and select the multi-ego-basic force field. From this you should get a structure file (.gro) and a topology file (.top). In the ```multi-eGO/inputs``` folder, add a folder for your system containing a ```reference/``` subfolder. Copy your GROMACS topology into this ```reference/``` subfolder so that the final structure looks like ```multi-eGO/inputs/$SYSTEM_NAME/reference```.

> [!NOTE]
> When using a system with disulfide bridges, it is as of version Vanessa (beta.1) necessary to remove the comments from ```ffbonded.itp``` in the ```multi-ego-basic.ff/``` folder and later to add them in the .top file.
> When using a system with disulfide bridges, as of version `beta.1` it is necessary to remove the comments from ```ffbonded.itp``` in the ```multi-ego-basic.ff/``` folder and later to add them to the .top file.
### Setup of a multi-*e*GO random coil simulation

Expand Down
8 changes: 4 additions & 4 deletions codecheck.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ fi
echo "Running the regtests"
python test/run_tests.py >& /dev/null
if [ $? -eq 1 ]; then
echo "Regtests failing, you should not push your code"
echo $flak
elif [ $? -eq 0 ]; then
echo "Regtests failing, you should not push your code"
echo $flak
else
echo "Regtests passed"
if [ -z "$flak" ]; then
echo "Ready to push"
else
echo "Regtest passed"
echo $flak
fi
fi
4 changes: 4 additions & 0 deletions multiego.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from src.multiego import ensemble
from src.multiego import io
from src.multiego.util import float_range
from tools.face_generator import generate_face


def main():
Expand Down Expand Up @@ -149,6 +150,7 @@ def main():
if not os.path.exists(f"{args.root_dir}/outputs"):
os.mkdir(f"{args.root_dir}/outputs")

generate_face.print_wellcome()
output_dir = io.create_output_directories(args)

print("- Checking for input files and folders")
Expand All @@ -174,6 +176,8 @@ def main():

io.write_model(meGO_ensemble, meGO_LJ, meGO_LJ_14, args, output_dir, args.out)

generate_face.print_goodbye()


if __name__ == "__main__":
main()
51 changes: 35 additions & 16 deletions src/multiego/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ def initialize_molecular_contacts(contact_matrix, path, ensemble_molecules_idx_s
A contact matrix containing contact data for each of the different simulations
"""

print("\t\t-", f"Initializing {simulation} contact matrix")
molecule_names_dictionary = {}
for molecule_name in ensemble_molecules_idx_sbtype_dictionary.keys():
name = molecule_name.split("_", maxsplit=1)
Expand Down Expand Up @@ -269,11 +268,16 @@ def init_meGO_ensemble(args):
Returns:
- ensemble (dict): A dictionary containing the initialized ensemble with various molecular attributes and contact matrices.
This function sets up meGO by initializing the reference topology and processing train and check contact matrices based on the provided arguments. It reads topology files, loads molecular information, and sets up dictionaries and data frames to organize molecular data and contact matrices.
This function sets up meGO by initializing the reference topology and processing train and check contact matrices based
on the provided arguments. It reads topology files, loads molecular information, and sets up dictionaries and data frames
to organize molecular data and contact matrices.
The function initializes the reference topology and extracts essential molecular details such as topological data frames, subtype dictionaries, c12 values, names, molecule types, and contact matrices for the reference ensemble. It then processes train and check contact matrices, aligning them with the reference ensemble to detect any differences in atom types.
The function initializes the reference topology and extracts essential molecular details such as topological data frames,
subtype dictionaries, c12 values, names, molecule types, and contact matrices for the reference ensemble. It then processes
train and check contact matrices, aligning them with the reference ensemble to detect any differences in atom types.
If atom type differences are found between ensembles, the function prints a warning message and exits, indicating the need to add missing atom types to the conversion dictionary for proper contact merging.
If atom type differences are found between ensembles, the function prints a warning message and exits, indicating
the need to add missing atom types to the conversion dictionary for proper contact merging.
Note:
- This function assumes the availability of various directories, files, and modules (e.g., 'parmed', 'io').
Expand Down Expand Up @@ -362,7 +366,6 @@ def init_meGO_ensemble(args):
warnings.simplefilter("ignore")
topology = parmed.load_file(topology_path)

print("\t-", f"{simulation} topology contains: {topology.molecules}")
(
temp_topology_dataframe,
molecules_idx_sbtype_dictionary,
Expand Down Expand Up @@ -532,13 +535,19 @@ def generate_14_data(meGO_ensemble):
- pairs14 (DataFrame): DataFrame containing information about 1-4 interactions.
- exclusion_bonds14 (DataFrame): DataFrame containing exclusion bonded interactions.
This function generates data for 1-4 interactions within a molecular ensemble. It iterates through each molecule in the ensemble, processes the topology, and computes exclusion bonded interactions and specific 1-4 interactions.
This function generates data for 1-4 interactions within a molecular ensemble.
It iterates through each molecule in the ensemble, processes the topology, and computes
exclusion bonded interactions and specific 1-4 interactions.
The function creates DataFrames 'pairs14' and 'exclusion_bonds14' containing information about 1-4 interactions and exclusion bonded interactions, respectively. It extracts details such as atom numbers, subtypes, residue numbers, names, types, residue names, molecule types, and interaction characteristics.
The function creates DataFrames 'pairs14' and 'exclusion_bonds14' containing information
about 1-4 interactions and exclusion bonded interactions, respectively.
It extracts details such as atom numbers, subtypes, residue numbers, names, types, residue names,
molecule types, and interaction characteristics.
Note:
- The 'meGO_ensemble' dictionary is expected to contain necessary details regarding the molecular ensemble.
- The returned DataFrames provide comprehensive information about 1-4 interactions and exclusion bonded interactions within the ensemble for further analysis or processing.
- The returned DataFrames provide comprehensive information about 1-4 interactions and exclusion bonded
interactions within the ensemble for further analysis or processing.
"""
# First of all we generate the random-coil 1-4 interactions:
pairs14 = pd.DataFrame()
Expand Down Expand Up @@ -631,16 +640,23 @@ def init_LJ_datasets(meGO_ensemble, pairs14, exclusion_bonds14):
- train_dataset (DataFrame): DataFrame containing LJ datasets for the train matrices.
- check_dataset (DataFrame): DataFrame containing LJ datasets for the check matrices.
This function initializes LJ datasets for train and check matrices within a molecular ensemble. It processes the train and check matrices by merging them with reference matrices, assigning 1-4 interactions, setting default c12 values, and updating specialized cases.
This function initializes LJ datasets for train and check matrices within a molecular ensemble.
It processes the train and check matrices by merging them with reference matrices, assigning
1-4 interactions, setting default c12 values, and updating specialized cases.
The function generates DataFrames 'train_dataset' and 'check_dataset' containing LJ datasets for the train and check matrices, respectively. It performs various operations, such as flagging 1-4 interactions, setting correct default c12 values, and updating values for special cases based on atom types and interactions.
The function generates DataFrames 'train_dataset' and 'check_dataset' containing LJ datasets
for the train and check matrices, respectively. It performs various operations, such as flagging
1-4 interactions, setting correct default c12 values, and updating values for special cases based
on atom types and interactions.
Note:
- The 'meGO_ensemble' dictionary is expected to contain necessary details regarding the molecular ensemble.
- The 'pairs14' DataFrame contains information about 1-4 interactions, and 'exclusion_bonds14' DataFrame contains exclusion bonded interactions.
- The 'pairs14' DataFrame contains information about 1-4 interactions, and 'exclusion_bonds14' DataFrame
contains exclusion bonded interactions.
- The returned DataFrames provide comprehensive LJ datasets for further analysis or processing within the ensemble.
"""
# we cycle over train matrices to pair them with reference matrices and then we add 1-4 assignments and defaults c12s and concatenate everything
# we cycle over train matrices to pair them with reference matrices, then
# we add 1-4 assignments and default c12 values, and concatenate everything
train_dataset = pd.DataFrame()
for name, ref_name in meGO_ensemble["train_matrix_tuples"]:
# sysname_train_from_intramat_1_1 <-> sysname_reference_intramat_1_1
Expand Down Expand Up @@ -701,7 +717,8 @@ def init_LJ_datasets(meGO_ensemble, pairs14, exclusion_bonds14):
)
train_dataset["rep"] = train_dataset["rep"].fillna(pd.Series(pairwise_c12))

# we cycle over check matrices to pair them with reference matrices and then we add 1-4 assignments and defaults c12s and concatenate everything
# we cycle over check matrices to pair them with reference matrices, then
# we add 1-4 assignments and default c12 values, and concatenate everything
check_dataset = pd.DataFrame()
for name, ref_name in meGO_ensemble["check_matrix_tuples"]:
# sysname_check_from_intramat_1_1 <-> sysname_reference_intramat_1_1
Expand Down Expand Up @@ -730,7 +747,7 @@ def init_LJ_datasets(meGO_ensemble, pairs14, exclusion_bonds14):
"1-4",
] = "0"
check_dataset["1-4"] = check_dataset["1-4"].fillna("1>4")
# This is to set the correct default C12 values taking into account specialised 1-4 values (including the special 1-5 O-O)
# This is to set the correct default C12 values taking into account specialised 1-4 values
check_dataset = pd.merge(
check_dataset,
pairs14[["ai", "aj", "same_chain", "rep"]],
Expand Down Expand Up @@ -774,11 +791,13 @@ def generate_basic_LJ(meGO_ensemble):
Returns:
- basic_LJ (DataFrame): DataFrame containing basic LJ interactions.
This function generates a DataFrame 'basic_LJ' containing basic LJ interactions within a molecular ensemble. It calculates LJ interactions based on atom types, molecules, and reference matrices present in the ensemble.
This function generates a DataFrame 'basic_LJ' containing basic LJ interactions within a molecular ensemble.
It calculates LJ interactions based on atom types, molecules, and reference matrices present in the ensemble.
Note:
- The 'meGO_ensemble' dictionary is expected to contain necessary details regarding the molecular ensemble.
- The returned DataFrame 'basic_LJ' includes columns defining LJ interaction properties such as atom indices, types, c6, c12, sigma, epsilon, probability, rc_probability, molecule names, source, and thresholds.
- The returned DataFrame 'basic_LJ' includes columns defining LJ interaction properties such as atom indices,
types, c6, c12, sigma, epsilon, probability, rc_probability, molecule names, source, and thresholds.
- The generated DataFrame provides basic LJ interactions for further analysis or processing within the ensemble.
"""
columns = [
Expand Down
22 changes: 13 additions & 9 deletions src/multiego/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ def read_molecular_contacts(path):
Returns
-------
contact_matrix : pd.DataFrame
The content of the intra-/intermat file returned as a dataframe with columns ['molecule_number_ai', 'ai', 'molecule_number_aj', 'aj', 'distance', 'probability', 'cutoff']
The content of the intra-/intermat file returned as a dataframe with columns
['molecule_number_ai', 'ai', 'molecule_number_aj', 'aj', 'distance', 'probability', 'cutoff']
"""

print("\t-", f"Reading {path}")
Expand Down Expand Up @@ -119,14 +120,16 @@ def write_model(meGO_ensemble, meGO_LJ_potential, meGO_LJ_14, parameters, output
- LJ parameterization completed with a total of {len(meGO_LJ_potential)} contacts.
- Attractive: {len(meGO_LJ_potential['epsilon'].loc[meGO_LJ_potential['epsilon']>0.])}
- Repulsive: {len(meGO_LJ_potential['epsilon'].loc[meGO_LJ_potential['epsilon']<0.])}
- The average epsilon is {meGO_LJ_potential['epsilon'].loc[meGO_LJ_potential['epsilon']>0.].mean():{5}.{3}}
- Epsilon range is [min:max] [{meGO_LJ_potential['epsilon'].loc[meGO_LJ_potential['epsilon']>0.].min():{5}.{3}}:{meGO_LJ_potential['epsilon'].max():{5}.{3}}]
- Sigma range is [min:max] [{meGO_LJ_potential['sigma'].loc[meGO_LJ_potential['epsilon']>0.].min():{5}.{3}}:{meGO_LJ_potential['sigma'].loc[meGO_LJ_potential['epsilon']>0.].max():{5}.{3}}]
- Suggested rlist and cut-off at {1.1*2.5*meGO_LJ_potential['sigma'].loc[meGO_LJ_potential['epsilon']>0.].max():{4}.{3}} and {2.5*meGO_LJ_potential['sigma'].loc[meGO_LJ_potential['epsilon']>0.].max():{4}.{3}} nm
- The average epsilon is: {meGO_LJ_potential['epsilon'].loc[meGO_LJ_potential['epsilon']>0.].mean():{5}.{3}} kJ/mol
- Epsilon range is: [{meGO_LJ_potential['epsilon'].loc[meGO_LJ_potential['epsilon']>0.].min():{5}.{3}}:{meGO_LJ_potential['epsilon'].max():{5}.{3}}] kJ/mol
- Sigma range is: [{meGO_LJ_potential['sigma'].loc[meGO_LJ_potential['epsilon']>0.].min():{5}.{3}}:{meGO_LJ_potential['sigma'].loc[meGO_LJ_potential['epsilon']>0.].max():{5}.{3}}] nm
RELEVANT MDP PARAMETERS:
- Suggested rlist value: {1.1*2.5*meGO_LJ_potential['sigma'].loc[meGO_LJ_potential['epsilon']>0.].max():{4}.{3}} nm
- Suggested cut-off value: {2.5*meGO_LJ_potential['sigma'].loc[meGO_LJ_potential['epsilon']>0.].max():{4}.{3}} nm
"""
)
print(f"\nAnd it can be found in the following folder:\n{output_dir}")
print("\nNessuno è più basito, nessuno è più sorpreso. Ognuno di voi ha capito tutto.\nCarlo is happy!\t\^o^/\n")
print(f"\n- And it can be found in the following folder:\n{output_dir}")


def dataframe_to_write(df):
Expand Down Expand Up @@ -155,10 +158,11 @@ def make_header(parameters):
now = time.strftime("%d-%m-%Y %H:%M", time.localtime())

header = f"""
; Multi-eGO force field version alpha2
; Multi-eGO force field version beta.1
; https://github.com/multi-ego/multi-eGO
; Please read and cite:
; Scalone, E. et al. PNAS 119, e2203181119 (2022)
; Scalone, E. et al. PNAS 119, e2203181119 (2022) 10.1073/pnas.2203181119
; Bacic Toplek, F., Scalone, E. et al. ChemRxiv (2023) 10.26434/chemrxiv-2023-67255-v2
; Created on the {now}
; With the following parameters:
"""
Expand Down
14 changes: 6 additions & 8 deletions tools/face_generator/generate_face.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
]


def print_goodbye():
def print_wellcome():
print("")
print("-----------------------------===========+===+++++++++++++********#########################")
print("-----------------------------=========++++**##########**********##########################")
print("-----------------------------=-==+**#%%%%####*****#####%%%@%#########%%%%%################")
Expand Down Expand Up @@ -78,15 +79,12 @@ def print_goodbye():
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@%%%@@@@@@@@@@@@@@@@@#+*%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
print("")
print("")


def print_goodbye():
print("")
print("----------------------------------------------------")
print("")
print(":-) LUIGI says goodbye (-:")
print("")
print(aforismi[random.choice(range(len(aforismi)))])
print("Luigi Pirandello, Uno, nessuno, centomila (1926)")
print()
print("")
print("----------------------------------------------------")
print("")
print("")
print("")

0 comments on commit adcc805

Please sign in to comment.