Merge branch 'main' into mglob_prior

carlocamilloni · Dec 7, 2023 · adcc805 · adcc805
2 parents 6618190 + 8f4b52a
commit adcc805
Show file tree

Hide file tree

Showing 6 changed files with 64 additions and 40 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,5 @@
 # Multi-*e*GO: a multi-ensemble Gō model
-[![Version](https://img.shields.io/badge/Version-beta1-blue)](https://github.com/multi-ego/multi-eGO/releases)
-![Generic badge](https://img.shields.io/badge/Codename-Vanessa-<COLOR>.svg)
+[![Version](https://img.shields.io/badge/Version-beta.1-blue)](https://github.com/multi-ego/multi-eGO/releases)
 [![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg)](http://www.gnu.org/licenses/gpl-3.0)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 [![CI](https://github.com/plumed/plumed2/workflows/CI/badge.svg?branch=master)](https://github.com/plumed/plumed2/actions)
@@ -55,7 +54,7 @@ gmx pdb2gmx -f file.pdb -ignh
 and select the multi-ego-basic force-field. From this you should get a (.gro) file for your structure and a (.top) topology file. In the ```multi-eGO/inputs``` folder add a folder for your system and a ```reference/``` subfolder. Copy your GROMACS topology in this ```reference/``` subfolder so that the final structure looks like ```multi-eGO/inputs/$SYSTEM_NAME/reference```
 
 > [!NOTE]
-> When using a system with disulfide bridges, it is as of version Vanessa (beta.1) necessary to remove the comments from ```ffbonded.itp``` in the ```multi-ego-basic.ff/``` folder and later to add them in the .top file.
+> When using a system with disulfide bridges, as of version `beta.1` it is necessary to remove the comments from ```ffbonded.itp``` in the ```multi-ego-basic.ff/``` folder and later to add them to the .top file.
 
 ### Setup of a multi-*e*GO random coil simulation
 

diff --git a/codecheck.sh b/codecheck.sh
@@ -13,13 +13,13 @@ fi
 echo "Running the regtests"
 python test/run_tests.py >& /dev/null
 if [ $? -eq 1 ]; then
-echo "Regtests failing, you should not push your code"
-echo $flak
-elif [ $? -eq 0 ]; then
+  echo "Regtests failing, you should not push your code"
+  echo $flak
+else
+  echo "Regtests passed"
   if [ -z "$flak" ]; then
     echo "Ready to push"
   else
-    echo "Regtest passed"
     echo $flak
   fi
 fi
diff --git a/multiego.py b/multiego.py
@@ -5,6 +5,7 @@
 from src.multiego import ensemble
 from src.multiego import io
 from src.multiego.util import float_range
+from tools.face_generator import generate_face
 
 
 def main():
@@ -149,6 +150,7 @@ def main():
     if not os.path.exists(f"{args.root_dir}/outputs"):
         os.mkdir(f"{args.root_dir}/outputs")
 
+    generate_face.print_wellcome()
     output_dir = io.create_output_directories(args)
 
     print("- Checking for input files and folders")
@@ -174,6 +176,8 @@ def main():
 
     io.write_model(meGO_ensemble, meGO_LJ, meGO_LJ_14, args, output_dir, args.out)
 
+    generate_face.print_goodbye()
+
 
 if __name__ == "__main__":
     main()
diff --git a/src/multiego/ensemble.py b/src/multiego/ensemble.py
@@ -165,7 +165,6 @@ def initialize_molecular_contacts(contact_matrix, path, ensemble_molecules_idx_s
         A contact matrix containing contact data for each of the different simulations
     """
 
-    print("\t\t-", f"Initializing {simulation} contact matrix")
     molecule_names_dictionary = {}
     for molecule_name in ensemble_molecules_idx_sbtype_dictionary.keys():
         name = molecule_name.split("_", maxsplit=1)
@@ -269,11 +268,16 @@ def init_meGO_ensemble(args):
     Returns:
     - ensemble (dict): A dictionary containing the initialized ensemble with various molecular attributes and contact matrices.
 
-    This function sets up meGO by initializing the reference topology and processing train and check contact matrices based on the provided arguments. It reads topology files, loads molecular information, and sets up dictionaries and data frames to organize molecular data and contact matrices.
+    This function sets up meGO by initializing the reference topology and processing train and check contact matrices based
+    on the provided arguments. It reads topology files, loads molecular information, and sets up dictionaries and data frames
+    to organize molecular data and contact matrices.
 
-    The function initializes the reference topology and extracts essential molecular details such as topological data frames, subtype dictionaries, c12 values, names, molecule types, and contact matrices for the reference ensemble. It then processes train and check contact matrices, aligning them with the reference ensemble to detect any differences in atom types.
+    The function initializes the reference topology and extracts essential molecular details such as topological data frames,
+    subtype dictionaries, c12 values, names, molecule types, and contact matrices for the reference ensemble. It then processes
+    train and check contact matrices, aligning them with the reference ensemble to detect any differences in atom types.
 
-    If atom type differences are found between ensembles, the function prints a warning message and exits, indicating the need to add missing atom types to the conversion dictionary for proper contact merging.
+    If atom type differences are found between ensembles, the function prints a warning message and exits, indicating
+    the need to add missing atom types to the conversion dictionary for proper contact merging.
 
     Note:
     - This function assumes the availability of various directories, files, and modules (e.g., 'parmed', 'io').
@@ -362,7 +366,6 @@ def init_meGO_ensemble(args):
             warnings.simplefilter("ignore")
             topology = parmed.load_file(topology_path)
 
-        print("\t-", f"{simulation} topology contains: {topology.molecules}")
         (
             temp_topology_dataframe,
             molecules_idx_sbtype_dictionary,
@@ -532,13 +535,19 @@ def generate_14_data(meGO_ensemble):
     - pairs14 (DataFrame): DataFrame containing information about 1-4 interactions.
     - exclusion_bonds14 (DataFrame): DataFrame containing exclusion bonded interactions.
 
-    This function generates data for 1-4 interactions within a molecular ensemble. It iterates through each molecule in the ensemble, processes the topology, and computes exclusion bonded interactions and specific 1-4 interactions.
+    This function generates data for 1-4 interactions within a molecular ensemble.
+    It iterates through each molecule in the ensemble, processes the topology, and computes
+    exclusion bonded interactions and specific 1-4 interactions.
 
-    The function creates DataFrames 'pairs14' and 'exclusion_bonds14' containing information about 1-4 interactions and exclusion bonded interactions, respectively. It extracts details such as atom numbers, subtypes, residue numbers, names, types, residue names, molecule types, and interaction characteristics.
+    The function creates DataFrames 'pairs14' and 'exclusion_bonds14' containing information
+    about 1-4 interactions and exclusion bonded interactions, respectively.
+    It extracts details such as atom numbers, subtypes, residue numbers, names, types, residue names,
+    molecule types, and interaction characteristics.
 
     Note:
     - The 'meGO_ensemble' dictionary is expected to contain necessary details regarding the molecular ensemble.
-    - The returned DataFrames provide comprehensive information about 1-4 interactions and exclusion bonded interactions within the ensemble for further analysis or processing.
+    - The returned DataFrames provide comprehensive information about 1-4 interactions and exclusion bonded
+      interactions within the ensemble for further analysis or processing.
     """
     # First of all we generate the random-coil 1-4 interactions:
     pairs14 = pd.DataFrame()
@@ -631,16 +640,23 @@ def init_LJ_datasets(meGO_ensemble, pairs14, exclusion_bonds14):
     - train_dataset (DataFrame): DataFrame containing LJ datasets for the train matrices.
     - check_dataset (DataFrame): DataFrame containing LJ datasets for the check matrices.
 
-    This function initializes LJ datasets for train and check matrices within a molecular ensemble. It processes the train and check matrices by merging them with reference matrices, assigning 1-4 interactions, setting default c12 values, and updating specialized cases.
+    This function initializes LJ datasets for train and check matrices within a molecular ensemble.
+    It processes the train and check matrices by merging them with reference matrices, assigning
+    1-4 interactions, setting default c12 values, and updating specialized cases.
 
-    The function generates DataFrames 'train_dataset' and 'check_dataset' containing LJ datasets for the train and check matrices, respectively. It performs various operations, such as flagging 1-4 interactions, setting correct default c12 values, and updating values for special cases based on atom types and interactions.
+    The function generates DataFrames 'train_dataset' and 'check_dataset' containing LJ datasets
+    for the train and check matrices, respectively. It performs various operations, such as flagging
+    1-4 interactions, setting correct default c12 values, and updating values for special cases based
+    on atom types and interactions.
 
     Note:
     - The 'meGO_ensemble' dictionary is expected to contain necessary details regarding the molecular ensemble.
-    - The 'pairs14' DataFrame contains information about 1-4 interactions, and 'exclusion_bonds14' DataFrame contains exclusion bonded interactions.
+    - The 'pairs14' DataFrame contains information about 1-4 interactions, and 'exclusion_bonds14' DataFrame
+      contains exclusion bonded interactions.
     - The returned DataFrames provide comprehensive LJ datasets for further analysis or processing within the ensemble.
     """
-    # we cycle over train matrices to pair them with reference matrices and then we add 1-4 assignments and defaults c12s and concatenate everything
+    # we cycle over train matrices to pair them with reference matrices,
+    # then we add 1-4 assignments and default c12s and concatenate everything
     train_dataset = pd.DataFrame()
     for name, ref_name in meGO_ensemble["train_matrix_tuples"]:
         # sysname_train_from_intramat_1_1 <-> sysname_reference_intramat_1_1
@@ -701,7 +717,8 @@ def init_LJ_datasets(meGO_ensemble, pairs14, exclusion_bonds14):
     )
     train_dataset["rep"] = train_dataset["rep"].fillna(pd.Series(pairwise_c12))
 
-    # we cycle over check matrices to pair them with reference matrices and then we add 1-4 assignments and defaults c12s and concatenate everything
+    # we cycle over check matrices to pair them with reference matrices,
+    # then we add 1-4 assignments and default c12s and concatenate everything
     check_dataset = pd.DataFrame()
     for name, ref_name in meGO_ensemble["check_matrix_tuples"]:
         # sysname_check_from_intramat_1_1 <-> sysname_reference_intramat_1_1
@@ -730,7 +747,7 @@ def init_LJ_datasets(meGO_ensemble, pairs14, exclusion_bonds14):
             "1-4",
         ] = "0"
         check_dataset["1-4"] = check_dataset["1-4"].fillna("1>4")
-        # This is to set the correct default C12 values taking into account specialised 1-4 values (including the special 1-5 O-O)
+        # This is to set the correct default C12 values taking into account specialised 1-4 values
         check_dataset = pd.merge(
             check_dataset,
             pairs14[["ai", "aj", "same_chain", "rep"]],
@@ -774,11 +791,13 @@ def generate_basic_LJ(meGO_ensemble):
     Returns:
     - basic_LJ (DataFrame): DataFrame containing basic LJ interactions.
 
-    This function generates a DataFrame 'basic_LJ' containing basic LJ interactions within a molecular ensemble. It calculates LJ interactions based on atom types, molecules, and reference matrices present in the ensemble.
+    This function generates a DataFrame 'basic_LJ' containing basic LJ interactions within a molecular ensemble.
+    It calculates LJ interactions based on atom types, molecules, and reference matrices present in the ensemble.
 
     Note:
     - The 'meGO_ensemble' dictionary is expected to contain necessary details regarding the molecular ensemble.
-    - The returned DataFrame 'basic_LJ' includes columns defining LJ interaction properties such as atom indices, types, c6, c12, sigma, epsilon, probability, rc_probability, molecule names, source, and thresholds.
+    - The returned DataFrame 'basic_LJ' includes columns defining LJ interaction properties such as atom indices,
+      types, c6, c12, sigma, epsilon, probability, rc_probability, molecule names, source, and thresholds.
     - The generated DataFrame provides basic LJ interactions for further analysis or processing within the ensemble.
     """
     columns = [

diff --git a/src/multiego/io.py b/src/multiego/io.py
@@ -16,7 +16,8 @@ def read_molecular_contacts(path):
     Returns
     -------
     contact_matrix : pd.DataFrame
-        The content of the intra-/intermat file returned as a dataframe with columns ['molecule_number_ai', 'ai', 'molecule_number_aj', 'aj', 'distance', 'probability', 'cutoff']
+        The content of the intra-/intermat file returned as a dataframe with columns
+        ['molecule_number_ai', 'ai', 'molecule_number_aj', 'aj', 'distance', 'probability', 'cutoff']
     """
 
     print("\t-", f"Reading {path}")
@@ -119,14 +120,16 @@ def write_model(meGO_ensemble, meGO_LJ_potential, meGO_LJ_14, parameters, output
         - LJ parameterization completed with a total of {len(meGO_LJ_potential)} contacts.
         - Attractive: {len(meGO_LJ_potential['epsilon'].loc[meGO_LJ_potential['epsilon']>0.])}
         - Repulsive: {len(meGO_LJ_potential['epsilon'].loc[meGO_LJ_potential['epsilon']<0.])}
-        - The average epsilon is {meGO_LJ_potential['epsilon'].loc[meGO_LJ_potential['epsilon']>0.].mean():{5}.{3}}
-        - Epsilon range is [min:max] [{meGO_LJ_potential['epsilon'].loc[meGO_LJ_potential['epsilon']>0.].min():{5}.{3}}:{meGO_LJ_potential['epsilon'].max():{5}.{3}}]
-        - Sigma range is [min:max] [{meGO_LJ_potential['sigma'].loc[meGO_LJ_potential['epsilon']>0.].min():{5}.{3}}:{meGO_LJ_potential['sigma'].loc[meGO_LJ_potential['epsilon']>0.].max():{5}.{3}}]
-        - Suggested rlist and cut-off at {1.1*2.5*meGO_LJ_potential['sigma'].loc[meGO_LJ_potential['epsilon']>0.].max():{4}.{3}} and {2.5*meGO_LJ_potential['sigma'].loc[meGO_LJ_potential['epsilon']>0.].max():{4}.{3}} nm
+        - The average epsilon is: {meGO_LJ_potential['epsilon'].loc[meGO_LJ_potential['epsilon']>0.].mean():{5}.{3}} kJ/mol
+        - Epsilon range is: [{meGO_LJ_potential['epsilon'].loc[meGO_LJ_potential['epsilon']>0.].min():{5}.{3}}:{meGO_LJ_potential['epsilon'].max():{5}.{3}}] kJ/mol
+        - Sigma range is: [{meGO_LJ_potential['sigma'].loc[meGO_LJ_potential['epsilon']>0.].min():{5}.{3}}:{meGO_LJ_potential['sigma'].loc[meGO_LJ_potential['epsilon']>0.].max():{5}.{3}}] nm
+
+        RELEVANT MDP PARAMETERS:
+        - Suggested rlist value: {1.1*2.5*meGO_LJ_potential['sigma'].loc[meGO_LJ_potential['epsilon']>0.].max():{4}.{3}} nm
+        - Suggested cut-off value: {2.5*meGO_LJ_potential['sigma'].loc[meGO_LJ_potential['epsilon']>0.].max():{4}.{3}} nm
         """
         )
-    print(f"\nAnd it can be found in the following folder:\n{output_dir}")
-    print("\nNessuno è più basito, nessuno è più sorpreso. Ognuno di voi ha capito tutto.\nCarlo is happy!\t\^o^/\n")
+    print(f"\n- And it can be found in the following folder:\n{output_dir}")
 
 
 def dataframe_to_write(df):
@@ -155,10 +158,11 @@ def make_header(parameters):
     now = time.strftime("%d-%m-%Y %H:%M", time.localtime())
 
     header = f"""
-; Multi-eGO force field version alpha2
+; Multi-eGO force field version beta.1
 ; https://github.com/multi-ego/multi-eGO
 ; Please read and cite:
-; Scalone, E. et al. PNAS 119, e2203181119 (2022)
+; Scalone, E. et al. PNAS 119, e2203181119 (2022) 10.1073/pnas.2203181119
+; Bacic Toplek, F., Scalone, E. et al. ChemRxiv (2023) 10.26434/chemrxiv-2023-67255-v2
 ; Created on the {now}
 ; With the following parameters:
 """

diff --git a/tools/face_generator/generate_face.py b/tools/face_generator/generate_face.py
@@ -17,7 +17,8 @@
 ]
 
 
-def print_goodbye():
+def print_wellcome():
+    print("")
     print("-----------------------------===========+===+++++++++++++********#########################")
     print("-----------------------------=========++++**##########**********##########################")
     print("-----------------------------=-==+**#%%%%####*****#####%%%@%#########%%%%%################")
@@ -78,15 +79,12 @@ def print_goodbye():
     print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@%%%@@@@@@@@@@@@@@@@@#+*%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
     print("")
     print("")
+
+
+def print_goodbye():
     print("")
-    print("----------------------------------------------------")
-    print("")
-    print(":-)  LUIGI says goodbye  (-:")
     print("")
     print(aforismi[random.choice(range(len(aforismi)))])
+    print("Luigi Pirandello, Uno, nessuno, centomila (1926)")
     print()
     print("")
-    print("----------------------------------------------------")
-    print("")
-    print("")
-    print("")