FelixBaensch · JonasSchaub · Feb 22, 2024 · Aug 10, 2023 · Aug 10, 2023 · Aug 11, 2023
diff --git a/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java b/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java
@@ -390,8 +390,10 @@ private void preprocessMoleculeSet(IAtomContainerSet aMoleculeSet) throws NullPo
     //
     //<editor-fold desc="protected methods" defaultstate="collapsed">
     /**
-     * Imports a SMILES file. This method is able to parse different types of SMILES files, e.g. with and without header
-     * or with only one column or two (SMILES and name/ID, which is in which column is detected).
+     * Imports a SMILES file. This method is able to parse differently formatted SMILES files, e.g. with and without
+     * header or with one or two columns (SMILES and name/ID). If no name can be detected for a structure, it is named
+     * after the file (file name without extension) followed by the zero-based index of its line in the file.
+     * <br>
      * Protected and not private for testing in class ImporterTest.
      *
      * @param aFile a SMILES codes-containing *.txt or *.smi file
@@ -402,22 +404,23 @@ private void preprocessMoleculeSet(IAtomContainerSet aMoleculeSet) throws NullPo
     protected IAtomContainerSet importSMILESFile(File aFile) throws IOException {
         try (
                 FileReader tmpSmilesFileReader = new FileReader(aFile);
-                BufferedReader tmpSmilesFileBufferedReader = new BufferedReader(tmpSmilesFileReader, BasicDefinitions.BUFFER_SIZE)
+                BufferedReader tmpSmilesFileBufferedReader = new BufferedReader(tmpSmilesFileReader,
+                        BasicDefinitions.BUFFER_SIZE)
         ) {
             IAtomContainerSet tmpAtomContainerSet = new AtomContainerSet();
-            //AtomContainer to save the parsed SMILES in
+            // AtomContainer to save the parsed SMILES in
             IAtomContainer tmpMolecule = new AtomContainer();
             SmilesParser tmpSmilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance());
             String tmpSmilesFileNextLine = "";
             String tmpSmilesFileDeterminedSeparator = "";
-            String[] tmpProcessedLineArray;
+            String[] tmpProcessedLineArray = null;
             int tmpSmilesCodeExpectedPosition = 0;
             int tmpIDExpectedPosition = 0;
             int tmpSmilesFileParsableLinesCounter = 0;
             int tmpSmilesFileInvalidLinesCounter = 0;
-            //marking the BufferedReader to reset the reader after checking the format and determining the separator
+            // marking the BufferedReader to reset the reader after checking the format and determining the separator
             tmpSmilesFileBufferedReader.mark(BasicDefinitions.BUFFER_SIZE);
-            //as potential headline the first line should be avoided for separator determination
+            // as potential headline the first line should be avoided for separator determination
             String tmpSmilesFileFirstLine = tmpSmilesFileBufferedReader.readLine();
             /*  first block
                 Checking for parsable SMILES code and saving the determined separator (if one is used).
@@ -428,7 +431,7 @@ Checking for parsable SMILES code and saving the determined separator (if one is
             findSeparatorLoop:
             while (!Thread.currentThread().isInterrupted() && tmpFilesLine <= 3) {
                 if ((tmpSmilesFileNextLine = tmpSmilesFileBufferedReader.readLine()) == null) {
-                    //if the file's end is reached at this point, the first line is used to determine the separator
+                    // if the file's end is reached at this point, the first line is used to determine the separator
                     if (tmpSmilesFileFirstLine != null && !tmpSmilesFileFirstLine.isEmpty()) { // && is null-safe
                         tmpSmilesFileNextLine = tmpSmilesFileFirstLine;
                         tmpSmilesFileFirstLine = null;
@@ -437,7 +440,8 @@ Checking for parsable SMILES code and saving the determined separator (if one is
                     }
                 }
                 for (String tmpSeparator : BasicDefinitions.POSSIBLE_SMILES_FILE_SEPARATORS) {
-                    //maximum of two array elements expected, otherwise the separator or the line itself are assumed to be invalid
+                    // maximum of two array elements expected, otherwise the separator or the line itself are assumed
+                    // to be invalid
                     tmpProcessedLineArray = tmpSmilesFileNextLine.split(tmpSeparator, 3);
                     if (tmpProcessedLineArray.length > 2) {
                         continue;
@@ -455,9 +459,8 @@ Checking for parsable SMILES code and saving the determined separator (if one is
                                 if (tmpProcessedLineArray.length > 1) {
                                     if (tmpSmilesCodeExpectedPosition == 0) {
                                         tmpIDExpectedPosition = 1;
-                                    } else {
-                                        tmpIDExpectedPosition = 0;
                                     }
+                                    // else: tmpIDExpectedPosition keeps its initial value 0
                                 }
                                 break findSeparatorLoop;
                             }
@@ -475,20 +478,30 @@ Checking for parsable SMILES code and saving the determined separator (if one is
             tmpSmilesFileBufferedReader.reset();
             tmpSmilesFileBufferedReader.mark(0);    //to avoid the memory of unnecessary data
             /*  second block
-                Reading the file line by line and adding an AtomContainer to the AtomContainerSet for each line with parsable SMILES code
+                Reading the file line by line and adding an AtomContainer to the AtomContainerSet for each line with
+                parsable SMILES code
              */
-            while (!Thread.currentThread().isInterrupted() && (tmpSmilesFileNextLine = tmpSmilesFileBufferedReader.readLine()) != null) {
+            while (!Thread.currentThread().isInterrupted()
+                    && (tmpSmilesFileNextLine = tmpSmilesFileBufferedReader.readLine()) != null) {
                 //trying to parse as SMILES code
+                boolean tmpContainsParsableSmilesCode;
                 try {
                     tmpProcessedLineArray = tmpSmilesFileNextLine.split(tmpSmilesFileDeterminedSeparator, 2);
                     if (!tmpProcessedLineArray[tmpSmilesCodeExpectedPosition].isEmpty()) {
                         tmpMolecule = tmpSmilesParser.parseSmiles(tmpProcessedLineArray[tmpSmilesCodeExpectedPosition]);
+                        tmpContainsParsableSmilesCode = true;
                         tmpSmilesFileParsableLinesCounter++;
                     } else {
-                        tmpSmilesFileInvalidLinesCounter++;
-                        continue;
+                        tmpContainsParsableSmilesCode = false;
                     }
-                } catch (InvalidSmilesException | IndexOutOfBoundsException anException) {  //case: invalid line or SMILES code
+                } catch (InvalidSmilesException | IndexOutOfBoundsException anException) {
+                    //case: invalid line or SMILES code
+                    tmpContainsParsableSmilesCode = false;
+                }
+                if (!tmpContainsParsableSmilesCode) {
+                    int tmpIndexInFile = tmpSmilesFileParsableLinesCounter + tmpSmilesFileInvalidLinesCounter;
+                    Importer.LOGGER.info("Contains no parsable SMILES string: line " + tmpIndexInFile
+                            + " (index).");
                     tmpSmilesFileInvalidLinesCounter++;
                     continue;
                 }
@@ -497,14 +510,17 @@ Checking for parsable SMILES code and saving the determined separator (if one is
                 if (tmpProcessedLineArray.length > 1 && !tmpProcessedLineArray[tmpIDExpectedPosition].isEmpty()) {
                     tmpName = tmpProcessedLineArray[tmpIDExpectedPosition];
                 } else {
-                    tmpName = FileUtil.getFileNameWithoutExtension(aFile) + tmpSmilesFileParsableLinesCounter;
+                    int tmpIndexInFile = tmpSmilesFileParsableLinesCounter + tmpSmilesFileInvalidLinesCounter - 1;
+                    tmpName = FileUtil.getFileNameWithoutExtension(aFile) + tmpIndexInFile;
                 }
                 tmpMolecule.setProperty(Importer.MOLECULE_NAME_PROPERTY_KEY, tmpName);
                 //adding tmpMolecule to the AtomContainerSet
                 tmpAtomContainerSet.addAtomContainer(tmpMolecule);
             }
-            Importer.LOGGER.log(Level.INFO, "\tSmilesFile ParsableLinesCounter:\t" + tmpSmilesFileParsableLinesCounter +
-                    "\n\tSmilesFile InvalidLinesCounter:\t\t" + tmpSmilesFileInvalidLinesCounter);
+            if (tmpSmilesFileInvalidLinesCounter > 0) {
+                Importer.LOGGER.info("\tSmilesFile ParsableLinesCount:\t" + tmpSmilesFileParsableLinesCounter +
+                        "\n\tSmilesFile InvalidLinesCount:\t" + tmpSmilesFileInvalidLinesCounter);
+            }
             return tmpAtomContainerSet;
         }
     }

diff --git a/src/main/java/de/unijena/cheminf/mortar/model/util/BasicDefinitions.java b/src/main/java/de/unijena/cheminf/mortar/model/util/BasicDefinitions.java
@@ -75,7 +75,7 @@ public final class BasicDefinitions {
     /**
      * Possible SMILES file separators (regular expressions for String.split) used to separate SMILES code from ID
      */
-    public static final String[] POSSIBLE_SMILES_FILE_SEPARATORS = {"\t", ";", ",", " "};
+    public static final String[] POSSIBLE_SMILES_FILE_SEPARATORS = {"\\|", "\t", ";", ",", " "};
     //</editor-fold>
     //
     // <editor-fold defaultstate="collapsed" desc="Buffer">