diff --git a/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java b/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java
index ecad9aac..aea61408 100644
--- a/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java
+++ b/src/main/java/de/unijena/cheminf/mortar/model/io/Importer.java
@@ -421,8 +421,10 @@ private void preprocessMoleculeSet(IAtomContainerSet aMoleculeSet) throws NullPo
//
//
/**
- * Imports a SMILES file. This method is able to parse different types of SMILES files, e.g. with and without header
- * or with only one column or two (SMILES and name/ID, which is in which column is detected).
+ * Imports a SMILES file. This method is able to parse differently formatted SMILES files, e.g. with and without
+ * header or with one or two columns (SMILES and name/ID). If no name can be detected for a structure, the structure
+ * is assigned the name of the file extended with the index of the structure in the file as name.
+ *
* Protected and not private for testing in class ImporterTest.
*
* @param aFile a SMILES codes-containing *.txt or *.smi file
@@ -433,22 +435,23 @@ private void preprocessMoleculeSet(IAtomContainerSet aMoleculeSet) throws NullPo
protected IAtomContainerSet importSMILESFile(File aFile) throws IOException {
try (
FileReader tmpSmilesFileReader = new FileReader(aFile);
- BufferedReader tmpSmilesFileBufferedReader = new BufferedReader(tmpSmilesFileReader, BasicDefinitions.BUFFER_SIZE)
+ BufferedReader tmpSmilesFileBufferedReader = new BufferedReader(tmpSmilesFileReader,
+ BasicDefinitions.BUFFER_SIZE)
) {
IAtomContainerSet tmpAtomContainerSet = new AtomContainerSet();
- //AtomContainer to save the parsed SMILES in
+ // AtomContainer to save the parsed SMILES in
IAtomContainer tmpMolecule = new AtomContainer();
SmilesParser tmpSmilesParser = new SmilesParser(SilentChemObjectBuilder.getInstance());
String tmpSmilesFileNextLine = "";
String tmpSmilesFileDeterminedSeparator = "";
- String[] tmpProcessedLineArray;
+ String[] tmpProcessedLineArray = null;
int tmpSmilesCodeExpectedPosition = 0;
int tmpIDExpectedPosition = 0;
int tmpSmilesFileParsableLinesCounter = 0;
int tmpSmilesFileInvalidLinesCounter = 0;
- //marking the BufferedReader to reset the reader after checking the format and determining the separator
+ // marking the BufferedReader to reset the reader after checking the format and determining the separator
tmpSmilesFileBufferedReader.mark(BasicDefinitions.BUFFER_SIZE);
- //as potential headline the first line should be avoided for separator determination
+ // as potential headline the first line should be avoided for separator determination
String tmpSmilesFileFirstLine = tmpSmilesFileBufferedReader.readLine();
/* first block
Checking for parsable SMILES code and saving the determined separator (if one is used).
@@ -459,7 +462,7 @@ Checking for parsable SMILES code and saving the determined separator (if one is
findSeparatorLoop:
while (!Thread.currentThread().isInterrupted() && tmpFilesLine <= 3) {
if ((tmpSmilesFileNextLine = tmpSmilesFileBufferedReader.readLine()) == null) {
- //if the file's end is reached at this point, the first line is used to determine the separator
+ // if the file's end is reached at this point, the first line is used to determine the separator
if (tmpSmilesFileFirstLine != null || !tmpSmilesFileFirstLine.isEmpty()) {
tmpSmilesFileNextLine = tmpSmilesFileFirstLine;
tmpSmilesFileFirstLine = null;
@@ -468,7 +471,8 @@ Checking for parsable SMILES code and saving the determined separator (if one is
}
}
for (String tmpSeparator : BasicDefinitions.POSSIBLE_SMILES_FILE_SEPARATORS) {
- //maximum of two array elements expected, otherwise the separator or the line itself are assumed to be invalid
+ // maximum of two array elements expected, otherwise the separator or the line itself are assumed
+ // to be invalid
tmpProcessedLineArray = tmpSmilesFileNextLine.split(tmpSeparator, 3);
if (tmpProcessedLineArray.length > 2) {
continue;
@@ -486,9 +490,8 @@ Checking for parsable SMILES code and saving the determined separator (if one is
if (tmpProcessedLineArray.length > 1) {
if (tmpSmilesCodeExpectedPosition == 0) {
tmpIDExpectedPosition = 1;
- } else {
- tmpIDExpectedPosition = 0;
}
+ // else: tmpIDExpectedPosition = 0;
}
break findSeparatorLoop;
}
@@ -506,20 +509,24 @@ Checking for parsable SMILES code and saving the determined separator (if one is
tmpSmilesFileBufferedReader.reset();
tmpSmilesFileBufferedReader.mark(0); //to avoid the memory of unnecessary data
/* second block
- Reading the file line by line and adding an AtomContainer to the AtomContainerSet for each line with parsable SMILES code
+ Reading the file line by line and adding an AtomContainer to the AtomContainerSet for each line with
+ parsable SMILES code
*/
- while (!Thread.currentThread().isInterrupted() && (tmpSmilesFileNextLine = tmpSmilesFileBufferedReader.readLine()) != null) {
+ while (!Thread.currentThread().isInterrupted()
+ && (tmpSmilesFileNextLine = tmpSmilesFileBufferedReader.readLine()) != null) {
//trying to parse as SMILES code
try {
tmpProcessedLineArray = tmpSmilesFileNextLine.split(tmpSmilesFileDeterminedSeparator, 2);
- if (!tmpProcessedLineArray[tmpSmilesCodeExpectedPosition].isEmpty()) {
- tmpMolecule = tmpSmilesParser.parseSmiles(tmpProcessedLineArray[tmpSmilesCodeExpectedPosition]);
- tmpSmilesFileParsableLinesCounter++;
- } else {
- tmpSmilesFileInvalidLinesCounter++;
- continue;
- }
- } catch (InvalidSmilesException | IndexOutOfBoundsException anException) { //case: invalid line or SMILES code
+ String tmpSmiles = tmpProcessedLineArray[tmpSmilesCodeExpectedPosition].isBlank() ? null :
+ tmpProcessedLineArray[tmpSmilesCodeExpectedPosition];
+ //throws exception if SMILES string is null, goes to catch block
+ tmpMolecule = tmpSmilesParser.parseSmiles(tmpSmiles);
+ tmpSmilesFileParsableLinesCounter++;
+ }
+ catch (InvalidSmilesException | IndexOutOfBoundsException | NullPointerException anException) {
+ int tmpIndexInFile = tmpSmilesFileParsableLinesCounter + tmpSmilesFileInvalidLinesCounter;
+ Importer.LOGGER.info("Contains no parsable SMILES string: line " + tmpIndexInFile
+ + " (index).");
tmpSmilesFileInvalidLinesCounter++;
continue;
}
@@ -528,14 +535,17 @@ Checking for parsable SMILES code and saving the determined separator (if one is
if (tmpProcessedLineArray.length > 1 && !tmpProcessedLineArray[tmpIDExpectedPosition].isEmpty()) {
tmpName = tmpProcessedLineArray[tmpIDExpectedPosition];
} else {
- tmpName = FileUtil.getFileNameWithoutExtension(aFile) + tmpSmilesFileParsableLinesCounter;
+ int tmpIndexInFile = tmpSmilesFileParsableLinesCounter + tmpSmilesFileInvalidLinesCounter - 1;
+ tmpName = FileUtil.getFileNameWithoutExtension(aFile) + tmpIndexInFile;
}
tmpMolecule.setProperty(Importer.MOLECULE_NAME_PROPERTY_KEY, tmpName);
//adding tmpMolecule to the AtomContainerSet
tmpAtomContainerSet.addAtomContainer(tmpMolecule);
}
- Importer.LOGGER.log(Level.INFO, "\tSmilesFile ParsableLinesCounter:\t" + tmpSmilesFileParsableLinesCounter +
- "\n\tSmilesFile InvalidLinesCounter:\t\t" + tmpSmilesFileInvalidLinesCounter);
+ if (tmpSmilesFileInvalidLinesCounter > 0) {
+ Importer.LOGGER.info("\tSmilesFile ParsableLinesCount:\t" + tmpSmilesFileParsableLinesCounter +
+ "\n\tSmilesFile InvalidLinesCount:\t" + tmpSmilesFileInvalidLinesCounter);
+ }
return tmpAtomContainerSet;
}
}
diff --git a/src/main/java/de/unijena/cheminf/mortar/model/util/BasicDefinitions.java b/src/main/java/de/unijena/cheminf/mortar/model/util/BasicDefinitions.java
index 3a723a58..f310dce4 100644
--- a/src/main/java/de/unijena/cheminf/mortar/model/util/BasicDefinitions.java
+++ b/src/main/java/de/unijena/cheminf/mortar/model/util/BasicDefinitions.java
@@ -89,7 +89,7 @@ private BasicDefinitions() {
/**
* Possible SMILES file separators used to separate SMILES code from ID
*/
- public static final String[] POSSIBLE_SMILES_FILE_SEPARATORS = {"\t", ";", ",", " "};
+ public static final String[] POSSIBLE_SMILES_FILE_SEPARATORS = {"|", "\t", ";", ",", " "};
//
//
//