-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch '103-support-txt-ending-form-mass-bank-format' into 'master'
Resolve "Support .txt ending form mass bank format" Closes #103 See merge request bright-giant/sirius/sirius-libs!67
- Loading branch information
Showing
12 changed files
with
185 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
31 changes: 31 additions & 0 deletions
31
io/src/main/java/de/unijena/bioinf/babelms/txt/TxtExperimentParser.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package de.unijena.bioinf.babelms.txt; | ||
|
||
import de.unijena.bioinf.ChemistryBase.ms.Ms2Experiment; | ||
import de.unijena.bioinf.ChemistryBase.utils.FileUtils; | ||
import de.unijena.bioinf.babelms.Parser; | ||
import de.unijena.bioinf.babelms.massbank.MassbankExperimentParser; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.io.InputStreamReader; | ||
import java.net.URI; | ||
|
||
public class TxtExperimentParser implements Parser<Ms2Experiment> { | ||
|
||
MassbankExperimentParser delegate = new MassbankExperimentParser(); | ||
|
||
@Override | ||
public Ms2Experiment parse(BufferedReader reader, URI source) throws IOException { | ||
try { | ||
return delegate.parse(reader, source); | ||
} catch (Exception e) { | ||
throw new RuntimeException("Could not parse MassBank .txt file", e); | ||
} | ||
} | ||
|
||
@Override | ||
public Ms2Experiment parse(InputStream inputStream, URI source) throws IOException { | ||
return parse(FileUtils.ensureBuffering(new InputStreamReader(inputStream)), source); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
50 changes: 50 additions & 0 deletions
50
io/src/test/java/de/unijena/bioinf/babelms/txt/TxtExperimentParserTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package de.unijena.bioinf.babelms.txt; | ||
|
||
import de.unijena.bioinf.ChemistryBase.ms.Ms2Experiment; | ||
import de.unijena.bioinf.babelms.ParserTestUtils; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import java.io.File; | ||
import java.io.FileInputStream; | ||
import java.io.IOException; | ||
import java.net.URI; | ||
|
||
import static org.junit.jupiter.api.Assertions.*; | ||
|
||
public class TxtExperimentParserTest { | ||
|
||
@Test | ||
public void testParseValidFile() throws IOException { | ||
File input = ParserTestUtils.getTestFile("massbank/MSBNK-UFZ-UP000040.txt"); | ||
URI uri = input.toURI(); | ||
TxtExperimentParser parser = new TxtExperimentParser(); | ||
try (FileInputStream stream = new FileInputStream(input)) { | ||
Ms2Experiment firstCall = parser.parse(stream, uri); | ||
assertNotNull(firstCall); | ||
assertEquals(2, firstCall.getMs2Spectra().get(0).size()); | ||
|
||
Ms2Experiment secondCall = parser.parse(stream, uri); | ||
assertNull(secondCall); | ||
} | ||
} | ||
|
||
@Test | ||
public void testParseInvalidFile() throws IOException { | ||
File input = ParserTestUtils.getTestFile("massbank/invalid.txt"); | ||
URI uri = input.toURI(); | ||
TxtExperimentParser parser = new TxtExperimentParser(); | ||
try (FileInputStream stream = new FileInputStream(input)) { | ||
assertThrows(RuntimeException.class, () -> parser.parse(stream, uri)); | ||
} | ||
} | ||
|
||
@Test | ||
public void testParseEmptyFile() throws IOException { | ||
File input = ParserTestUtils.getTestFile("massbank/empty.txt"); | ||
URI uri = input.toURI(); | ||
TxtExperimentParser parser = new TxtExperimentParser(); | ||
try (FileInputStream stream = new FileInputStream(input)) { | ||
assertNull(parser.parse(stream, uri)); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
ACCESSION: MSBNK-UFZ-UP000040 | ||
RECORD_TITLE: Triethylene glycol monomethyl ether; LC-ESI-ITFT; MS2; CE: 25%; R=15000; [M+H]+ | ||
DATE: 2019.07.31 | ||
AUTHORS: Liza-Marie Beckers, Werner Brack, Janek-Paul Dann, Martin Krauss, Erik Mueller, Tobias Schulze, Helmholtz Centre for Environmental Research GmbH - UFZ, Leipzig, Germany | ||
LICENSE: CC0 | ||
COPYRIGHT: Copyright (C) 2019 | ||
PUBLICATION: Beckers L-M, Brack W, Dann JP, Krauss M, Mueller E, Schulze T, 2020. Unraveling longitudinal pollution patterns of organic micropollutants in a river by non-target screening and cluster analysis. Science of The Total Environment, https://doi.org/10.1016/j.scitotenv.2020.138388 | ||
COMMENT: CONFIDENCE Reference Standard (Level 1) | ||
COMMENT: INTERNAL_ID 5 | ||
CH$NAME: Triethylene glycol monomethyl ether | ||
CH$NAME: 2-[2-(2-methoxyethoxy)ethoxy]ethanol | ||
CH$COMPOUND_CLASS: N/A; Environmental Standard | ||
CH$FORMULA: C7H16O4 | ||
CH$EXACT_MASS: 164.1049 | ||
CH$SMILES: COCCOCCOCCO | ||
CH$IUPAC: InChI=1S/C7H16O4/c1-9-4-5-11-7-6-10-3-2-8/h8H,2-7H2,1H3 | ||
CH$LINK: CAS 112-35-6 | ||
CH$LINK: CHEBI 84233 | ||
CH$LINK: PUBCHEM CID:8178 | ||
CH$LINK: INCHIKEY JLGLQAWTXXGVEM-UHFFFAOYSA-N | ||
CH$LINK: CHEMSPIDER 7886 | ||
AC$INSTRUMENT: LTQ Orbitrap XL Thermo Scientific | ||
AC$INSTRUMENT_TYPE: LC-ESI-ITFT | ||
AC$MASS_SPECTROMETRY: MS_TYPE MS2 | ||
AC$MASS_SPECTROMETRY: ION_MODE POSITIVE | ||
AC$MASS_SPECTROMETRY: IONIZATION ESI | ||
AC$MASS_SPECTROMETRY: FRAGMENTATION_MODE HCD | ||
AC$MASS_SPECTROMETRY: COLLISION_ENERGY 25 % (nominal) | ||
AC$MASS_SPECTROMETRY: RESOLUTION 15000 | ||
AC$CHROMATOGRAPHY: COLUMN_NAME Kinetex Evo C18 2.6 um 50 x 2.1 mm | ||
AC$CHROMATOGRAPHY: FLOW_GRADIENT 95/5 at 0 min, 95/5 at 1 min, 0/100 at 13 min, 0/100 at 24 min, 95/5 at 24.3 min, 95/5/0 at 32 min | ||
AC$CHROMATOGRAPHY: FLOW_RATE 300 uL/min | ||
AC$CHROMATOGRAPHY: RETENTION_TIME 1.410 min | ||
AC$CHROMATOGRAPHY: SOLVENT A water with 0.1% formic acid | ||
AC$CHROMATOGRAPHY: SOLVENT B methanol with 0.1% formic acid | ||
MS$FOCUSED_ION: BASE_PEAK 165.1119 | ||
MS$FOCUSED_ION: PRECURSOR_M/Z 165.1121 | ||
MS$FOCUSED_ION: PRECURSOR_TYPE [M+H]+ | ||
MS$DATA_PROCESSING: RECALIBRATE loess on assigned fragments and MS1 | ||
MS$DATA_PROCESSING: REANALYZE Peaks with additional N2/O included | ||
MS$DATA_PROCESSING: WHOLE RMassBank 2.12.0 | ||
PK$SPLASH: splash10-0zfr-5900000000-a3f9bbebe85d609d644c | ||
PK$ANNOTATION: m/z tentative_formula formula_count mass error(ppm) | ||
59.049 C3H7O+ 1 59.0491 -3.05 | ||
103.0753 C5H11O2+ 1 103.0754 -0.48 | ||
PK$NUM_PEAK: 2 | ||
PK$PEAK: m/z int. rel.int. | ||
59.049 2330.9 661 | ||
103.0753 3518.3 999 | ||
// |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Not a massbank txt file | ||
Should break parsing |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters