Skip to content

Commit

Permalink
enable -L to read Feature Files from Paths (#4854)
Browse files Browse the repository at this point in the history
* expand -L support for Feature Files to work with Paths
* previously interval files could be read from Paths, but not feature
files like vcf and bed
* fixes #4852
  • Loading branch information
lbergelson authored Jun 7, 2018
1 parent 987f52c commit f4225b8
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -493,8 +493,8 @@ private <T extends Feature> FeatureDataSource<T> lookupDataSource( final Feature
* @param file file to check
* @return True if the file exists and contains Features (i.e., we have a FeatureCodec that can decode it), otherwise false
*/
public static boolean isFeatureFile( final File file ) {
return file.exists() && ! getCandidateCodecsForFile(file.toPath()).isEmpty();
public static boolean isFeatureFile( final Path file ) {
return Files.exists(file) && ! getCandidateCodecsForFile(file).isEmpty();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ public static List<GenomeLoc> parseIntervalArguments(final GenomeLocParser parse
"interval or an interval file instead.");
}
// If it's a Feature-containing file, convert it to a list of intervals
else if ( FeatureManager.isFeatureFile(new File(arg)) ) {
else if ( FeatureManager.isFeatureFile(IOUtils.getPath(arg)) ) {
rawIntervals.addAll(featureFileToIntervals(parser, arg));
}
// If it's an interval file, add its contents to the raw interval list
Expand Down Expand Up @@ -307,12 +307,11 @@ else if ( new File(arg).exists() ) {
* Converts a Feature-containing file to a list of intervals
*
* @param parser GenomeLocParser for creating intervals
* @param featureFileName file containing Features to convert to intervals
* @param featureFile file containing Features to convert to intervals
* @return a List of intervals corresponding to the locations of the Features in the provided file
* @throws UserException.CouldNotReadInputFile if the provided file is not in a supported Feature file format
*/
public static List<GenomeLoc> featureFileToIntervals( final GenomeLocParser parser, final String featureFileName ) {
final File featureFile = new File(featureFileName);
public static List<GenomeLoc> featureFileToIntervals( final GenomeLocParser parser, final String featureFile ) {

try ( final FeatureDataSource<? extends Feature> dataSource = new FeatureDataSource<>(featureFile) ) {
final List<GenomeLoc> featureIntervals = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public Object[][] getIsFeatureFileTestData() {

@Test(dataProvider = "IsFeatureFileTestData")
public void testIsFeatureFile( final File file, final boolean expectedIsFeatureFile ) {
Assert.assertEquals(FeatureManager.isFeatureFile(file), expectedIsFeatureFile, "isFeatureFile() returned incorrect result for file " + file.getAbsolutePath());
Assert.assertEquals(FeatureManager.isFeatureFile(file.toPath()), expectedIsFeatureFile, "isFeatureFile() returned incorrect result for file " + file.getAbsolutePath());
}

@CommandLineProgramProperties(summary = "", oneLineSummary = "", programGroup = TestProgramGroup.class)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.jimfs.Configuration;
import com.google.common.jimfs.Jimfs;
import htsjdk.samtools.QueryInterval;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceDictionary;
Expand All @@ -28,6 +30,9 @@
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.FileSystem;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
Expand Down Expand Up @@ -1245,10 +1250,21 @@ public Object[][] loadIntervalsFromFeatureFileData() {

@Test(dataProvider = "loadIntervalsFromFeatureFileData")
public void testLoadIntervalsFromFeatureFile( final File featureFile, final List<GenomeLoc> expectedIntervals ) {
final GenomeLocSortedSet actualIntervals = IntervalUtils.loadIntervals(Arrays.asList(featureFile.getAbsolutePath()), IntervalSetRule.UNION, IntervalMergingRule.ALL, 0, hg19GenomeLocParser);
final GenomeLocSortedSet actualIntervals = IntervalUtils.loadIntervals(Collections.singletonList(featureFile.getAbsolutePath()), IntervalSetRule.UNION, IntervalMergingRule.ALL, 0, hg19GenomeLocParser);
Assert.assertEquals(actualIntervals, expectedIntervals, "Wrong intervals loaded from Feature file " + featureFile.getAbsolutePath());
}

/**
 * Checks that a Feature file can be loaded as intervals when it is addressed by URI on a
 * Jimfs filesystem rather than through a local {@link File}.
 *
 * @param featureFile local Feature file that is copied into the Jimfs filesystem
 * @param expectedIntervals intervals expected to be produced from the copied file
 * @throws IOException if the Jimfs filesystem cannot be created/closed or the copy fails
 */
@Test(dataProvider = "loadIntervalsFromFeatureFileData")
public void testLoadIntervalsFromFeatureFileInJimfs( final File featureFile, final List<GenomeLoc> expectedIntervals ) throws IOException {
    try ( final FileSystem jimfs = Jimfs.newFileSystem(Configuration.unix()) ) {
        // Place a copy of the feature file directly under the first root of the Jimfs filesystem.
        final Path root = jimfs.getRootDirectories().iterator().next();
        final Path copiedFeatureFile = root.resolve(featureFile.getName());
        Files.copy(featureFile.toPath(), copiedFeatureFile);

        // Hand the copy to loadIntervals as a URI string, not as a plain local path.
        final String copiedFeatureFileUri = copiedFeatureFile.toAbsolutePath().toUri().toString();
        final List<String> intervalArgs = Collections.singletonList(copiedFeatureFileUri);

        final GenomeLocSortedSet loadedIntervals = IntervalUtils.loadIntervals(intervalArgs, IntervalSetRule.UNION, IntervalMergingRule.ALL, 0, hg19GenomeLocParser);
        Assert.assertEquals(loadedIntervals, expectedIntervals, "Wrong intervals loaded from Feature file " + copiedFeatureFileUri);
    }
}

// Note: because the file does not exist and all characters are allowed in contig names,
// we will not know that this is supposed to be interpreted as a file.
// So we'll blow up with MalformedGenomeLoc and not anything related to files
Expand Down

0 comments on commit f4225b8

Please sign in to comment.