1
1
package org .broadinstitute .hellbender .engine ;
2
2
3
+ import htsjdk .beta .io .bundle .Bundle ;
4
+ import htsjdk .beta .io .bundle .BundleJSON ;
5
+ import htsjdk .beta .io .bundle .BundleResource ;
6
+ import htsjdk .beta .io .bundle .BundleResourceType ;
7
+ import htsjdk .io .IOPath ;
3
8
import htsjdk .samtools .SAMSequenceDictionary ;
4
9
import htsjdk .samtools .util .IOUtil ;
5
10
import htsjdk .samtools .util .Locatable ;
@@ -148,7 +153,7 @@ public FeatureDataSource(final File featureFile) {
148
153
* generated name, and will look ahead the default number of bases ({@link #DEFAULT_QUERY_LOOKAHEAD_BASES})
149
154
* during queries that produce cache misses.
150
155
*
151
- * @param featurePath path or URI to source of Features
156
+ * @param featurePath path or URI to source of Features (may be a Bundle)
152
157
*/
153
158
public FeatureDataSource (final String featurePath ) {
154
159
this (featurePath , null , DEFAULT_QUERY_LOOKAHEAD_BASES , null );
@@ -159,7 +164,7 @@ public FeatureDataSource(final String featurePath) {
159
164
* name. We will look ahead the default number of bases ({@link #DEFAULT_QUERY_LOOKAHEAD_BASES}) during queries
160
165
* that produce cache misses.
161
166
*
162
- * @param featureFile file containing Features
167
+ * @param featureFile file or Bundle containing Features
163
168
* @param name logical name for this data source (may be null)
164
169
*/
165
170
public FeatureDataSource (final File featureFile , final String name ) {
@@ -170,7 +175,7 @@ public FeatureDataSource(final File featureFile, final String name) {
170
175
* Creates a FeatureDataSource backed by the provided File and assigns this data source the specified logical
171
176
* name. We will look ahead the specified number of bases during queries that produce cache misses.
172
177
*
173
- * @param featureFile file containing Features
178
+ * @param featureFile file or Bundle containing Features
174
179
* @param name logical name for this data source (may be null)
175
180
* @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
176
181
*/
@@ -181,7 +186,7 @@ public FeatureDataSource(final File featureFile, final String name, final int qu
181
186
/**
182
187
* Creates a FeatureDataSource backed by the resource at the provided path.
183
188
*
184
- * @param featurePath path to file or GenomicsDB url containing features
189
+ * @param featurePath path to file or GenomicsDB url or Bundle containing features
185
190
* @param name logical name for this data source (may be null)
186
191
* @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
187
192
* @param targetFeatureType When searching for a {@link FeatureCodec} for this data source, restrict the search to codecs
@@ -195,7 +200,7 @@ public FeatureDataSource(final String featurePath, final String name, final int
195
200
* Creates a FeatureDataSource backed by the provided FeatureInput. We will look ahead the specified number of bases
196
201
* during queries that produce cache misses.
197
202
*
198
- * @param featureInput a FeatureInput specifying a source of Features
203
+ * @param featureInput a FeatureInput specifying a source of Features (may be a Bundle)
199
204
* @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
200
205
* @param targetFeatureType When searching for a {@link FeatureCodec} for this data source, restrict the search to codecs
201
206
* that produce this type of Feature. May be null, which results in an unrestricted search.
@@ -207,7 +212,7 @@ public FeatureDataSource(final FeatureInput<T> featureInput, final int queryLook
207
212
/**
208
213
* Creates a FeatureDataSource backed by the resource at the provided path.
209
214
*
210
- * @param featurePath path to file or GenomicsDB url containing features
215
+ * @param featurePath path to file or GenomicsDB url or Bundle containing features
211
216
* @param name logical name for this data source (may be null)
212
217
* @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
213
218
* @param targetFeatureType When searching for a {@link FeatureCodec} for this data source, restrict the search to codecs
@@ -224,7 +229,7 @@ public FeatureDataSource(final String featurePath, final String name, final int
224
229
* Creates a FeatureDataSource backed by the provided FeatureInput. We will look ahead the specified number of bases
225
230
* during queries that produce cache misses.
226
231
*
227
- * @param featureInput a FeatureInput specifying a source of Features
232
+ * @param featureInput a FeatureInput specifying a source of Features (may be a Bundle)
228
233
* @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
229
234
* @param targetFeatureType When searching for a {@link FeatureCodec} for this data source, restrict the search to codecs
230
235
* that produce this type of Feature. May be null, which results in an unrestricted search.
@@ -241,7 +246,7 @@ public FeatureDataSource(final FeatureInput<T> featureInput, final int queryLook
241
246
* Creates a FeatureDataSource backed by the provided FeatureInput. We will look ahead the specified number of bases
242
247
* during queries that produce cache misses.
243
248
*
244
- * @param featureInput a FeatureInput specifying a source of Features
249
+ * @param featureInput a FeatureInput specifying a source of Features (may be a Bundle)
245
250
* @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
246
251
* @param targetFeatureType When searching for a {@link FeatureCodec} for this data source, restrict the search to codecs
247
252
* that produce this type of Feature. May be null, which results in an unrestricted search.
@@ -259,7 +264,7 @@ public FeatureDataSource(final FeatureInput<T> featureInput, final int queryLook
259
264
* Creates a FeatureDataSource backed by the provided FeatureInput. We will look ahead the specified number of bases
260
265
* during queries that produce cache misses.
261
266
*
262
- * @param featureInput a FeatureInput specifying a source of Features
267
+ * @param featureInput a FeatureInput specifying a source of Features (may be a Bundle)
263
268
* @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
264
269
* @param targetFeatureType When searching for a {@link FeatureCodec} for this data source, restrict the search to codecs
265
270
* that produce this type of Feature. May be null, which results in an unrestricted search.
@@ -278,7 +283,7 @@ public FeatureDataSource(final FeatureInput<T> featureInput, final int queryLook
278
283
* Creates a FeatureDataSource backed by the provided FeatureInput. We will look ahead the specified number of bases
279
284
* during queries that produce cache misses.
280
285
*
281
- * @param featureInput a FeatureInput specifying a source of Features
286
+ * @param featureInput a FeatureInput specifying a source of Features (may be a Bundle)
282
287
* @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
283
288
* @param targetFeatureType When searching for a {@link FeatureCodec} for this data source, restrict the search to codecs
284
289
* that produce this type of Feature. May be null, which results in an unrestricted search.
@@ -296,7 +301,7 @@ public FeatureDataSource(final FeatureInput<T> featureInput, final int queryLook
296
301
* Creates a FeatureDataSource backed by the provided FeatureInput. We will look ahead the specified number of bases
297
302
* during queries that produce cache misses.
298
303
*
299
- * @param featureInput a FeatureInput specifying a source of Features
304
+ * @param featureInput a FeatureInput specifying a source of Features (may be a Bundle)
300
305
* @param queryLookaheadBases look ahead this many bases during queries that produce cache misses
301
306
* @param targetFeatureType When searching for a {@link FeatureCodec} for this data source, restrict the search to codecs
302
307
* that produce this type of Feature. May be null, which results in an unrestricted search.
@@ -369,9 +374,26 @@ private static <T extends Feature> FeatureReader<T> getFeatureReader(final Featu
369
374
} catch (final ClassCastException e ) {
370
375
throw new UserException ("GenomicsDB inputs can only be used to provide VariantContexts." , e );
371
376
}
377
+ } else if (featureInput .hasExtension (BundleJSON .BUNDLE_EXTENSION )) {
378
+ // the feature input specifies a serialized json bundle file
379
+ final Bundle vcfBundle = BundleJSON .toBundle (htsjdk .beta .plugin .IOUtils .getStringFromPath (featureInput ), GATKPath ::new );
380
+ final IOPath vcfPath = vcfBundle .getOrThrow (BundleResourceType .CT_VARIANT_CONTEXTS ).getIOPath ().get ();
381
+ // to get the codec we have to use the path of the underlying vcf resource, not the bundle path
382
+ final FeatureInput <T > fi = new FeatureInput <T >(vcfPath .getRawInputString (), featureInput .getName ());
383
+ final FeatureCodec <T , ?> codec = getCodecForFeatureInput (fi , targetFeatureType , setNameOnCodec );
384
+ // propagate the bundle path, not the vcf path, to the reader, so that downstream code can retrieve
385
+ // the index path from the bundle
386
+ return getTribbleFeatureReader (featureInput , codec , cloudWrapper , cloudIndexWrapper );
387
+ } else if (featureInput .getParentBundle () != null ) {
388
+ // the featureInput was created from a bundle list expansion (i.e., MultiVariantWalkers). It has the
389
+ // primary resource as the underlying resource path, and the containing bundle attached as the
390
+ // "parent bundle". Use the original FI to get the codec, but to get the feature reader, we use
391
+ // the FI that contains the bundle path, since the feature reader may require access to the index
392
+ final FeatureCodec <T , ?> codec = getCodecForFeatureInput (featureInput , targetFeatureType , setNameOnCodec );
393
+ return getTribbleFeatureReader (featureInput , codec , cloudWrapper , cloudIndexWrapper );
372
394
} else {
373
395
final FeatureCodec <T , ?> codec = getCodecForFeatureInput (featureInput , targetFeatureType , setNameOnCodec );
374
- if ( featureInput .getFeaturePath ().toLowerCase ().endsWith (BCI_FILE_EXTENSION ) ) {
396
+ if (featureInput .getFeaturePath ().toLowerCase ().endsWith (BCI_FILE_EXTENSION )) {
375
397
return new Reader (featureInput , codec );
376
398
}
377
399
return getTribbleFeatureReader (featureInput , codec , cloudWrapper , cloudIndexWrapper );
@@ -419,18 +441,48 @@ private static <T extends Feature> FeatureReader<T> getFeatureReader(final Featu
419
441
private static <T extends Feature > AbstractFeatureReader <T , ?> getTribbleFeatureReader (final FeatureInput <T > featureInput , final FeatureCodec <T , ?> codec , final Function <SeekableByteChannel , SeekableByteChannel > cloudWrapper , final Function <SeekableByteChannel , SeekableByteChannel > cloudIndexWrapper ) {
420
442
Utils .nonNull (codec );
421
443
try {
422
- // Must get the path to the data file from the codec here:
423
- final String absoluteRawPath = featureInput .getRawInputString ();
424
-
425
444
// Instruct the reader factory to not require an index. We will require one ourselves as soon as
426
445
// a query by interval is attempted.
427
446
final boolean requireIndex = false ;
428
447
429
- // Only apply the wrappers if the feature input is in a remote location which will benefit from prefetching.
430
- if (BucketUtils .isEligibleForPrefetching (featureInput )) {
431
- return AbstractFeatureReader .getFeatureReader (absoluteRawPath , null , codec , requireIndex , cloudWrapper , cloudIndexWrapper );
448
+ if (featureInput .hasExtension (BundleJSON .BUNDLE_EXTENSION )) {
449
+ final Bundle vcfBundle = BundleJSON .toBundle (htsjdk .beta .plugin .IOUtils .getStringFromPath (featureInput ), GATKPath ::new );
450
+ final IOPath vcfPath = vcfBundle .getOrThrow (BundleResourceType .CT_VARIANT_CONTEXTS ).getIOPath ().get ();
451
+ final Optional <BundleResource > vcfIndexPath = vcfBundle .get (BundleResourceType .CT_VARIANTS_INDEX );
452
+ final String rawIndexResourcePath =
453
+ vcfIndexPath .isPresent () ? vcfIndexPath .get ().getIOPath ().get ().getRawInputString () : null ;
454
+
455
+ // Only apply the wrappers if the feature input is in a remote location which will benefit from prefetching.
456
+ if (BucketUtils .isEligibleForPrefetching (vcfPath )) {
457
+ final String absoluteRawPath = vcfPath .getRawInputString ();
458
+ return AbstractFeatureReader .getFeatureReader (absoluteRawPath , rawIndexResourcePath , codec , requireIndex , cloudWrapper , cloudIndexWrapper );
459
+ } else {
460
+ return AbstractFeatureReader .getFeatureReader (vcfPath .getRawInputString (), rawIndexResourcePath , codec , requireIndex , Utils .identityFunction (), Utils .identityFunction ());
461
+ }
462
+ } else if (featureInput .getParentBundle () != null ) {
463
+ final Bundle vcfBundle = featureInput .getParentBundle ();
464
+ // code path for when a user has specified multiple bundles on the command line, so there is no single
465
+ // serialized bundle file to access
466
+ final IOPath vcfPath = vcfBundle .getOrThrow (BundleResourceType .CT_VARIANT_CONTEXTS ).getIOPath ().get ();
467
+ // Only apply the wrappers if the feature input is in a remote location which will benefit from prefetching.
468
+ final Optional <BundleResource > vcfIndexPath = vcfBundle .get (BundleResourceType .CT_VARIANTS_INDEX );
469
+ final String rawIndexResourcePath =
470
+ vcfIndexPath .isPresent () ? vcfIndexPath .get ().getIOPath ().get ().getRawInputString () : null ;
471
+ final String absoluteRawPath = vcfPath .getRawInputString ();
472
+ if (BucketUtils .isEligibleForPrefetching (vcfPath )) {
473
+ return AbstractFeatureReader .getFeatureReader (absoluteRawPath , rawIndexResourcePath , codec , requireIndex , cloudWrapper , cloudIndexWrapper );
474
+ } else {
475
+ return AbstractFeatureReader .getFeatureReader (absoluteRawPath , rawIndexResourcePath , codec , requireIndex , Utils .identityFunction (), Utils .identityFunction ());
476
+ }
432
477
} else {
433
- return AbstractFeatureReader .getFeatureReader (absoluteRawPath , null , codec , requireIndex , Utils .identityFunction (), Utils .identityFunction ());
478
+ final String absoluteRawPath = featureInput .getRawInputString ();
479
+
480
+ // Only apply the wrappers if the feature input is in a remote location which will benefit from prefetching.
481
+ if (BucketUtils .isEligibleForPrefetching (featureInput )) {
482
+ return AbstractFeatureReader .getFeatureReader (absoluteRawPath , null , codec , requireIndex , cloudWrapper , cloudIndexWrapper );
483
+ } else {
484
+ return AbstractFeatureReader .getFeatureReader (absoluteRawPath , null , codec , requireIndex , Utils .identityFunction (), Utils .identityFunction ());
485
+ }
434
486
}
435
487
} catch (final TribbleException e ) {
436
488
throw new GATKException ("Error initializing feature reader for path " + featureInput .getFeaturePath (), e );
0 commit comments