@@ -36,35 +36,35 @@ import std.path;
36
36
import std.file ;
37
37
import bio.std.file.fai ;
38
38
39
- /*
40
- A text-based single-letter format for representing
39
+ /*
40
+ A text-based single-letter format for representing
41
41
nucleotide (nt) and amino-acid (aa) sequences.
42
42
43
43
The ">" symbol/character marks the start of a fasta entry.
44
- Each fasta entry comprise of an alphanumeric definition line followed by a
44
+ Each fasta entry comprise of an alphanumeric definition line followed by a
45
45
newline character and a single or multiline sequence of IUPAC codes used to
46
46
represent nucleotide or amino-acid sequences.
47
47
48
- An example of a nucleotide fasta file
49
-
48
+ An example of a nucleotide fasta file
49
+
50
50
>Entry1_ID header field1|field2|...
51
51
TTGACGGGTTTTTGTCCTGATT
52
-
52
+
53
53
>Entry2_ID header field1|field2|...
54
54
ATTTTGGGTTACTGTTGGTTTTTGGGC
55
55
56
- TODO:
56
+ TODO:
57
57
1. Allow reading gzipped fasta files.
58
-
58
+
59
59
*/
60
60
61
61
struct FastaRecord {
62
62
string header;
63
63
string sequence;
64
64
ulong lineLen;
65
65
string lineTerm = " \n " ;
66
-
67
- // split the header to array of fields
66
+
67
+ // split the header to array of fields
68
68
@property string [] headerFields(){
69
69
return split (header, " |" ).map! strip.array;
70
70
}
@@ -83,7 +83,7 @@ struct FastaRecord {
83
83
seq~= sequence[i- lineLen.. $];
84
84
return format (" >%s\n %s" , header, seq);
85
85
}
86
-
86
+
87
87
unittest {
88
88
auto recString = q" (
89
89
>chr2
@@ -106,7 +106,7 @@ struct Region {
106
106
@property len() {
107
107
return end - beg;
108
108
}
109
-
109
+
110
110
string toString () {
111
111
if ( end == 0 ) {
112
112
if ( beg == 0 )
@@ -116,7 +116,7 @@ struct Region {
116
116
}
117
117
return format (" %s:%s-%s" , reference, beg+ 1 , end);
118
118
}
119
-
119
+
120
120
this (string q) {
121
121
auto res = q.split(" :" );
122
122
reference = res[0 ];
@@ -158,7 +158,7 @@ struct Region {
158
158
auto fastaRecords (string filename) {
159
159
160
160
File f = File (filename);
161
- FastaRecord[] records;
161
+ FastaRecord[] records;
162
162
string lineTerm = f.byLine(KeepTerminator.yes).take(1 ).front.endsWith(" \r\n " ) ? " \r\n " : " \n " ;
163
163
f.seek(0 );
164
164
ulong offset;
@@ -175,14 +175,17 @@ auto fastaRecords(string filename) {
175
175
records[$- 1 ].sequence ~= line;
176
176
}
177
177
}
178
+ f.close();
178
179
179
180
return records;
180
181
}
181
182
182
183
unittest {
183
- auto testFa = tempDir.buildPath(" test.fa" );
184
- scope (exit) testFa.remove;
185
- File (testFa, " w" ).writeln(q" (
184
+ auto testFa = tempDir.buildPath(" test2.fa" );
185
+ // scope(exit) testFa.remove;
186
+
187
+ auto f = File (testFa, " w" );
188
+ f.writeln(q" (
186
189
>chr1
187
190
acgtgagtgc
188
191
>chr2
@@ -191,6 +194,7 @@ unittest {
191
194
>chr3 hrsv | Kilifi | partial sequence
192
195
CATGTTATTACAAGTAGTGATATTTGCCCTAATAATAATATTGTAGTGAAATCCAATTTCACAACAATGC
193
196
)" .outdent().strip());
197
+ f.close();
194
198
auto records = fastaRecords(testFa);
195
199
assert ( records.length == 3 );
196
200
assert ( records[0 ].header == " chr1" );
@@ -218,7 +222,9 @@ auto fastaRegions(string filename, string[] queries) {
218
222
File f = File (filename);
219
223
FaiRecord[string ] index = makeIndex(readFai(filename~= " .fai" ));
220
224
Region[] regions = to! (Region[])(queries);
221
- return fetchFastaRegions (f, index, regions);
225
+ auto res = fetchFastaRegions(f, index, regions);
226
+ f.close();
227
+ return res;
222
228
}
223
229
224
230
auto fetchFastaRegions (File fasta, FaiRecord[string ] index, Region[] regions) {
@@ -232,7 +238,7 @@ auto fetchFastaRegions(File fasta, FaiRecord[string] index, Region[] regions) {
232
238
auto reference = index[region.reference];
233
239
fasta.seek(reference.offset+ region.beg+ region.beg/ reference.lineLen);
234
240
size_t bufLen;
235
- if ( region.end == 0 )
241
+ if ( region.end == 0 )
236
242
bufLen = reference.seqLen + reference.seqLen/ reference.lineLen;
237
243
else
238
244
bufLen = region.len + region.len/ reference.lineLen;
@@ -242,27 +248,31 @@ auto fetchFastaRegions(File fasta, FaiRecord[string] index, Region[] regions) {
242
248
records ~= FastaRecord(region.to! string , seq, len);
243
249
}
244
250
245
- return records;
251
+ return records;
246
252
}
247
253
248
254
unittest {
249
- auto testFa = tempDir.buildPath(" test.fa" );
250
- scope (exit) testFa.remove;
251
- File (testFa, " w" ).writeln(q" (
255
+ auto testFa = tempDir.buildPath(" test3.fa" );
256
+ // scope(exit) remove(testFa);
257
+ auto fa = File (testFa," w" );
258
+ fa.writeln(q" (
252
259
>chr1
253
260
acgtgagtgc
254
261
>chr2
255
262
acgtgagtgcacgtgagtgcacgtgagtgc
256
263
acgtgagtgcacgtgagtgc
257
264
)" .outdent().strip());
258
- auto faiString = "
265
+ fa.close();
266
+ auto faiString = "
259
267
chr1\t 10\t 6\t 10\t 11
260
268
chr2\t 50\t 23\t 30\t 31
261
269
" .outdent().strip();
262
- auto testIndex = tempDir.buildPath(" test.fa.fai" );
263
- scope (exit) testIndex.remove;
264
- File (testIndex, " w" ).writeln(faiString);
265
-
270
+ auto testIndex = tempDir.buildPath(" test3.fa.fai" );
271
+ // scope(exit) testIndex.remove;
272
+ auto f2 = File (testIndex," w" );
273
+ f2.writeln(faiString);
274
+ f2.close();
275
+
266
276
auto regions = fastaRegions(testFa, [" chr1:4-6" , " chr2:36-45" ]);
267
277
assert ( regions.length == 2 );
268
278
assert ( regions[0 ].header == " chr1:4-6" );
@@ -273,14 +283,14 @@ unittest {
273
283
assert ( regions[1 ].len == 10 );
274
284
assert ( regions[1 ].sequence == " agtgcacgtg" );
275
285
assert ( regions[1 ].lineLen == 30 );
276
-
286
+
277
287
regions = fastaRegions(testFa, [" chr1" ]);
278
288
assert ( regions.length == 1 );
279
289
assert ( regions[0 ].header == " chr1" );
280
- assert ( regions[0 ].len == 10 );
290
+ assert ( regions[0 ].len == 10 , regions[ 0 ].toString() );
281
291
assert ( regions[0 ].sequence == " acgtgagtgc" );
282
- assert ( regions[0 ].lineLen == 10 );
283
-
292
+ assert ( regions[0 ].lineLen == 10 , regions[ 0 ].toString() );
293
+
284
294
regions = fastaRegions(testFa, [" chr2" ]);
285
295
assert ( regions.length == 1 );
286
296
assert ( regions[0 ].header == " chr2" );
0 commit comments