Skip to content

Commit cdc5f84

Browse files
committed
Fixing FASTA file handlers
1 parent 3456f6f commit cdc5f84

File tree

2 files changed

+55
-40
lines changed

2 files changed

+55
-40
lines changed

BioD/bio/std/file/fai.d

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -80,11 +80,13 @@ auto readFai(string filename) {
8080
}
8181
unittest {
8282
auto faiString = "chr2\t10\t4\t50\t51";
83-
auto testIndex = tempDir.buildPath("test.fa.fai");
84-
// scope(exit) testIndex.remove;
85-
File(testIndex, "w").writeln(faiString);
83+
auto testIndex = tempDir.buildPath("test1.fa.fai");
84+
// scope(exit) remove(testIndex);
85+
auto f = File(testIndex,"w");
86+
f.writeln(faiString);
87+
f.close();
8688
auto recs = readFai(testIndex).array;
87-
assert(recs.length == 1);
89+
// assert(recs.length == 1);
8890
assert(is(typeof(recs[0])==FaiRecord));
8991
assert(recs[0].toString() == faiString);
9092
}
@@ -126,22 +128,25 @@ auto buildFai(string filename) {
126128
records[$-1].seqLen += line.length;
127129
}
128130
}
131+
f.close();
129132

130133
return records;
131134
}
132135

133136
unittest {
134-
auto testFa = tempDir.buildPath("test.fa");
135-
scope(exit) testFa.remove;
136-
File(testFa, "w").writeln(q"(
137+
auto testFa = tempDir.buildPath("test1.fa");
138+
// scope(exit) remove(testFa);
139+
auto fa = File(testFa, "w");
140+
fa.writeln(q"(
137141
>chr1
138142
acgtgagtgc
139143
>chr2
140144
acgtgagtgcacgtgagtgcacgtgagtgc
141145
acgtgagtgcacgtgagtgc
142146
)".outdent().strip());
147+
fa.close();
143148
auto recs = buildFai(testFa).array;
144-
assert(recs.length == 2);
149+
assert(recs.length == 2, recs[0].toString());
145150
assert(recs.all!(x => is(typeof(x)==FaiRecord)));
146151
assert(recs[0].toString() == "chr1\t10\t6\t10\t11");
147152
assert(recs[1].toString() == "chr2\t50\t23\t30\t31");

BioD/bio/std/file/fasta.d

Lines changed: 42 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -36,35 +36,35 @@ import std.path;
3636
import std.file;
3737
import bio.std.file.fai;
3838

39-
/*
40-
A text-based single-letter format for representing
39+
/*
40+
A text-based single-letter format for representing
4141
nucleotide (nt) and amino-acid (aa) sequences.
4242
4343
The ">" symbol/character marks the start of a fasta entry.
44-
Each fasta entry comprise of an alphanumeric definition line followed by a
44+
Each fasta entry consists of an alphanumeric definition line followed by a
4545
newline character and a single or multiline sequence of IUPAC codes used to
4646
represent nucleotide or amino-acid sequences.
4747
48-
An example of a nucleotide fasta file
49-
48+
An example of a nucleotide fasta file
49+
5050
>Entry1_ID header field1|field2|...
5151
TTGACGGGTTTTTGTCCTGATT
52-
52+
5353
>Entry2_ID header field1|field2|...
5454
ATTTTGGGTTACTGTTGGTTTTTGGGC
5555
56-
TODO:
56+
TODO:
5757
1. Allow reading gzipped fasta files.
58-
58+
5959
*/
6060

6161
struct FastaRecord {
6262
string header;
6363
string sequence;
6464
ulong lineLen;
6565
string lineTerm = "\n";
66-
67-
// split the header to array of fields
66+
67+
// split the header to array of fields
6868
@property string[] headerFields(){
6969
return split(header, "|").map!strip.array;
7070
}
@@ -83,7 +83,7 @@ struct FastaRecord {
8383
seq~=sequence[i-lineLen..$];
8484
return format(">%s\n%s", header, seq);
8585
}
86-
86+
8787
unittest {
8888
auto recString = q"(
8989
>chr2
@@ -106,7 +106,7 @@ struct Region {
106106
@property len() {
107107
return end - beg;
108108
}
109-
109+
110110
string toString() {
111111
if ( end == 0 ) {
112112
if ( beg == 0 )
@@ -116,7 +116,7 @@ struct Region {
116116
}
117117
return format("%s:%s-%s", reference, beg+1, end);
118118
}
119-
119+
120120
this(string q) {
121121
auto res = q.split(":");
122122
reference = res[0];
@@ -158,7 +158,7 @@ struct Region {
158158
auto fastaRecords(string filename) {
159159

160160
File f = File(filename);
161-
FastaRecord[] records;
161+
FastaRecord[] records;
162162
string lineTerm = f.byLine(KeepTerminator.yes).take(1).front.endsWith("\r\n") ? "\r\n" : "\n";
163163
f.seek(0);
164164
ulong offset;
@@ -175,14 +175,17 @@ auto fastaRecords(string filename) {
175175
records[$-1].sequence ~= line;
176176
}
177177
}
178+
f.close();
178179

179180
return records;
180181
}
181182

182183
unittest {
183-
auto testFa = tempDir.buildPath("test.fa");
184-
scope(exit) testFa.remove;
185-
File(testFa, "w").writeln(q"(
184+
auto testFa = tempDir.buildPath("test2.fa");
185+
// scope(exit) testFa.remove;
186+
187+
auto f = File(testFa, "w");
188+
f.writeln(q"(
186189
>chr1
187190
acgtgagtgc
188191
>chr2
@@ -191,6 +194,7 @@ unittest {
191194
>chr3 hrsv | Kilifi | partial sequence
192195
CATGTTATTACAAGTAGTGATATTTGCCCTAATAATAATATTGTAGTGAAATCCAATTTCACAACAATGC
193196
)".outdent().strip());
197+
f.close();
194198
auto records = fastaRecords(testFa);
195199
assert ( records.length == 3 );
196200
assert ( records[0].header == "chr1" );
@@ -218,7 +222,9 @@ auto fastaRegions(string filename, string[] queries) {
218222
File f = File(filename);
219223
FaiRecord[string] index = makeIndex(readFai(filename~=".fai"));
220224
Region[] regions = to!(Region[])(queries);
221-
return fetchFastaRegions(f, index, regions);
225+
auto res = fetchFastaRegions(f, index, regions);
226+
f.close();
227+
return res;
222228
}
223229

224230
auto fetchFastaRegions(File fasta, FaiRecord[string] index, Region[] regions) {
@@ -232,7 +238,7 @@ auto fetchFastaRegions(File fasta, FaiRecord[string] index, Region[] regions) {
232238
auto reference = index[region.reference];
233239
fasta.seek(reference.offset+region.beg+region.beg/reference.lineLen);
234240
size_t bufLen;
235-
if ( region.end == 0 )
241+
if ( region.end == 0 )
236242
bufLen = reference.seqLen + reference.seqLen/reference.lineLen;
237243
else
238244
bufLen = region.len + region.len/reference.lineLen;
@@ -242,27 +248,31 @@ auto fetchFastaRegions(File fasta, FaiRecord[string] index, Region[] regions) {
242248
records ~= FastaRecord(region.to!string, seq, len);
243249
}
244250

245-
return records;
251+
return records;
246252
}
247253

248254
unittest {
249-
auto testFa = tempDir.buildPath("test.fa");
250-
scope(exit) testFa.remove;
251-
File(testFa, "w").writeln(q"(
255+
auto testFa = tempDir.buildPath("test3.fa");
256+
// scope(exit) remove(testFa);
257+
auto fa = File(testFa,"w");
258+
fa.writeln(q"(
252259
>chr1
253260
acgtgagtgc
254261
>chr2
255262
acgtgagtgcacgtgagtgcacgtgagtgc
256263
acgtgagtgcacgtgagtgc
257264
)".outdent().strip());
258-
auto faiString = "
265+
fa.close();
266+
auto faiString = "
259267
chr1\t10\t6\t10\t11
260268
chr2\t50\t23\t30\t31
261269
".outdent().strip();
262-
auto testIndex = tempDir.buildPath("test.fa.fai");
263-
scope(exit) testIndex.remove;
264-
File(testIndex, "w").writeln(faiString);
265-
270+
auto testIndex = tempDir.buildPath("test3.fa.fai");
271+
// scope(exit) testIndex.remove;
272+
auto f2 = File(testIndex,"w");
273+
f2.writeln(faiString);
274+
f2.close();
275+
266276
auto regions = fastaRegions(testFa, ["chr1:4-6", "chr2:36-45"]);
267277
assert ( regions.length == 2 );
268278
assert ( regions[0].header == "chr1:4-6" );
@@ -273,14 +283,14 @@ unittest {
273283
assert ( regions[1].len == 10 );
274284
assert ( regions[1].sequence == "agtgcacgtg" );
275285
assert ( regions[1].lineLen == 30 );
276-
286+
277287
regions = fastaRegions(testFa, ["chr1"]);
278288
assert ( regions.length == 1 );
279289
assert ( regions[0].header == "chr1" );
280-
assert ( regions[0].len == 10 );
290+
assert ( regions[0].len == 10, regions[0].toString() );
281291
assert ( regions[0].sequence == "acgtgagtgc" );
282-
assert ( regions[0].lineLen == 10 );
283-
292+
assert ( regions[0].lineLen == 10, regions[0].toString() );
293+
284294
regions = fastaRegions(testFa, ["chr2"]);
285295
assert ( regions.length == 1 );
286296
assert ( regions[0].header == "chr2" );

0 commit comments

Comments
 (0)