Skip to content

Commit 9ee9726

Browse files
committed
Pulled source of BioD back into trunk
1 parent 0dca9f4 commit 9ee9726

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

82 files changed

+33165
-7
lines changed

.gitignore

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
.dub/
22
dub.selections.json
3-
BioD/
4-
BioD
5-
./BioD
6-
undeaD/
73
lz4/
8-
core
94
bin/
105
test.log
116
build/

BioD/bio/etc/ragel/Makefile

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Regenerates the D parsers from their Ragel grammars.
#
#   fastrecordparser  SAM record parser, generated with -G2 (goto-driven code)
#   recordparser      SAM record parser, generated with Ragel's default code style
#   regionparser      region-string parser
#
# Every target is a command, not a file, so all of them are declared phony;
# otherwise a stray file named `all` or `clean` would make the target look
# up to date and its recipe would never run.
#
# NOTE(review): the destination paths below are relative to this directory.
# If this Makefile really lives in BioD/bio/etc/ragel/, `../bio/...` resolves
# to BioD/bio/etc/bio/..., which may not be the intended location - confirm
# before relying on the `mv` steps.
.PHONY: all clean fastrecordparser recordparser regionparser

all: fastrecordparser recordparser regionparser

# Ragel writes sam_alignment.d; the module declaration is prepended through a
# temporary file because the output cannot be read and overwritten in place.
fastrecordparser:
	ragel sam_alignment.rl -D -G2
	./workarounds/fix_switch_case_fallthrough.sh sam_alignment.d
	echo 'module bio.sam.utils.fastrecordparser;' | cat - sam_alignment.d > .sam_alignment.d.tmp
	rm sam_alignment.d
	mv .sam_alignment.d.tmp fastrecordparser.d
	mv fastrecordparser.d ../bio/sam/utils/fastrecordparser.d

recordparser:
	ragel sam_alignment.rl -D
	./workarounds/fix_static_const.sh sam_alignment.d
	echo 'module bio.sam.utils.recordparser;' | cat - sam_alignment.d > .sam_alignment.d.tmp
	rm sam_alignment.d
	mv .sam_alignment.d.tmp recordparser.d
	mv recordparser.d ../bio/sam/utils/recordparser.d

# region.rl already carries its own module declaration, so no prepend step.
regionparser:
	ragel region.rl -D
	./workarounds/fix_static_const.sh region.d
	mv region.d ../bio/core/region.d

# Also removes the intermediates a failed or interrupted run leaves behind.
clean:
	rm -f *parser.d region.d sam_alignment.d .sam_alignment.d.tmp

BioD/bio/etc/ragel/maf_block.rl

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
/*
2+
This file is part of BioD.
3+
Copyright (C) 2013 Artem Tarasov <lomereiter@gmail.com>
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a
6+
copy of this software and associated documentation files (the "Software"),
7+
to deal in the Software without restriction, including without limitation
8+
the rights to use, copy, modify, merge, publish, distribute, sublicense,
9+
and/or sell copies of the Software, and to permit persons to whom the
10+
Software is furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in
13+
all copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21+
DEALINGS IN THE SOFTWARE.
22+
23+
*/
24+
module bio.maf.parser;
25+
import std.conv, std.array;
26+
import bio.maf.block;
27+
28+
%%{
29+
# Ragel grammar for one MAF alignment block: an 'a' line followed by one or
# more 's', 'i', 'e' or 'q' lines. The embedded actions are D code that reads
# and writes the local variables declared in parseMafBlock.
machine maf_block;
30+
31+
# Common utilities for parsing integers and floats
32+
action update_sign { current_sign = fc == '-' ? -1 : 1; }
33+
action init_integer { int_value = 0; }
34+
action consume_next_digit { int_value *= 10; int_value += fc - '0'; }
35+
action take_sign_into_account { int_value *= current_sign; current_sign = 1; }
36+
37+
sign = [\-+];
38+
39+
# Unsigned decimal number of 1 to 18 digits, accumulated digit by digit.
# NOTE(review): parseMafBlock declares int_value as `int`, so values above
# int.max overflow during accumulation - confirm the intended range.
uint = ([0-9]{1,18}) > init_integer $ consume_next_digit ;
# Optionally signed integer. No rule in this machine references it.
int = (sign >update_sign)? uint % take_sign_into_account ;
41+
42+
action mark_float_start { float_beg = p - line.ptr; }
43+
action update_float_value {
44+
float_value = to!float(line[float_beg .. p - line.ptr]);
45+
}
46+
47+
# The matched text is sliced out of `line` and converted when the token is
# left. NOTE(review): the conversion uses to!float while float_value is
# declared double in parseMafBlock - confirm single precision is intended.
float = ((sign? ((digit* '.'? digit+ ([eE] sign? digit+)?) | "inf") ) | "nan")
48+
> mark_float_start % update_float_value ;
49+
# --------------------------------------------------------------------------
50+
51+
action set_score { block.score = float_value; }
52+
action set_pass { block.pass = int_value; }
53+
# Alignment block line
# 'a' followed by zero or more whitespace-separated score=/pass= pairs.
54+
score_vp = "score=" float % set_score;
55+
pass_vp = "pass=" uint % set_pass ;
56+
ab_name_value_pair = score_vp | pass_vp ;
57+
alignment_block_line = 'a' (space+ ab_name_value_pair)* ;
58+
59+
# Common
# Fields shared by 's' and 'e' lines; each action stores into the pending
# `sequence`, which add_sequence appends to `sequences` and then resets.
60+
action src_begin { src_beg = p - line.ptr; }
61+
action set_src { sequence.source = line[src_beg .. p - line.ptr]; }
62+
action set_start { sequence.start = int_value; }
63+
action set_size { sequence.size = int_value; }
64+
action set_strand { sequence.strand = fc; }
65+
action set_src_size { sequence.source_size = int_value; }
66+
action add_sequence { sequences.put(sequence); sequence = MafSequence.init; }
67+
action check_sequence { assert(line[src_beg .. p - line.ptr] == sequences.data.back.source); }
68+
src = (^space)+ > src_begin % set_src ;
69+
start = uint % set_start ;
70+
size = uint % set_size ;
71+
strand = ('+' | '-') > set_strand ;
72+
srcSize = uint % set_src_size ;
73+
74+
# Sequence line
# Action embeddings bind tighter than concatenation, so in s_line
# `> text_begin % set_text` applies to `text` only, not to the whole line.
75+
action text_begin { text_beg = p - line.ptr; }
76+
action set_text { sequence.text = line[text_beg .. p - line.ptr]; }
77+
text = (^space)+ ;
78+
s_line = ('s'
79+
space+ src
80+
space+ start
81+
space+ size
82+
space+ strand
83+
space+ srcSize
84+
space+ text > text_begin % set_text) % add_sequence ;
85+
86+
# 'i' line
# Annotates the most recently appended sequence (sequences.data.back);
# check_sequence asserts that the src field equals that sequence's source.
# Here `src` also runs its own set_src action on the pending `sequence`.
87+
action set_left_status { sequences.data.back.left_status = fc; }
88+
action set_left_count { sequences.data.back.left_count = int_value; }
89+
action set_right_status { sequences.data.back.right_status = fc; }
90+
action set_right_count { sequences.data.back.right_count = int_value; }
91+
i_status = [CINnMT] ;
92+
leftStatus = i_status ;
93+
leftCount = uint ;
94+
rightStatus = i_status ;
95+
rightCount = uint ;
96+
i_line = 'i'
97+
space+ (src > src_begin % check_sequence)
98+
space+ leftStatus > set_left_status
99+
space+ leftCount % set_left_count
100+
space+ rightStatus > set_right_status
101+
space+ rightCount % set_right_count ;
102+
103+
# 'e' line
# Same leading fields as an 's' line, ending in a one-character status
# instead of text; the result is appended as a sequence of its own.
104+
action set_empty_status { sequence.empty_status = *p; }
105+
e_status = [CIMn] ;
106+
e_line = ('e'
107+
space+ src
108+
space+ start
109+
space+ size
110+
space+ strand
111+
space+ srcSize
112+
space+ (e_status > set_empty_status)) % add_sequence ;
113+
114+
# 'q' line
# Stores the quality string on the most recently appended sequence.
115+
action qual_begin { qual_beg = p - line.ptr; }
116+
action set_qual { sequences.data.back.quality = line[qual_beg .. p - line.ptr]; }
117+
q_value = (digit | 'F' | '-')+ ;
118+
q_line = 'q'
119+
space+ (src > src_begin % check_sequence)
120+
space+ (q_value > qual_begin % set_qual);
121+
122+
# Entry point: the 'a' line, then at least one newline-prefixed s/i/e/q line.
newline = "\n" | "\r\n" ;
123+
block := alignment_block_line space*
124+
(newline ((s_line | i_line | e_line | q_line) space*))+ ;
125+
126+
write data;
127+
}%%
128+
129+
/**
 * Parses the text of one MAF alignment block.
 *
 * Params:
 *     line = text of the block: the 'a' line followed by its newline-separated
 *            's', 'i', 'e' and 'q' lines, as matched by the `maf_block`
 *            machine. Slices of this string are stored in the result.
 *
 * Returns: a MafBlock holding the values set by the machine's actions, with
 *          `sequences` set to every sequence appended during the scan.
 *
 * NOTE(review): the machine's final state (`cs`) is not inspected after the
 * scan, so input that stops matching part-way appears to yield a partially
 * filled block rather than an error - confirm this is intended.
 */
MafBlock parseMafBlock(string line) {
    // Scanner state under the names the Ragel-generated code expects.
    char* p = cast(char*)line.ptr;
    char* pe = p + line.length;
    char* eof = pe;
    int cs;

    // Scratch values written by the numeric actions.
    // The sign multiplier must start at 1: D default-initializes int to 0,
    // and take_sign_into_account multiplies int_value by current_sign, which
    // would turn every number without an explicit sign into 0.
    int current_sign = 1;
    int int_value;
    double float_value;
    size_t float_beg;

    MafBlock block;
    MafSequence sequence;  // sequence currently being filled in
    auto sequences = Appender!(MafSequence[])();

    // Start offsets into `line` of the token currently being captured.
    size_t src_beg;
    size_t text_beg;
    size_t qual_beg;

    %%write init;
    %%write exec;

    block.sequences = sequences.data;
    return block;
}

BioD/bio/etc/ragel/region.rl

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
This file is part of BioD.
3+
Copyright (C) 2012 Artem Tarasov <lomereiter@gmail.com>
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a
6+
copy of this software and associated documentation files (the "Software"),
7+
to deal in the Software without restriction, including without limitation
8+
the rights to use, copy, modify, merge, publish, distribute, sublicense,
9+
and/or sell copies of the Software, and to permit persons to whom the
10+
Software is furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in
13+
all copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21+
DEALINGS IN THE SOFTWARE.
22+
23+
*/
24+
module bio.core.region;
25+
26+
%%{
27+
# Ragel grammar for region strings of the form "ref", "ref:beg" or
# "ref:beg-end". The actions are D code operating on the locals of parseRegion.
machine region_parser;
28+
29+
action init_integer { uint_value = 0; }
30+
# The comma expression keeps both updates under the `if`, so ',' characters
# (thousands separators such as "1,000") leave uint_value untouched.
action consume_next_digit { if (fc != ',') uint_value *= 10, uint_value += fc - '0'; }
31+
integer = [,0-9]+ > init_integer @consume_next_digit ;
32+
33+
action set_reference { region.reference = str[0 .. p - str.ptr]; }
34+
# The start is stored as the parsed value minus one ("chr1:1,000" gives
# beg == 999 in the unittest below).
# NOTE(review): uint_value is a long, so a start of 0 yields -1 here and
# to!uint is expected to throw - confirm that is the desired behaviour.
action set_left_end { region.beg = to!uint(uint_value - 1); }
35+
action set_right_end { region.end = to!uint(uint_value); }
36+
37+
# First character: any of '!'..'~' except '*' and '='; remaining characters:
# any of '!'..'~'.
reference = ([!-()+-<>-~] [!-~]*) % set_reference ;
38+
# `:>` lets the ':' end the reference even though ':' is itself a legal
# reference character.
reference_and_left_end = reference :> ':' integer % set_left_end ;
39+
reference_and_both_ends = reference_and_left_end '-' integer % set_right_end ;
40+
41+
# The `@ 0` / `@ 1` priorities appear to make the forms with coordinates win
# over a bare reference where the alternatives overlap - TODO confirm.
region := (reference @ 0) | (reference_and_left_end @ 1) | (reference_and_both_ends @ 1);
42+
43+
write data;
44+
}%%
45+
46+
import std.conv;
47+
48+
/// Result of parseRegion: a reference name with its begin and end coordinates.
struct Region {
    /// Reference name; parseRegion stores a slice of its input string here.
    string reference;

    /// Begin coordinate; parseRegion stores the parsed start minus one.
    uint beg;

    /// End coordinate; parseRegion stores the parsed end value unchanged.
    uint end;
}
53+
54+
/**
 * Parses a region string of the form "ref", "ref:beg" or "ref:beg-end".
 *
 * Params:
 *     str = the region string; digits may be grouped with ',' separators.
 *
 * Returns: a Region whose `beg` is 0 and whose `end` is uint.max unless the
 *          corresponding coordinate is present in `str`.
 */
Region parseRegion(string str) {
    // Values reported when the input carries no coordinates.
    Region region = Region(null, 0, uint.max);

    // Accumulator filled in by the grammar's integer actions.
    long uint_value = 0;

    // Scanner state under the names the Ragel-generated code expects.
    int cs;
    char* p = cast(char*)str.ptr;
    char* eof = p + str.length;
    char* pe = eof;

    %%write init;
    %%write exec;

    return region;
}
70+
71+
unittest {
    // Parses `input` and checks all three fields of the resulting Region.
    static void expectRegion(string input, string reference, uint beg, uint end) {
        auto parsed = parseRegion(input);
        assert(parsed.reference == reference);
        assert(parsed.beg == beg);
        assert(parsed.end == end);
    }

    // Reference with both ends; commas inside numbers are ignored.
    expectRegion("chr1:1,000-2000", "chr1", 999, 2000);

    // Bare reference keeps the default bounds.
    expectRegion("chr2", "chr2", 0, uint.max);

    // Reference with a start only keeps the default end.
    expectRegion("chr3:1,000,000", "chr3", 999_999, uint.max);
}

0 commit comments

Comments
 (0)