Skip to content

Commit 3456f6f

Browse files
committed
BioD import
1 parent 9ee9726 commit 3456f6f

28 files changed

+4899
-5
lines changed

BioD/bio/core/base.d

Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
/*
2+
This file is part of BioD.
3+
Copyright (C) 2012 Artem Tarasov <lomereiter@gmail.com>
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a
6+
copy of this software and associated documentation files (the "Software"),
7+
to deal in the Software without restriction, including without limitation
8+
the rights to use, copy, modify, merge, publish, distribute, sublicense,
9+
and/or sell copies of the Software, and to permit persons to whom the
10+
Software is furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in
13+
all copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21+
DEALINGS IN THE SOFTWARE.
22+
23+
*/
24+
module bio.core.base;
25+
26+
import bio.core.tinymap;
27+
import std.traits;
28+
29+
/// Operations shared by Base5 and Base16.
///
/// The struct that mixes this in must provide `_code` (the internal code)
/// and `_code2char` (an indexable table from internal code to character).
mixin template CommonBaseOperations() {
    /// Character corresponding to the stored internal code
    @property char asCharacter() const {
        return _code2char[_code];
    }

    /// Implicit conversion to char goes through asCharacter
    alias asCharacter this;
}
37+
38+
/// Base representation supporting full set of IUPAC codes
struct Base {
    mixin TinyMapInterface!16;

    // Byte value -> 4-bit internal code.
    //
    // Recognised symbols are '=' (code 0), the digits '0'..'3' (treated as
    // A, C, G, T) and the IUPAC letters in either case; every other byte
    // maps to 15, i.e. 'N'.
    //
    // Declared `static immutable` rather than `enum` so that the table has a
    // single storage location instead of being a manifest-constant literal
    // that is re-instantiated wherever it is indexed.
    private static immutable ubyte[256] _char2code = [
        // 0x00 .. 0x2F
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        // 0x30 .. 0x3F: '0'..'3' and '='
         1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0,15,15,

        // 0x40 .. 0x5F: upper-case letters
        15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
        15,15, 5, 6,  8,15, 7, 9, 15,10,15,15, 15,15,15,15,
        // 0x60 .. 0x7F: lower-case letters (same codes as upper case)
        15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
        15,15, 5, 6,  8,15, 7, 9, 15,10,15,15, 15,15,15,15,

        // 0x80 .. 0xBF
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,

        // 0xC0 .. 0xFF
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
    ];

    // Internal codes are bit sets over the four bases
    // (bit 0 = A, bit 1 = C, bit 2 = G, bit 3 = T):
    //
    // = 0000
    //
    // A 0001
    // C 0010
    // G 0100
    // T 1000
    //
    // W 1001 (A T) Weak
    // S 0110 (C G) Strong
    //
    // M 0011 (A C) aMino
    // K 1100 (G T) Keto
    // R 0101 (A G) puRine
    // Y 1010 (C T) pYrimidine
    //
    // B 1110 (not A)
    // D 1101 (not C)
    // H 1011 (not G)
    // V 0111 (not T)
    //
    // N 1111 (aNy base)
    private enum _code2char = "=ACMGRSVTWYHKDBN";

    // _complement_table[i] is i with its four bits in reversed order, which
    // swaps the A and T bits and the C and G bits.
    private static immutable ubyte[16] _complement_table = [0x0, 0x8, 0x4, 0xC,
                                                            0x2, 0xA, 0x6, 0xE,
                                                            0x1, 0x9, 0x5, 0xD,
                                                            0x3, 0xB, 0x7, 0xF];
    /// Complementary base
    Base complement() @property const {
        // take the code, reverse the bits, and return the base
        return Base.fromInternalCode(_complement_table[_code]);
    }

    unittest {
        import std.ascii;

        // every symbol of _code2char maps back to its own index
        foreach (i, c; _code2char) {
            assert(_char2code[c] == i);
        }

        // every byte that is not mapped to 'N' is either one of the digits
        // '0'..'3' or a (case-insensitive) symbol of _code2char
        foreach (c; 0 .. 256) {
            auto c2 = _code2char[_char2code[c]];
            if (c2 != 'N') {
                if ('0' <= c && c <= '9') {
                    assert(c2 == "ACGT"[c - '0']);
                } else {
                    assert(c2 == toUpper(c));
                }
            }
        }
    }

    mixin CommonBaseOperations;

    /// Construct from IUPAC code
    this(char c) {
        _code = _char2code[cast(ubyte)c];
    }

    /// ditto
    ///
    /// Code points that do not fit into the 256-entry table are treated as
    /// unknown symbols and become 'N'.
    this(dchar c) {
        // A plain cast(ubyte) would keep only the low 8 bits, so e.g. U+0141
        // would be read as 'A' (0x41); reject such code points explicitly.
        if (c < _char2code.length)
            _code = _char2code[cast(ubyte)c];
        else
            _code = _char2code['N'];
    }

    // Base5 internal codes 0..4 (A, C, G, T, N) expressed as Base16 codes.
    private static immutable ubyte[5] nt5_to_nt16 = [1, 2, 4, 8, 15];

    // Builds a Base from a Base5 via nt5_to_nt16 (used by Base5.opCast).
    private static Base fromBase5(Base5 base) {
        Base b = void; // _code is assigned right below
        b._code = nt5_to_nt16[base.internal_code];
        return b;
    }

    /// Conversion to Base5
    Base5 opCast(T)() const
        if (is(T == Base5))
    {
        return Base5.fromBase16(this);
    }

    /// Conversion to char or dchar (any qualifier)
    T opCast(T)() const
        if (is(Unqual!T == char) || is(Unqual!T == dchar))
    {
        return asCharacter;
    }
}
147+
148+
unittest {
    // a base can be initialised from an IUPAC character and compares equal to it
    Base sample = 'W';
    assert(sample == 'W');

    // internal code 0 reads back as '='
    sample = Base.fromInternalCode(0);
    assert(sample == '=');
}
155+
156+
/// Base16 is another name for Base, the type holding the full set of IUPAC codes.
alias Base Base16;
157+
158+
/// Base representation supporting only 'A', 'C', 'G', 'T', and 'N'
/// (internal codes are 0, 1, 2, 3, and 4 correspondingly)
struct Base5 {
    mixin TinyMapInterface!5;

    // Byte value -> internal code.  Only the letters A, C, G, T (either
    // case) are recognised; every other byte maps to 4, i.e. 'N'.
    //
    // Declared `static immutable` rather than `enum` so that the table has a
    // single storage location instead of being a manifest-constant literal
    // that is re-instantiated wherever it is indexed.
    private static immutable ubyte[256] _char2code = [
        // 0x00 .. 0x3F
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,

        // 0x40 .. 0x5F: upper-case letters
        4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        // 0x60 .. 0x7F: lower-case letters (same codes as upper case)
        4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,

        // 0x80 .. 0xBF
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,

        // 0xC0 .. 0xFF
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
    ];

    private enum _code2char = "ACGTN";

    // Base16 internal code -> Base5 internal code.  Only the single-base
    // codes 1, 2, 4, 8 (A, C, G, T) have an exact counterpart; every other
    // code, including 0 ('='), becomes 4 ('N').
    private static immutable ubyte[16] nt16_to_nt5 = [4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4];

    mixin CommonBaseOperations;

    /// Complementary base
    Base5 complement() @property const {
        // codes 0..3 are A, C, G, T, so 3 - code swaps A<->T and C<->G;
        // N (4) is its own complement
        return Base5.fromInternalCode(cast(ubyte)(_code == 4 ? 4 : (3 - _code)));
    }

    /// Construct base from one of "acgtACGT" symbols.
    /// Every other character is converted to 'N'
    this(char c) {
        _code = _char2code[cast(ubyte)c];
    }

    /// ditto
    this(dchar c) {
        // A plain cast(ubyte) would keep only the low 8 bits, so e.g. U+0141
        // would be read as 'A' (0x41) instead of becoming 'N' as documented.
        if (c < _char2code.length)
            _code = _char2code[cast(ubyte)c];
        else
            _code = _char2code['N'];
    }

    // Builds a Base5 from a Base16 via nt16_to_nt5 (used by Base16.opCast).
    private static Base5 fromBase16(Base16 base) {
        Base5 b = void; // _code is assigned right below
        b._code = nt16_to_nt5[base.internal_code];
        return b;
    }

    /// Conversion to Base16
    Base16 opCast(T)() const
        if(is(T == Base16))
    {
        return Base16.fromBase5(this);
    }

    /// Conversion to char or dchar (any qualifier)
    T opCast(T)() const
        if (is(Unqual!T == char) || is(Unqual!T == dchar))
    {
        return asCharacter;
    }
}
225+
226+
unittest {
    // 'C' is stored as internal code 1
    auto cytosine = Base5('C');
    assert(cytosine.internal_code == 1);

    // internal code 3 reads back as 'T'
    auto thymine = Base5.fromInternalCode(3);
    assert(thymine == 'T');

    // doesn't work with std.conv.to
    //
    //import std.conv;
    //assert(to!Base16(thymine).internal_code == 8);

    // an explicit cast does work; 'T' has internal code 8 in Base16
    auto widened = cast(Base16)thymine;
    assert(widened.internal_code == 8);
}
239+
240+
/// Returns the complement of `base`, which may be a Base5, a Base16,
/// a char, or a dchar.
///
/// Characters are complemented through Base16 and converted back to the
/// character type that was passed in; any other type is expected to
/// provide a `complement` property itself.
B complementBase(B)(B base) {
    alias Unqual!B Plain;
    enum isCharacter = is(Plain == char) || is(Plain == dchar);

    static if (!isCharacter) {
        return base.complement;
    } else {
        auto asBase16 = Base16(base);
        return cast(B)(asBase16.complement);
    }
}
249+
250+
/// Builds a base of type `B` (Base16 unless specified otherwise) from the
/// char or dchar `c` by calling `B`'s constructor.
template charToBase(B = Base16) {
    B charToBase(C)(C c)
        if (is(Unqual!C == dchar) || is(Unqual!C == char))
    {
        B converted = B(c);
        return converted;
    }
}
259+
260+
unittest {
    // plain characters are complemented and returned as characters
    immutable char fromT = complementBase('T');
    immutable char fromG = complementBase('G');
    assert(fromT == 'A');
    assert(fromG == 'C');

    // base types keep their own type
    auto a5 = Base5('A');
    assert(complementBase(a5) == Base5('T'));
    auto c16 = Base16('C');
    assert(complementBase(c16) == Base16('G'));

    // charToBase with an explicit target type
    auto a16 = charToBase!Base16('A');
    assert(a16.complement == Base16('T'));
}

0 commit comments

Comments
 (0)