Skip to content

Commit 358774e

Browse files
authored
Start to create a 'fasta' example
Merges #26. This is seriously broken because of flaws in the `safepoint_recurse!` macro: I'm running into some really serious lifetime problems. ☹️ I feel like I need to do a little API cleanup... I also want to write more documentation. I think some sort of guidebook is in order ;)
2 parents e289ce8 + f46f56b commit 358774e

File tree

1 file changed

+268
-0
lines changed

1 file changed

+268
-0
lines changed

libs/simple/examples/fasta.rs

Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
#![feature(
2+
arbitrary_self_types, // Unfortunately this is required for methods on Gc refs
3+
)]
4+
use std::cell::{Cell, RefCell};
5+
6+
use slog::{Logger, Drain, o};
7+
8+
use zerogc_simple::{Gc, SimpleCollector, SimpleCollectorContext, CollectorId as SimpleCollectorId};
9+
use zerogc_derive::{Trace, NullTrace, unsafe_gc_impl};
10+
use zerogc::prelude::*;
11+
use std::io::Write;
12+
13+
/// Modulus of the linear congruential generator in `State::random`.
const IM: i32 = 139968;
/// Multiplier of the linear congruential generator in `State::random`.
const IA: i32 = 3877;
/// Increment of the linear congruential generator in `State::random`.
const IC: i32 = 29573;

/// Number of sequence characters written per output line.
const LINE_LENGTH: usize = 60;
/// Size in bytes of the output buffer: 1024 lines of `LINE_LENGTH`
/// characters, each followed by one `'\n'`.
const BUFFER_SIZE: usize = (LINE_LENGTH + 1) * 1024;

/// The fixed nucleotide sequence that `main` passes to `make_repeat_fasta`,
/// which cycles through it to produce the first record.
const ALU: &str = concat!(
    "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG",
    "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA",
    "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT",
    "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA",
    "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG",
    "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC",
    "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
);
30+
31+
#[derive(NullTrace)]
32+
struct State {
33+
last: Cell<i32>
34+
}
35+
impl State {
36+
fn new() -> State {
37+
State {
38+
last: Cell::new(42) // we want determinism
39+
}
40+
}
41+
/// Psuedo random number generator
42+
fn random(&self, max: f32) -> f32 {
43+
const ONE_OVEER_IM: f32 = 1.0f32 / IM as f32;
44+
self.last.set((self.last.get() * IA + IC) % IM);
45+
return max * self.last.get() as f32 * ONE_OVEER_IM;
46+
}
47+
}
48+
49+
#[derive(Trace)]
50+
#[zerogc(collector_id(SimpleCollectorId))]
51+
struct FloatProbFreq<'gc> {
52+
chars: Gc<'gc, Vec<Cell<u8>>>,
53+
probs: Gc<'gc, Vec<Cell<f32>>>
54+
}
55+
impl<'gc> FloatProbFreq<'gc> {
56+
pub fn alloc(gc: &'gc SimpleCollectorContext, chars: Gc<'gc, Vec<Cell<u8>>>, probs: &[f64]) -> Gc<'gc, FloatProbFreq<'gc>> {
57+
let probs = gc.alloc(probs.iter().map(|&f| f as f32).map(Cell::new).collect::<Vec<_>>());
58+
gc.alloc(FloatProbFreq { chars, probs })
59+
}
60+
pub fn make_cumulative(&self) {
61+
let mut cp = 0.0f64;
62+
for prob in self.probs.value() {
63+
cp += prob.get() as f64;
64+
prob.set(cp as f32);
65+
}
66+
}
67+
pub fn select_random_into_buffer(
68+
&self, state: &State, buffer: &mut [u8],
69+
mut buffer_index: usize, nRandom: usize
70+
) -> usize {
71+
let chars = self.chars.value();
72+
let probs = self.probs.value();
73+
'outer: for rIndex in 0..nRandom {
74+
let r = state.random(1.0f32);
75+
for (i, prob) in probs.iter().enumerate() {
76+
if r < prob.get() {
77+
buffer[buffer_index] = chars[i].get();
78+
buffer_index += 1;
79+
continue 'outer;
80+
}
81+
}
82+
buffer[buffer_index] = chars[probs.len() - 1].get();
83+
}
84+
return buffer_index;
85+
}
86+
}
87+
88+
#[derive(Trace)]
89+
#[zerogc(collector_id(SimpleCollectorId), copy)]
90+
#[derive(Copy, Clone)]
91+
struct MakeFastaTask<'gc> {
92+
buffer: Option<Gc<'gc, RefCell<Vec<u8>>>>, // TODO: Replace with Gc slice
93+
buffer_index: usize,
94+
n_chars: usize,
95+
state: Gc<'gc, State>,
96+
id: Gc<'gc, String>,
97+
desc: Gc<'gc, String>,
98+
}
99+
impl<'gc> MakeFastaTask<'gc> {
100+
pub fn new(
101+
gc: &'gc SimpleCollectorContext,
102+
state: Gc<'gc, State>,
103+
id: &str,
104+
desc: &str,
105+
n_chars: usize,
106+
) -> Self {
107+
MakeFastaTask {
108+
buffer: None,
109+
buffer_index: 0,
110+
n_chars, state,
111+
id: gc.alloc(String::from(id)),
112+
desc: gc.alloc(String::from(desc)),
113+
}
114+
}
115+
}
116+
117+
fn make_fasta<'gc1, F: for<'gc> FnOnce(usize, &'gc SimpleCollectorContext, &mut MakeFastaTask<'gc>) -> std::io::Result<()>>(
118+
mut task: MakeFastaTask<'gc1>,
119+
gc: &'gc1 mut SimpleCollectorContext,
120+
func: F
121+
) -> std::io::Result<()> {
122+
task.buffer = Some(gc.alloc(RefCell::new(vec![0; BUFFER_SIZE])));
123+
if (task.buffer.as_ref().unwrap().borrow().len() % (LINE_LENGTH + 1)) != 0 {
124+
return Err(std::io::Error::new(
125+
std::io::ErrorKind::Other,
126+
"buffer size must be a multiple of line length"
127+
));
128+
}
129+
let desc_string = gc.alloc(format!(">{} {}\n", &**task.id, &**task.desc));
130+
std::io::stdout().write_all(desc_string.as_bytes())?;
131+
task = safepoint!(gc, task);
132+
while task.n_chars > 0 {
133+
let chunk_size = if task.n_chars > LINE_LENGTH { LINE_LENGTH } else { task.n_chars };
134+
if task.buffer_index == BUFFER_SIZE {
135+
let buffer = task.buffer.as_ref().unwrap().borrow();
136+
std::io::stdout().write_all(&buffer[0..task.buffer_index])?;
137+
task.buffer_index = 0;
138+
}
139+
let (new_task, res) = safepoint_recurse!(gc, task, |gc, task| {
140+
let mut task = task;
141+
func(chunk_size, gc, &mut task)
142+
});
143+
match res {
144+
Ok(()) => {},
145+
Err(e) => return Err(e)
146+
}
147+
task = safepoint!(gc, new_task);
148+
}
149+
{
150+
let buffer = task.buffer.as_ref().unwrap().borrow();
151+
std::io::stdout().write_all(&buffer[0..task.buffer_index])?;
152+
task.buffer_index = 0;
153+
}
154+
Ok(())
155+
}
156+
157+
fn make_random_fasta<'gc>(
158+
task: MakeFastaTask<'gc>,
159+
gc: &'gc mut SimpleCollectorContext,
160+
fpf: Gc<'gc, FloatProbFreq<'gc>>,
161+
) -> std::io::Result<()> {
162+
make_fasta(task, gc, |chunk_size, gc, task| {
163+
let mut buffer = task.buffer.as_ref().unwrap().borrow_mut();
164+
task.buffer_index = fpf.select_random_into_buffer(
165+
&*task.state, &mut *buffer,
166+
task.buffer_index, chunk_size
167+
);
168+
buffer[task.buffer_index] = b'\n';
169+
task.buffer_index += 1;
170+
task.n_chars -= chunk_size;
171+
Ok(())
172+
})
173+
}
174+
175+
fn make_repeat_fasta<'gc>(
176+
task: MakeFastaTask<'gc>,
177+
gc: &'gc mut SimpleCollectorContext,
178+
alu: &str
179+
) -> std::io::Result<()> {
180+
let alu_bytes = alu.as_bytes();
181+
let mut alu_index = 0usize;
182+
make_fasta(task, gc, |chunk_size, gc, task| {
183+
let mut buffer = task.buffer.as_ref().unwrap().borrow_mut();
184+
for _ in 0..chunk_size {
185+
if alu_index == alu_bytes.len() {
186+
alu_index = 0;
187+
}
188+
buffer[task.buffer_index] = alu_bytes[alu_index];
189+
task.buffer_index += 1;
190+
alu_index += 1;
191+
}
192+
buffer[task.buffer_index] = b'\n';
193+
task.buffer_index += 1;
194+
task.n_chars -= chunk_size;
195+
Ok(())
196+
})
197+
}
198+
199+
fn main() {
200+
let n = std::env::args().nth(1)
201+
.and_then(|n| n.parse().ok())
202+
.unwrap_or(1000);
203+
204+
let plain = slog_term::PlainSyncDecorator::new(std::io::stdout());
205+
let logger = Logger::root(
206+
slog_term::FullFormat::new(plain).build().fuse(),
207+
o!("bench" => file!())
208+
);
209+
let collector = SimpleCollector::with_logger(logger);
210+
let mut gc = collector.into_context();
211+
let mut state = gc.alloc(State::new());
212+
{
213+
let (new_state, ()) = safepoint_recurse!(gc, state, |gc, state| {
214+
let task = MakeFastaTask::new(&gc, state, "ONE", "Homo sapiens alu", n * 2);
215+
safepoint_recurse!(gc, task, |gc, task| {
216+
make_repeat_fasta(task, gc, ALU).unwrap();
217+
});
218+
});
219+
state = new_state;
220+
}
221+
state = safepoint!(gc, state);
222+
{
223+
const PROBS: &[f64] = &[0.27, 0.12, 0.12, 0.27,
224+
0.02, 0.02, 0.02, 0.02,
225+
0.02, 0.02, 0.02, 0.02,
226+
0.02, 0.02, 0.02];
227+
let (new_state, ()) = safepoint_recurse!(gc, state, |gc, state| {
228+
let task = MakeFastaTask::new(
229+
&gc, state, "ONE", "Homo sapiens alu",
230+
n * 2,
231+
);
232+
let iub = gc.alloc(FloatProbFreq {
233+
chars: gc.alloc(b"acgtBDHKMNRSVWY".iter().cloned().map(Cell::new).collect()),
234+
probs: gc.alloc(PROBS.iter().map(|&f| f as f32).map(Cell::new).collect())
235+
});
236+
make_random_fasta(
237+
task,
238+
gc,
239+
iub
240+
).unwrap();
241+
});
242+
state = new_state;
243+
}
244+
state = safepoint!(gc, state);
245+
{
246+
const PROBS: &[f64] = &[0.3029549426680,
247+
0.1979883004921,
248+
0.1975473066391,
249+
0.3015094502008];
250+
let (new_state, ()) = safepoint_recurse!(gc, state, |gc, state| {
251+
let task = MakeFastaTask::new(
252+
&gc, state, "THREE", "Homo sapiens frequency",
253+
n * 5,
254+
);
255+
let homo_sapiens = gc.alloc(FloatProbFreq {
256+
chars: gc.alloc(b"acgt".iter().cloned().map(Cell::new).collect()),
257+
probs: gc.alloc(PROBS.iter().map(|&f| f as f32).map(Cell::new).collect())
258+
});
259+
safepoint_recurse!(gc, (task, homo_sapiens), |gc, roots| {
260+
let (task, homo_sapiens) = roots;
261+
make_random_fasta(task, gc, homo_sapiens).unwrap();
262+
});
263+
});
264+
state = new_state;
265+
}
266+
state = safepoint!(gc, state);
267+
std::io::stdout().flush().unwrap();
268+
}

0 commit comments

Comments
 (0)