Skip to content

Commit 523b124

Browse files
committed
frequency() is now bases(), which makes more sense as a name
1 parent f8ec717 commit 523b124

File tree

6 files changed

+25
-24
lines changed

6 files changed

+25
-24
lines changed

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,21 +91,21 @@ If it was requested during file opening that soft-masking information be stored,
9191

9292
## Fetch per-base statistics
9393

94-
It's often required to compute the percentage of 1 or more bases in a chromosome. This can be done with the `frequency()` method.
94+
It's often necessary to compute what fraction of a chromosome is made up of each base. This can be done with the `bases()` method.
9595

96-
>>> tb.frequency("chr1")
96+
>>> tb.bases("chr1")
9797
{'A': 0.08, 'C': 0.08, 'T': 0.08666666666666667, 'G': 0.08666666666666667}
9898

99-
This returns a dictionary with bases as keys and their frequency as values. Note that this will not sum to 1 if there are any hard-masked bases (the chromosome is 2/3 `N` in this case). One can also request this information over a particular region.
99+
This returns a dictionary with bases as keys and the fraction of the sequence composed of them as values. Note that the values will not sum to 1 if there are any hard-masked bases (the chromosome is 2/3 `N` in this case). One can also request this information over a particular region.
100100

101-
>>> tb.frequency("chr1", 24, 74)
101+
>>> tb.bases("chr1", 24, 74)
102102
{'A': 0.12, 'C': 0.12, 'T': 0.12, 'G': 0.12}
103103

104104
The start and end position are as with the `sequence()` method described above.
105105

106106
If integer counts are preferred, then they can instead be returned.
107107

108-
>>> tb.frequency("chr1", 24, 74, True)
108+
>>> tb.bases("chr1", 24, 74, False)
109109
{'A': 6, 'C': 6, 'T': 6, 'G': 6}
110110

111111
## Close a file

lib2bit/2bit.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ void increment(char base, uint32_t *A, uint32_t *C, uint32_t *T, uint32_t *G) {
272272
}
273273
}
274274

275-
void *twobitFrequencyWorker(TwoBit *tb, uint32_t tid, uint32_t start, uint32_t end, int fraction) {
275+
void *twobitBasesWorker(TwoBit *tb, uint32_t tid, uint32_t start, uint32_t end, int fraction) {
276276
void *out;
277277
uint32_t sz = end - start, pos = 0;
278278
uint32_t A = 0, C = 0, T = 0, G = 0, len = end - start;
@@ -328,7 +328,7 @@ void *twobitFrequencyWorker(TwoBit *tb, uint32_t tid, uint32_t start, uint32_t e
328328
return NULL;
329329
}
330330

331-
void *twobitFrequency(TwoBit *tb, char *chrom, uint32_t start, uint32_t end, int fraction) {
331+
void *twobitBases(TwoBit *tb, char *chrom, uint32_t start, uint32_t end, int fraction) {
332332
uint32_t tid = 0, i;
333333

334334
//Get the chromosome ID
@@ -350,7 +350,7 @@ void *twobitFrequency(TwoBit *tb, char *chrom, uint32_t start, uint32_t end, int
350350
if(end > tb->idx->size[tid]) return NULL;
351351
if(start >= end) return NULL;
352352

353-
return twobitFrequencyWorker(tb, tid, start, end, fraction);
353+
return twobitBasesWorker(tb, tid, start, end, fraction);
354354
}
355355

356356
/*

lib2bit/2bit.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ uint32_t twobitChromLen(TwoBit *tb, char *chrom);
4848
char *twobitSequence(TwoBit *tb, char *chrom, uint32_t start, uint32_t end);
4949

5050
//Return a pointer to either 4 doubles or 4 uint32_ts holding per-base frequencies or counts.
51-
void *twobitFrequency(TwoBit *tb, char *chrom, uint32_t start, uint32_t end, int fractional);
51+
//The order is A, C, T, G
52+
void *twobitBases(TwoBit *tb, char *chrom, uint32_t start, uint32_t end, int fractional);
5253

5354
#ifdef __cplusplus
5455
}

py2bit.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ static PyObject *py2bitSequence(pyTwoBit_t *self, PyObject *args, PyObject *kwds
193193
return ret;
194194
}
195195

196-
static PyObject *py2bitFrequency(pyTwoBit_t *self, PyObject *args, PyObject *kwds) {
196+
static PyObject *py2bitBases(pyTwoBit_t *self, PyObject *args, PyObject *kwds) {
197197
PyObject *ret = NULL, *val = NULL;
198198
PyObject *fractionO = Py_True;
199199
TwoBit *tb = self->tb;
@@ -224,9 +224,9 @@ static PyObject *py2bitFrequency(pyTwoBit_t *self, PyObject *args, PyObject *kwd
224224

225225
if(fractionO == Py_False) fraction = 0;
226226

227-
o = twobitFrequency(tb, chrom, start, end, fraction);
227+
o = twobitBases(tb, chrom, start, end, fraction);
228228
if(!o) {
229-
PyErr_SetString(PyExc_RuntimeError, "Received an error while determining the per-base frequency.");
229+
PyErr_SetString(PyExc_RuntimeError, "Received an error while determining the per-base metrics.");
230230
return NULL;
231231
}
232232

py2bit.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ static PyObject *py2bitInfo(pyTwoBit_t *pybw, PyObject *args);
1212
static PyObject* py2bitClose(pyTwoBit_t *pybw, PyObject *args);
1313
static PyObject* py2bitChroms(pyTwoBit_t *pybw, PyObject *args);
1414
static PyObject *py2bitSequence(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
15-
static PyObject *py2bitFrequency(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
15+
static PyObject *py2bitBases(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
1616
static void py2bitDealloc(pyTwoBit_t *pybw);
1717

1818
static PyMethodDef tbMethods[] = {
@@ -107,14 +107,14 @@ NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNACGTACGTACGTagctagctGATCGATCGT
107107
>>> tb.sequence(\"chr1\", 24, 74)\n\
108108
NNNNNNNNNNNNNNNNNNNNNNNNNNACGTACGTACGTagctagctGATC\n\
109109
>>> tb.close()"},
110-
{"frequency", (PyCFunction)py2bitFrequency, METH_VARARGS|METH_KEYWORDS,
111-
"Retrieve the percentage of A, C, T, and Gs in a chromosome or subset thereof.\n\
112-
On error, a runtime exception is thrown.\n\
110+
{"bases", (PyCFunction)py2bitBases, METH_VARARGS|METH_KEYWORDS,
111+
"Retrieve the percentage or number of A, C, T, and Gs in a chromosome or subset\n\
112+
thereof. On error, a runtime exception is thrown.\n\
113113
\n\
114114
Positional arguments:\n\
115115
chr: Chromosome name\n\
116116
\n\
117-
Keyword arguments:\n\
117+
Optional keyword arguments:\n\
118118
start: Starting position (0-based)\n\
119119
end: Ending position (1-based)\n\
120120
fraction: Whether to return fractional or integer values (default 'True',\n\
@@ -132,11 +132,11 @@ bases. Counts may sum to less than the length of the region for the same reason.
132132
\n\
133133
>>> import py2bit\n\
134134
>>> tb = py2bit.open(\"test/test.2bit\")\n\
135-
>>> tb.frequency(tb, \"chr1\")\n\
135+
>>> tb.bases(tb, \"chr1\")\n\
136136
{'A': 0.08, 'C': 0.08, 'T': 0.08666666666666667, 'G': 0.08666666666666667}\n\
137-
>>> tb.frequency(tb, \"chr1\", 24, 74)\n\
137+
>>> tb.bases(tb, \"chr1\", 24, 74)\n\
138138
{'A': 0.12, 'C': 0.12, 'T': 0.12, 'G': 0.12}\n\
139-
>>> tb.frequency(tb, \"chr1\", 24, 74, True)\n\
139+
>>> tb.bases(tb, \"chr1\", 24, 74, True)\n\
140140
{'A': 6, 'C': 6, 'T': 6, 'G': 6}\n\
141141
>>> tb.close()"},
142142
{NULL, NULL, 0, NULL}

py2bitTest/test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ def testSequence(self):
3535
assert(tb.sequence("chr1", 24, 74) == "NNNNNNNNNNNNNNNNNNNNNNNNNNACGTACGTACGTagctagctGATC")
3636
tb.close()
3737

38-
def testFrequency(self):
38+
def testBases(self):
3939
tb = py2bit.open(self.fname, True)
40-
assert(tb.frequency("chr1") == {'A': 0.08, 'C': 0.08, 'T': 0.08666666666666667, 'G': 0.08666666666666667})
41-
assert(tb.frequency("chr1", 24, 74) == {'A': 0.12, 'C': 0.12, 'T': 0.12, 'G': 0.12})
42-
assert(tb.frequency("chr1", 24, 74, False) == {'A': 6, 'C': 6, 'T': 6, 'G': 6})
40+
assert(tb.bases("chr1") == {'A': 0.08, 'C': 0.08, 'T': 0.08666666666666667, 'G': 0.08666666666666667})
41+
assert(tb.bases("chr1", 24, 74) == {'A': 0.12, 'C': 0.12, 'T': 0.12, 'G': 0.12})
42+
assert(tb.bases("chr1", 24, 74, False) == {'A': 6, 'C': 6, 'T': 6, 'G': 6})
4343
tb.close()

0 commit comments

Comments
 (0)