Skip to content

Commit a412e89

Browse files
committed
Enable querying hardmasked blocks
1 parent da22244 commit a412e89

File tree

4 files changed

+102
-4
lines changed

4 files changed

+102
-4
lines changed

py2bit.c

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,75 @@ static PyObject *py2bitBases(pyTwoBit_t *self, PyObject *args, PyObject *kwds) {
306306
return NULL;
307307
}
308308

309+
/*
 * hardMaskedBlocks(chrom, start=0, end=0)
 *
 * Return a Python list of (blockStart, blockEnd) tuples, one for every
 * N-masked block of `chrom` that overlaps the half-open query range
 * [start, end). Blocks are reported with their full extent; they are not
 * clipped to the query range.
 *
 * Arguments (parsed from args/kwds):
 *   chrom - chromosome name (required)
 *   start - optional range start; defaults to 0
 *   end   - optional range end; 0 (the default) or any value beyond the
 *           chromosome length is replaced by the chromosome length
 *
 * Returns a new reference to the list, or NULL with a RuntimeError set when
 * the file handle is closed, the arguments cannot be parsed, the chromosome
 * has no length in the 2bit file, start is not below end, or the output
 * objects cannot be built.
 */
static PyObject *py2bitHardMaskedBlocks(pyTwoBit_t *self, PyObject *args, PyObject *kwds) {
    PyObject *ret = NULL, *tup = NULL;
    TwoBit *tb = self->tb;
    char *chrom;
    /* tid starts at 0 so it is never read uninitialised, even if the lookup
       loop below finds no match (that case is rejected by the len check). */
    unsigned long startl = 0, endl = 0, totalBlocks = 0, tid = 0;
    uint32_t start, end, len, blockStart, blockEnd, i, j;
    int rv;
    static char *kwd_list[] = {"chrom", "start", "end", NULL};

    if(!tb) {
        PyErr_SetString(PyExc_RuntimeError, "The 2bit file handle is not open!");
        return NULL;
    }

    if(!PyArg_ParseTupleAndKeywords(args, kwds, "s|kk", kwd_list, &chrom, &startl, &endl)) {
        PyErr_SetString(PyExc_RuntimeError, "You must supply at least a chromosome!");
        return NULL;
    }

    //Get the chromosome ID
    for(i=0; i<tb->hdr->nChroms; i++) {
        if(strcmp(tb->cl->chrom[i], chrom) == 0) {
            tid = i;
            break;
        }
    }

    //A length of 0 is treated as "no such chromosome"
    len = twobitChromLen(tb, chrom);
    if(len == 0) {
        PyErr_SetString(PyExc_RuntimeError, "The specified chromosome doesn't exist in the 2bit file!");
        return NULL;
    }

    //Clamp the end to the chromosome length; 0 means "to the end"
    if(endl == 0) endl = len;
    if(endl > len) endl = len;
    end = (uint32_t) endl;
    if(startl >= endl && startl > 0) {
        PyErr_SetString(PyExc_RuntimeError, "The start value must be less then the end value (and the end of the chromosome");
        return NULL;
    }
    //startl < endl <= len here, so the narrowing cast cannot truncate
    start = (uint32_t) startl;

    // Count the total number of overlapping N-masked blocks
    for(i=0; i<tb->idx->nBlockCount[tid]; i++) {
        blockStart = tb->idx->nBlockStart[tid][i];
        blockEnd = blockStart + tb->idx->nBlockSizes[tid][i];
        if(blockStart < end && blockEnd > start) totalBlocks++;
    }

    // Form the output
    ret = PyList_New(totalBlocks);
    if(!ret) goto error;
    for(i=0, j=0; i<tb->idx->nBlockCount[tid]; i++) {
        blockStart = tb->idx->nBlockStart[tid][i];
        blockEnd = blockStart + tb->idx->nBlockSizes[tid][i];
        if(blockStart < end && blockEnd > start) {
            tup = Py_BuildValue("(kk)", (unsigned long) blockStart, (unsigned long) blockEnd);
            if(!tup) goto error;
            // PyList_SetItem consumes the reference to tup even when it
            // fails, so drop our pointer first to avoid a double decref in
            // the error path.
            rv = PyList_SetItem(ret, j++, tup);
            tup = NULL;
            if(rv) goto error;
        }
    }

    return ret;

error:
    // Py_XDECREF already tolerates NULL, so no extra guards are needed
    Py_XDECREF(ret);
    Py_XDECREF(tup);
    PyErr_SetString(PyExc_RuntimeError, "Received an error while constructing the output list and tuples!");
    return NULL;
}
377+
309378
#if PY_MAJOR_VERSION >= 3
310379
PyMODINIT_FUNC PyInit_py2bit(void) {
311380
PyObject *res;

py2bit.h

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include <Python.h>
22
#include "2bit.h"
33

4-
#define pyTwoBitVersion "0.2.2"
4+
#define pyTwoBitVersion "0.3.0"
55

66
typedef struct {
77
PyObject_HEAD
@@ -16,6 +16,7 @@ static PyObject* py2bitClose(pyTwoBit_t *pybw, PyObject *args);
1616
static PyObject* py2bitChroms(pyTwoBit_t *pybw, PyObject *args);
1717
static PyObject *py2bitSequence(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
1818
static PyObject *py2bitBases(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
19+
static PyObject *py2bitHardMaskedBlocks(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
1920
static void py2bitDealloc(pyTwoBit_t *pybw);
2021

2122
static PyMethodDef tbMethods[] = {
@@ -145,6 +146,24 @@ bases. Counts may sum to less than the length of the region for the same reason.
145146
{'A': 0.12, 'C': 0.12, 'T': 0.12, 'G': 0.12}\n\
146147
>>> tb.bases(tb, \"chr1\", 24, 74, True)\n\
147148
{'A': 6, 'C': 6, 'T': 6, 'G': 6}\n\
149+
>>> tb.close()"},
150+
{"hardMaskedBlocks", (PyCFunction)py2bitHardMaskedBlocks, METH_VARARGS|METH_KEYWORDS,
151+
"Retrieve a list of hard-masked blocks on a single-chromosome (or range on it).\n\
152+
\n\
153+
Positional arguments:\n\
154+
chr: Chromosome name\n\
155+
\n\
156+
Optional keyword arguments:\n\
157+
start: Starting position (0-based)\n\
158+
end: Ending position (1-based)\n\
159+
\n\
160+
Returns:\n\
161+
A list of tuples, with items start and end.\n\
162+
\n\
163+
>>> import py2bit\n\
164+
>>> tb = py2bit.open(\"test/test.2bit\")\n\
165+
>>> print(tb.hardMaskedBlocks(\"chr1\"))\n\
166+
>>> \n\
148167
>>> tb.close()"},
149168
{"__enter__", (PyCFunction) py2bitEnter, METH_NOARGS, NULL},
150169
{"__exit__", (PyCFunction) py2bitClose, METH_VARARGS, NULL},

py2bitTest/test.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,13 @@ def testSequence(self):
4646
tb = py2bit.open(self.fname, True)
4747
assert(tb.sequence("chr1", 1, 3) == "NN")
4848
assert(tb.sequence("chr1", 1, 2) == "N")
49+
tb.close()
50+
51+
def testHardMaskedBlocks(self):
    """Check hardMaskedBlocks() for whole chromosomes and for sub-ranges.

    The sub-range cases pin two behaviours: a block that merely overlaps
    the query is returned with its full extent, and the query end is
    exclusive (a block starting at 100 is missed by end=100 but found by
    end=101).
    """
    tb = py2bit.open(self.fname, True)
    try:
        # Whole chromosome: every hard-masked block is listed.
        assert tb.hardMaskedBlocks("chr1") == [(0, 50), (100, 150)]
        # Partial overlap still reports the complete block.
        assert tb.hardMaskedBlocks("chr1", 25, 75) == [(0, 50)]
        # The end coordinate is exclusive.
        assert tb.hardMaskedBlocks("chr1", 75, 100) == []
        assert tb.hardMaskedBlocks("chr1", 75, 101) == [(100, 150)]
        assert tb.hardMaskedBlocks("chr2") == [(50, 100)]
    finally:
        # Release the file handle even when an assertion above fails.
        tb.close()

setup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@
1717
include_dirs = ['lib2bit', sysconfig.get_config_var("INCLUDEPY")])
1818

1919
setup(name = 'py2bit',
20-
version = '0.2.2',
20+
version = '0.3.0',
2121
description = 'A package for accessing 2bit files using lib2bit',
2222
author = "Devon P. Ryan",
2323
author_email = "ryan@ie-freiburg.mpg.de",
24-
url = "https://github.com/dpryan79/py2bit",
24+
url = "https://github.com/deeptools/py2bit",
2525
license = "MIT",
26-
download_url = "https://github.com/dpryan79/py2bit/tarball/0.2.2",
26+
download_url = "https://github.com/deeptools/py2bit/tarball/0.3.0",
2727
keywords = ["bioinformatics", "2bit"],
2828
classifier = ["Development Status :: 5 - Production/Stable",
2929
"Intended Audience :: Developers",

0 commit comments

Comments
 (0)