Skip to content

Commit 8237ddb

Browse files
authored
Merge pull request #6 from deeptools/listHardMaskedBlocks
List hard masked blocks
2 parents da22244 + 6646ee5 commit 8237ddb

File tree

4 files changed

+208
-4
lines changed

4 files changed

+208
-4
lines changed

py2bit.c

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,150 @@ static PyObject *py2bitBases(pyTwoBit_t *self, PyObject *args, PyObject *kwds) {
306306
return NULL;
307307
}
308308

309+
/*
 * List the hard-masked (N) blocks that overlap a region of one chromosome.
 *
 * Python signature: hardMaskedBlocks(chrom, start=0, end=0)
 *   chrom: chromosome name; it must be present in the open 2bit file.
 *   start: start of the region of interest (defaults to 0).
 *   end:   end of the region of interest; 0, or any value beyond the
 *          chromosome length, is replaced by the chromosome length.
 *
 * Returns a new list of (blockStart, blockEnd) tuples, one per overlapping
 * block, in the order the blocks appear in the index. Blocks are reported in
 * full; they are not clipped to [start, end). On any failure a RuntimeError
 * is set and NULL is returned.
 */
static PyObject *py2bitHardMaskedBlocks(pyTwoBit_t *self, PyObject *args, PyObject *kwds) {
    PyObject *ret = NULL, *tup = NULL;
    TwoBit *tb = self->tb;
    char *chrom;
    unsigned long startl = 0, endl = 0, totalBlocks = 0, tid = 0;
    uint32_t start, end, len, blockStart, blockEnd, i, j;
    int found = 0, rv;
    static char *kwd_list[] = {"chrom", "start", "end", NULL};

    if(!tb) {
        PyErr_SetString(PyExc_RuntimeError, "The 2bit file handle is not open!");
        return NULL;
    }

    if(!PyArg_ParseTupleAndKeywords(args, kwds, "s|kk", kwd_list, &chrom, &startl, &endl)) {
        PyErr_SetString(PyExc_RuntimeError, "You must supply at least a chromosome!");
        return NULL;
    }

    //Get the chromosome ID
    for(i=0; i<tb->hdr->nChroms; i++) {
        if(strcmp(tb->cl->chrom[i], chrom) == 0) {
            tid = i;
            found = 1;
            break;
        }
    }

    // Check the lookup result explicitly so tid is never used as an index
    // unless a chromosome actually matched.
    len = twobitChromLen(tb, chrom);
    if(!found || len == 0) {
        PyErr_SetString(PyExc_RuntimeError, "The specified chromosome doesn't exist in the 2bit file!");
        return NULL;
    }

    // Normalise the requested range; after this endl <= len, so the casts to
    // uint32_t below cannot truncate.
    if(endl == 0) endl = len;
    if(endl > len) endl = len;
    end = (uint32_t) endl;
    if(startl >= endl && startl > 0) {
        PyErr_SetString(PyExc_RuntimeError, "The start value must be less then the end value (and the end of the chromosome");
        return NULL;
    }
    start = (uint32_t) startl;

    // Count the total number of overlapping N-masked blocks
    for(i=0; i<tb->idx->nBlockCount[tid]; i++) {
        blockStart = tb->idx->nBlockStart[tid][i];
        blockEnd = blockStart + tb->idx->nBlockSizes[tid][i];
        if(blockStart < end && blockEnd > start) totalBlocks++;
    }

    // Form the output
    ret = PyList_New(totalBlocks);
    if(!ret) goto error;
    if(totalBlocks == 0) return ret;

    for(i=0, j=0; i<tb->idx->nBlockCount[tid]; i++) {
        blockStart = tb->idx->nBlockStart[tid][i];
        blockEnd = blockStart + tb->idx->nBlockSizes[tid][i];
        if(blockStart < end && blockEnd > start) {
            tup = Py_BuildValue("(kk)", (unsigned long) blockStart, (unsigned long) blockEnd);
            if(!tup) goto error;
            // PyList_SetItem() steals the reference to tup whether it succeeds
            // or fails, so forget our pointer before checking the result;
            // otherwise the error path would release the tuple a second time.
            rv = PyList_SetItem(ret, j++, tup);
            tup = NULL;
            if(rv) goto error;
        }
    }

    return ret;

error:
    Py_XDECREF(ret);
    Py_XDECREF(tup);
    PyErr_SetString(PyExc_RuntimeError, "Received an error while constructing the output list and tuples!");
    return NULL;
}
378+
379+
/*
 * List the soft-masked blocks that overlap a region of one chromosome.
 *
 * Python signature: softMaskedBlocks(chrom, start=0, end=0)
 *   chrom: chromosome name; it must be present in the open 2bit file.
 *   start: start of the region of interest (defaults to 0).
 *   end:   end of the region of interest; 0, or any value beyond the
 *          chromosome length, is replaced by the chromosome length.
 *
 * The soft-mask index must have been loaded (tb->idx->maskBlockStart is
 * non-NULL); otherwise a RuntimeError asking for storeMasked=True is raised.
 *
 * Returns a new list of (blockStart, blockEnd) tuples, one per overlapping
 * block, in the order the blocks appear in the index. Blocks are reported in
 * full; they are not clipped to [start, end). On any failure a RuntimeError
 * is set and NULL is returned.
 */
static PyObject *py2bitSoftMaskedBlocks(pyTwoBit_t *self, PyObject *args, PyObject *kwds) {
    PyObject *ret = NULL, *tup = NULL;
    TwoBit *tb = self->tb;
    char *chrom;
    unsigned long startl = 0, endl = 0, totalBlocks = 0, tid = 0;
    uint32_t start, end, len, blockStart, blockEnd, i, j;
    int found = 0, rv;
    static char *kwd_list[] = {"chrom", "start", "end", NULL};

    if(!tb) {
        PyErr_SetString(PyExc_RuntimeError, "The 2bit file handle is not open!");
        return NULL;
    }

    if(!PyArg_ParseTupleAndKeywords(args, kwds, "s|kk", kwd_list, &chrom, &startl, &endl)) {
        PyErr_SetString(PyExc_RuntimeError, "You must supply at least a chromosome!");
        return NULL;
    }

    //Get the chromosome ID
    for(i=0; i<tb->hdr->nChroms; i++) {
        if(strcmp(tb->cl->chrom[i], chrom) == 0) {
            tid = i;
            found = 1;
            break;
        }
    }

    // Check the lookup result explicitly so tid is never used as an index
    // unless a chromosome actually matched.
    len = twobitChromLen(tb, chrom);
    if(!found || len == 0) {
        PyErr_SetString(PyExc_RuntimeError, "The specified chromosome doesn't exist in the 2bit file!");
        return NULL;
    }

    // Normalise the requested range; after this endl <= len, so the casts to
    // uint32_t below cannot truncate.
    if(endl == 0) endl = len;
    if(endl > len) endl = len;
    end = (uint32_t) endl;
    if(startl >= endl && startl > 0) {
        PyErr_SetString(PyExc_RuntimeError, "The start value must be less then the end value (and the end of the chromosome");
        return NULL;
    }
    start = (uint32_t) startl;

    // Without the soft-mask arrays there is nothing to iterate over.
    if(!tb->idx->maskBlockStart) {
        PyErr_SetString(PyExc_RuntimeError, "The file was not opened with storeMasked=True! Consequently, there are no stored soft-masked regions.");
        return NULL;
    }

    // Count the total number of overlapping soft-masked blocks
    for(i=0; i<tb->idx->maskBlockCount[tid]; i++) {
        blockStart = tb->idx->maskBlockStart[tid][i];
        blockEnd = blockStart + tb->idx->maskBlockSizes[tid][i];
        if(blockStart < end && blockEnd > start) totalBlocks++;
    }

    // Form the output
    ret = PyList_New(totalBlocks);
    if(!ret) goto error;
    if(totalBlocks == 0) return ret;

    for(i=0, j=0; i<tb->idx->maskBlockCount[tid]; i++) {
        blockStart = tb->idx->maskBlockStart[tid][i];
        blockEnd = blockStart + tb->idx->maskBlockSizes[tid][i];
        if(blockStart < end && blockEnd > start) {
            tup = Py_BuildValue("(kk)", (unsigned long) blockStart, (unsigned long) blockEnd);
            if(!tup) goto error;
            // PyList_SetItem() steals the reference to tup whether it succeeds
            // or fails, so forget our pointer before checking the result;
            // otherwise the error path would release the tuple a second time.
            rv = PyList_SetItem(ret, j++, tup);
            tup = NULL;
            if(rv) goto error;
        }
    }

    return ret;

error:
    Py_XDECREF(ret);
    Py_XDECREF(tup);
    PyErr_SetString(PyExc_RuntimeError, "Received an error while constructing the output list and tuples!");
    return NULL;
}
452+
309453
#if PY_MAJOR_VERSION >= 3
310454
PyMODINIT_FUNC PyInit_py2bit(void) {
311455
PyObject *res;

py2bit.h

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include <Python.h>
22
#include "2bit.h"
33

4-
#define pyTwoBitVersion "0.2.2"
4+
#define pyTwoBitVersion "0.3.0"
55

66
typedef struct {
77
PyObject_HEAD
@@ -16,6 +16,8 @@ static PyObject* py2bitClose(pyTwoBit_t *pybw, PyObject *args);
1616
static PyObject* py2bitChroms(pyTwoBit_t *pybw, PyObject *args);
1717
static PyObject *py2bitSequence(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
1818
static PyObject *py2bitBases(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
19+
static PyObject *py2bitHardMaskedBlocks(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
20+
static PyObject *py2bitSoftMaskedBlocks(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
1921
static void py2bitDealloc(pyTwoBit_t *pybw);
2022

2123
static PyMethodDef tbMethods[] = {
@@ -145,6 +147,48 @@ bases. Counts may sum to less than the length of the region for the same reason.
145147
{'A': 0.12, 'C': 0.12, 'T': 0.12, 'G': 0.12}\n\
146148
>>> tb.bases(tb, \"chr1\", 24, 74, True)\n\
147149
{'A': 6, 'C': 6, 'T': 6, 'G': 6}\n\
150+
>>> tb.close()"},
151+
{"hardMaskedBlocks", (PyCFunction)py2bitHardMaskedBlocks, METH_VARARGS|METH_KEYWORDS,
152+
"Retrieve a list of hard-masked blocks on a single-chromosome (or range on it).\n\
153+
\n\
154+
Positional arguments:\n\
155+
chr: Chromosome name\n\
156+
\n\
157+
Optional keyword arguments:\n\
158+
start: Starting position (0-based)\n\
159+
end: Ending position (1-based)\n\
160+
\n\
161+
Returns:\n\
162+
A list of tuples, with items start and end.\n\
163+
\n\
164+
>>> import py2bit\n\
165+
>>> tb = py2bit.open(\"test/test.2bit\")\n\
166+
>>> print(tb.hardMaskedBlocks(\"chr1\")\n\
167+
[(0, 50), (100, 150)]\n\
168+
>>> print(tb.hardMaskedBlocks(\"chr1\", 75, 100)\n\
169+
[]\n\
170+
>>> print(tb.hardMaskedBlocks(\"chr1\", 75, 101)\n\
171+
[(100, 150)]\n\
172+
>>> tb.close()"},
173+
{"softMaskedBlocks", (PyCFunction)py2bitSoftMaskedBlocks, METH_VARARGS|METH_KEYWORDS,
174+
"Retrieve a list of soft-masked blocks on a single-chromosome (or range on it).\n\
175+
\n\
176+
Positional arguments:\n\
177+
chr: Chromosome name\n\
178+
\n\
179+
Optional keyword arguments:\n\
180+
start: Starting position (0-based)\n\
181+
end: Ending position (1-based)\n\
182+
\n\
183+
Returns:\n\
184+
A list of tuples, with items start and end.\n\
185+
\n\
186+
>>> import py2bit\n\
187+
>>> tb = py2bit.open(\"test/test.2bit\", storeMasked=True)\n\
188+
>>> print(tb.softMaskedBlocks(\"chr1\")\n\
189+
[(62, 70)]\n\
190+
>>> print(tb.softMaskedBlocks(\"chr1\", 0, 50)\n\
191+
[]\n\
148192
>>> tb.close()"},
149193
{"__enter__", (PyCFunction) py2bitEnter, METH_NOARGS, NULL},
150194
{"__exit__", (PyCFunction) py2bitClose, METH_VARARGS, NULL},

py2bitTest/test.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,19 @@ def testSequence(self):
4646
tb = py2bit.open(self.fname, True)
4747
assert(tb.sequence("chr1", 1, 3) == "NN")
4848
assert(tb.sequence("chr1", 1, 2) == "N")
49+
tb.close()
50+
51+
def testHardMaskedBlocks(self):
    """Check hardMaskedBlocks() for whole chromosomes and for sub-ranges."""
    tb = py2bit.open(self.fname, True)
    # (positional arguments, expected list of (start, end) blocks)
    cases = (
        (("chr1",), [(0, 50), (100, 150)]),
        (("chr1", 25, 75), [(0, 50)]),
        (("chr1", 75, 100), []),
        (("chr1", 75, 101), [(100, 150)]),
        (("chr2",), [(50, 100)]),
    )
    for query, expected in cases:
        assert tb.hardMaskedBlocks(*query) == expected
    tb.close()
59+
60+
def testSoftMaskedBlocks(self):
    """Check softMaskedBlocks() on a file opened with storeMasked=True."""
    tb = py2bit.open(self.fname, storeMasked=True)
    # (positional arguments, expected list of (start, end) blocks)
    cases = (
        (("chr1",), [(62, 70)]),
        (("chr1", 0, 50), []),
    )
    for query, expected in cases:
        assert tb.softMaskedBlocks(*query) == expected
    tb.close()

setup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@
1717
include_dirs = ['lib2bit', sysconfig.get_config_var("INCLUDEPY")])
1818

1919
setup(name = 'py2bit',
20-
version = '0.2.2',
20+
version = '0.3.0',
2121
description = 'A package for accessing 2bit files using lib2bit',
2222
author = "Devon P. Ryan",
2323
author_email = "ryan@ie-freiburg.mpg.de",
24-
url = "https://github.com/dpryan79/py2bit",
24+
url = "https://github.com/deeptools/py2bit",
2525
license = "MIT",
26-
download_url = "https://github.com/dpryan79/py2bit/tarball/0.2.2",
26+
download_url = "https://github.com/deeptools/py2bit/tarball/0.3.0",
2727
keywords = ["bioinformatics", "2bit"],
2828
classifier = ["Development Status :: 5 - Production/Stable",
2929
"Intended Audience :: Developers",

0 commit comments

Comments
 (0)