Skip to content

Commit b7901e3

Browse files
committed
soft-masked blocks
1 parent a412e89 commit b7901e3

File tree

3 files changed

+104
-1
lines changed

3 files changed

+104
-1
lines changed

py2bit.c

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,7 @@ static PyObject *py2bitHardMaskedBlocks(pyTwoBit_t *self, PyObject *args, PyObje
356356
// Form the output
357357
ret = PyList_New(totalBlocks);
358358
if(!ret) goto error;
359+
if(totalBlocks == 0) return ret;
359360
for(i=0, j=0; i<tb->idx->nBlockCount[tid]; i++) {
360361
blockStart = tb->idx->nBlockStart[tid][i];
361362
blockEnd = blockStart + tb->idx->nBlockSizes[tid][i];
@@ -375,6 +376,80 @@ static PyObject *py2bitHardMaskedBlocks(pyTwoBit_t *self, PyObject *args, PyObje
375376
return NULL;
376377
}
377378

379+
/*
 * softMaskedBlocks(chrom, start=0, end=0)
 *
 * Build a Python list of (blockStart, blockEnd) tuples, one for every stored
 * soft-masked block on `chrom` that overlaps the half-open range [start, end).
 * An `end` of 0, or one past the chromosome length, is replaced by the
 * chromosome length. Blocks are reported with their full extent; they are not
 * clipped to the requested range.
 *
 * Returns a new reference to the list (possibly empty), or NULL with a
 * RuntimeError set when:
 *   - the 2bit handle is not open,
 *   - the arguments cannot be parsed,
 *   - the chromosome is not found or has a reported length of 0,
 *   - start >= end (with start > 0),
 *   - no soft-mask index is stored (tb->idx->maskBlockStart is NULL),
 *   - the output list or one of its tuples cannot be created.
 */
static PyObject *py2bitSoftMaskedBlocks(pyTwoBit_t *self, PyObject *args, PyObject *kwds) {
    PyObject *ret = NULL, *tup = NULL;
    TwoBit *tb = self->tb;
    char *chrom;
    // tid is initialised so it is never read indeterminate, whatever the lookup below finds
    unsigned long startl = 0, endl = 0, totalBlocks = 0, tid = 0;
    uint32_t start, end, len, blockStart, blockEnd, i, j;
    static char *kwd_list[] = {"chrom", "start", "end", NULL};

    if(!tb) {
        PyErr_SetString(PyExc_RuntimeError, "The 2bit file handle is not open!");
        return NULL;
    }

    if(!PyArg_ParseTupleAndKeywords(args, kwds, "s|kk", kwd_list, &chrom, &startl, &endl)) {
        PyErr_SetString(PyExc_RuntimeError, "You must supply at least a chromosome!");
        return NULL;
    }

    //Get the chromosome ID
    for(i=0; i<tb->hdr->nChroms; i++) {
        if(strcmp(tb->cl->chrom[i], chrom) == 0) {
            tid = i;
            break;
        }
    }

    len = twobitChromLen(tb, chrom);
    // i == nChroms means the loop above found no match; reject that explicitly
    // rather than relying only on the length lookup reporting 0.
    if(i == tb->hdr->nChroms || len == 0) {
        PyErr_SetString(PyExc_RuntimeError, "The specified chromosome doesn't exist in the 2bit file!");
        return NULL;
    }
    if(endl == 0) endl = len;
    if(endl > len) endl = len;
    end = (uint32_t) endl;
    // endl <= len here, so any startl that passes this check also fits in a uint32_t
    if(startl >= endl && startl > 0) {
        PyErr_SetString(PyExc_RuntimeError, "The start value must be less then the end value (and the end of the chromosome");
        return NULL;
    }
    start = (uint32_t) startl;

    if(!tb->idx->maskBlockStart) {
        PyErr_SetString(PyExc_RuntimeError, "The file was not opened with storeMasked=True! Consequently, there are no stored soft-masked regions.");
        return NULL;
    }

    // Count the total number of overlapping soft-masked blocks
    for(i=0; i<tb->idx->maskBlockCount[tid]; i++) {
        blockStart = tb->idx->maskBlockStart[tid][i];
        blockEnd = blockStart + tb->idx->maskBlockSizes[tid][i];
        if(blockStart < end && blockEnd > start) totalBlocks++;
    }

    // Form the output
    ret = PyList_New(totalBlocks);
    if(!ret) goto error;
    if(totalBlocks == 0) return ret;
    for(i=0, j=0; i<tb->idx->maskBlockCount[tid]; i++) {
        blockStart = tb->idx->maskBlockStart[tid][i];
        blockEnd = blockStart + tb->idx->maskBlockSizes[tid][i];
        if(blockStart < end && blockEnd > start) {
            tup = Py_BuildValue("(kk)", (unsigned long) blockStart, (unsigned long) blockEnd);
            if(!tup) goto error;
            // PyList_SetItem steals the reference to tup, even when it fails,
            // so tup must not be released again on the error path.
            if(PyList_SetItem(ret, j++, tup)) goto error;
        }
    }

    return ret;

error:
    // tup is either NULL (Py_BuildValue failed) or already owned/released by
    // PyList_SetItem at this point; only the list itself needs releasing.
    Py_XDECREF(ret);
    PyErr_SetString(PyExc_RuntimeError, "Received an error while constructing the output list and tuples!");
    return NULL;
}
452+
378453
#if PY_MAJOR_VERSION >= 3
379454
PyMODINIT_FUNC PyInit_py2bit(void) {
380455
PyObject *res;

py2bit.h

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ static PyObject* py2bitChroms(pyTwoBit_t *pybw, PyObject *args);
1717
static PyObject *py2bitSequence(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
1818
static PyObject *py2bitBases(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
1919
static PyObject *py2bitHardMaskedBlocks(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
20+
static PyObject *py2bitSoftMaskedBlocks(pyTwoBit_t *pybw, PyObject *args, PyObject *kwds);
2021
static void py2bitDealloc(pyTwoBit_t *pybw);
2122

2223
static PyMethodDef tbMethods[] = {
@@ -163,7 +164,28 @@ Returns:\n\
163164
>>> import py2bit\n\
164165
>>> tb = py2bit.open(\"test/test.2bit\")\n\
165166
>>> print(tb.hardMaskedBlocks(\"chr1\")\n\
166-
>>> \n\
167+
[(0, 50), (100, 150)]\n\
168+
>>> print(tb.hardMaskedBlocks(\"chr1\", 75, 100)\n\
169+
[]\n\
170+
>>> print(tb.hardMaskedBlocks(\"chr1\", 75, 101)\n\
171+
[(100, 150)]\n\
172+
>>> tb.close()"},
173+
{"softMaskedBlocks", (PyCFunction)py2bitSoftMaskedBlocks, METH_VARARGS|METH_KEYWORDS,
174+
"Retrieve a list of soft-masked blocks on a single-chromosome (or range on it).\n\
175+
\n\
176+
Positional arguments:\n\
177+
chr: Chromosome name\n\
178+
\n\
179+
Optional keyword arguments:\n\
180+
start: Starting position (0-based)\n\
181+
end: Ending position (1-based)\n\
182+
\n\
183+
Returns:\n\
184+
A list of tuples, with items start and end.\n\
185+
\n\
186+
>>> import py2bit\n\
187+
>>> tb = py2bit.open(\"test/test.2bit\", storeMasked=True)\n\
188+
>>> print(tb.softMaskedBlocks(\"chr1\")\n\
167189
>>> tb.close()"},
168190
{"__enter__", (PyCFunction) py2bitEnter, METH_NOARGS, NULL},
169191
{"__exit__", (PyCFunction) py2bitClose, METH_VARARGS, NULL},

py2bitTest/test.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,9 @@ def testHardMaskedBlocks(self):
5656
assert(tb.hardMaskedBlocks("chr1", 75, 101) == [(100, 150)])
5757
assert(tb.hardMaskedBlocks("chr2") == [(50, 100)])
5858
tb.close()
59+
60+
def testSoftMaskedBlocks(self):
    """Check softMaskedBlocks() on chr1, for the whole chromosome and for a sub-range."""
    tb = py2bit.open(self.fname, storeMasked=True)
    try:
        assert tb.softMaskedBlocks("chr1") == [(62, 70)]
        assert tb.softMaskedBlocks("chr1", 0, 50) == []
    finally:
        # Close the handle even when an assertion above fails
        tb.close()

0 commit comments

Comments
 (0)