Skip to content

Commit ec97130

Browse files
committed
Added support for minor alleles
1 parent 4d3a27c commit ec97130

File tree

9 files changed

+5408
-19
lines changed

9 files changed

+5408
-19
lines changed

src/main/groovy/com/milaboratory/migec/CdrBlast.groovy

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ cli._(longOpt: "same-sample", "Assembled data only (-a). Input files come from t
4747
"each UMI will be counted once.")
4848
cli._(longOpt: "all-segments",
4949
"Use full V/D/J segment library (including pseudogens, etc).")
50+
cli._(longOpt: "all-alleles",
51+
"Use full list of alleles (uses only major *01 alleles by default)")
5052
cli._(longOpt: "print-library",
5153
"Prints out allowed species-gene pairs. " +
5254
"To account non-functional segment data use together with --all-segments")
@@ -82,10 +84,10 @@ if (opt.h) {
8284
}
8385

8486
// SEGMENTS STUFF
85-
boolean includeNonFuncitonal = opt.'all-segments'
87+
boolean includeNonFuncitonal = opt.'all-segments', includeAlleles = opt.'all-alleles'
8688
if (opt.'print-library') {
8789
println "CDR3 extraction is possible for the following data (segments include non-functional = $includeNonFuncitonal):"
88-
Util.listAvailableSegments(includeNonFuncitonal)
90+
Util.listAvailableSegments(includeNonFuncitonal, includeAlleles)
8991
System.exit(0)
9092
}
9193

@@ -102,11 +104,11 @@ if (!chain) {
102104
System.exit(-1)
103105
}
104106

105-
if (!Util.isAvailable(species, chain, includeNonFuncitonal)) {
107+
if (!Util.isAvailable(species, chain, includeNonFuncitonal, includeAlleles)) {
106108
println "[ERROR] Sorry, no analysis could be performed for $species gene $chain " +
107109
"(include non-functional = $includeNonFuncitonal). " +
108110
"Possible variants are:\n"
109-
Util.listAvailableSegments(includeNonFuncitonal)
111+
Util.listAvailableSegments(includeNonFuncitonal, includeAlleles)
110112
System.exit(-1)
111113
}
112114

@@ -199,7 +201,7 @@ def segments = new HashMap<String, Segment>(), alleles = new HashMap<String, All
199201
def vAlleles = new ArrayList<Allele>(), jAlleles = new ArrayList<Allele>()
200202
def collapseAlleleMap = new HashMap<String, Allele>()
201203

202-
def resFile = Util.getSegmentsFile(includeNonFuncitonal)
204+
def resFile = Util.getSegmentsFile(includeNonFuncitonal, includeAlleles)
203205

204206
resFile.splitEachLine("\t") {
205207
if (species.toUpperCase() == it[0].toUpperCase() &&

src/main/groovy/com/milaboratory/migec/CdrBlastBatch.groovy

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ cli._(longOpt: "no-sort",
3636
"Could be used for full pipeline as FilterCdrBlastResults will provide final clonotype table in sorted format.")
3737
cli._(longOpt: "all-segments",
3838
"Use full V/D/J segment library (including pseudogens, etc).")
39+
cli._(longOpt: "all-alleles",
40+
"Use full list of alleles (uses only major *01 alleles by default)")
3941
cli._(longOpt: "print-library",
4042
"Prints out allowed species-gene pairs. " +
4143
"To account non-functional segment data use together with --all-segments")
@@ -64,10 +66,10 @@ if (opt == null) {
6466
}
6567

6668
// SEGMENTS
67-
boolean includeNonFuncitonal = opt.'all-segments'
69+
boolean includeNonFuncitonal = opt.'all-segments', includeAlleles = opt.'all-alleles'
6870
if (opt.'print-library') {
6971
println "CDR3 extraction is possible for the following data (segments include non-functional = $includeNonFuncitonal):"
70-
Util.listAvailableSegments(includeNonFuncitonal)
72+
Util.listAvailableSegments(includeNonFuncitonal, includeAlleles)
7173
System.exit(0)
7274
}
7375

@@ -172,11 +174,11 @@ sampleInfoLines.findAll { !it.startsWith("#") }.each { line ->
172174
qualityThreshold = qualityThreshold ? qualityThreshold.split(",") : null
173175
// todo: estimate q threshold by HistQ
174176

175-
if (!Util.isAvailable(species, chain, includeNonFuncitonal)) {
177+
if (!Util.isAvailable(species, chain, includeNonFuncitonal, includeAlleles)) {
176178
println "[ERROR] Sorry, no analysis could be performed for $species gene $chain " +
177179
"(include non-functional = $includeNonFuncitonal). " +
178180
"Possible variants are:\n"
179-
Util.listAvailableSegments(includeNonFuncitonal)
181+
Util.listAvailableSegments(includeNonFuncitonal, includeAlleles)
180182
System.exit(-1)
181183
}
182184
if (!Util.MASKS.any { mask == it }) {

src/main/groovy/com/milaboratory/migec/Util.groovy

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class Util {
3333
script.binding.setVariable("args", argArray)
3434
script.run()
3535
}
36-
36+
3737
static final String BLANK_PATH = "."
3838

3939
static final List<String> FILE_TYPES = ["paired", "unpaired", "overlapped"],
@@ -61,20 +61,18 @@ class Util {
6161
* Immune gene segment utils
6262
*/
6363

64-
static void listAvailableSegments(boolean includeNonFunctional) {
64+
static void listAvailableSegments(boolean includeNonFunctional, boolean includeAllAlleles) {
6565
println "SPECIES\tGENE"
66-
new InputStreamReader(Migec.class.classLoader.getResourceAsStream("segments" +
67-
(includeNonFunctional ? "_all" : "") + ".metadata")).splitEachLine("\t") { splitLine ->
66+
getSegmentMetadataFile(includeNonFunctional, includeAllAlleles).splitEachLine("\t") { splitLine ->
6867
if (!splitLine[0].startsWith("#") && splitLine[-1] == "1") {
6968
println "${splitLine[0]}\t${splitLine[1]}"
7069
}
7170
}
7271
}
7372

74-
static boolean isAvailable(String species, String gene, boolean includeNonFunctional) {
73+
static boolean isAvailable(String species, String gene, boolean includeNonFunctional, boolean includeAllAlleles) {
7574
boolean result = false
76-
new InputStreamReader(Migec.class.classLoader.getResourceAsStream("segments" +
77-
(includeNonFunctional ? "_all" : "") + ".metadata")).splitEachLine("\t") { splitLine ->
75+
getSegmentMetadataFile(includeNonFunctional, includeAllAlleles).splitEachLine("\t") { splitLine ->
7876
if (!splitLine[0].startsWith("#") && splitLine[-1] == "1" &&
7977
splitLine[0].toUpperCase() == species.toUpperCase() &&
8078
splitLine[1].toUpperCase() == gene.toUpperCase())
@@ -83,9 +81,26 @@ class Util {
8381
result
8482
}
8583

86-
static InputStreamReader getSegmentsFile(boolean includeNonFunctional) {
87-
new InputStreamReader(Migec.class.classLoader.getResourceAsStream("segments" +
88-
(includeNonFunctional ? "_all" : "") + ".txt"))
84+
private static getResourceAsStream(String resourceName) {
85+
new InputStreamReader(Migec.class.classLoader.getResourceAsStream(resourceName))
86+
}
87+
88+
private static getSegmentFilePrefix(boolean includeNonFunctional, boolean includeAllAlleles) {
89+
"segments" +
90+
(includeNonFunctional ? ".all" : "") +
91+
(includeAllAlleles ? ".minor" : "")
92+
}
93+
94+
static InputStreamReader getSegmentsFile(boolean includeNonFunctional, boolean includeAllAlleles) {
95+
getResourceAsStream(
96+
getSegmentFilePrefix(includeNonFunctional, includeAllAlleles) +
97+
".txt")
98+
}
99+
100+
static InputStreamReader getSegmentMetadataFile(boolean includeNonFunctional, boolean includeAllAlleles) {
101+
getResourceAsStream(
102+
getSegmentFilePrefix(includeNonFunctional, includeAllAlleles) +
103+
".metadata")
89104
}
90105

91106
/*
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#SPECIES GENE V D J VJ
2+
MusMusculus_C57BL/10 IGH 1 0 0 0
3+
CamelusDromedarius TRD 1 0 0 0
4+
CamelusDromedarius TRG 1 0 1 1
5+
SusScrofa IGK 1 0 1 1
6+
SusScrofa IGL 1 0 1 1
7+
MusMinutoides TRB 0 0 0 0
8+
MusMinutoides IGK 0 0 0 0
9+
MusMusculus TRA 1 0 1 1
10+
MusMusculus TRB 1 1 1 1
11+
MusMusculus TRD 1 1 1 1
12+
MusMusculus TRG 1 0 1 1
13+
MusMusculus IGH 1 1 1 1
14+
MusMusculus IGK 1 0 1 1
15+
MusMusculus IGL 1 0 1 1
16+
HomoSapiens CD1 0 0 0 0
17+
HomoSapiens TRA 1 0 1 1
18+
HomoSapiens TRB 1 1 1 1
19+
HomoSapiens MIC 0 0 0 0
20+
HomoSapiens TRD 1 1 1 1
21+
HomoSapiens TRG 1 0 1 1
22+
HomoSapiens VPR 1 0 0 0
23+
HomoSapiens IGH 1 1 1 1
24+
HomoSapiens IGK 1 0 1 1
25+
HomoSapiens IGL 1 0 1 1
26+
OncorhynchusMykiss TRB 1 1 1 1
27+
OncorhynchusMykiss MH2 0 0 0 0
28+
OncorhynchusMykiss MH1 0 0 0 0
29+
OncorhynchusMykiss IGH 1 1 1 1
30+
DanioRerio IGI 0 0 1 0
31+
DanioRerio IGH 0 1 1 0
32+
MusMusculus_BALB.K IGH 1 0 0 0
33+
MusMusculus_MRL/Lpr IGH 1 0 0 0
34+
PapioAnubisAnubis IGH 0 0 0 0
35+
CercocebusTorquatusAtys IGH 0 0 0 0
36+
MacacaMulatta TRB 1 1 1 1
37+
MacacaMulatta IGH 1 1 1 1
38+
MacacaMulatta IGK 1 0 1 1
39+
MacacaNemestrina IGH 0 0 0 0
40+
MusPahari TRB 0 0 0 0
41+
MusPahari IGK 0 0 0 0
42+
MusMusculus_C57BL/6 IGH 1 0 0 0
43+
MusCookii IGK 0 0 0 0
44+
RattusNorvegicus IGH 1 1 1 1
45+
RattusNorvegicus IGK 0 0 1 0
46+
RattusNorvegicus IGL 1 0 1 1
47+
CanisLupusFamiliaris TRB 1 1 1 1
48+
CanisLupusFamiliaris TRG 1 0 1 1
49+
MusMusculus_A/J IGH 1 0 0 0
50+
MacacaFascicularis IGH 0 0 0 0
51+
MusSpretus TRB 0 0 0 0
52+
MusSpretus IGK 1 0 0 0
53+
MusSpretus IGL 1 0 1 1
54+
MusMusculus_129/Sv IGH 1 0 0 0
55+
RattusRattus IGH 0 0 0 0
56+
BosTaurus TRA 0 0 1 0
57+
BosTaurus TRD 1 1 1 1
58+
BosTaurus TRG 1 0 1 1
59+
RattusNorvegicus_BN/SsNHsdMCW IGK 1 0 0 0
60+
OryctolagusCuniculus IGH 1 1 1 1
61+
OryctolagusCuniculus IGK 1 0 1 1
62+
OryctolagusCuniculus IGL 1 0 1 1
63+
MusSaxicola IGK 0 0 0 0
64+
MusMusculus_C57BL/6J IGK 1 0 0 0
65+
MusMusculus_BALB/C IGH 1 0 0 0
66+
MusMusculus_NZB IGH 1 0 0 0
67+
AnasPlatyrhynchos TRB 1 0 1 1
68+
GallusGallus TRB 1 0 1 1

0 commit comments

Comments
 (0)