Skip to content

Commit 78db660

Browse files
committed
added vcf bed intersection utility
1 parent 9be5d9a commit 78db660

File tree

15 files changed

+578
-36
lines changed

15 files changed

+578
-36
lines changed

src/bedUtils/BedUtils.java

Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,287 @@
1+
package bedUtils;
2+
import java.beans.IntrospectionException;
3+
import java.io.BufferedReader;
4+
import java.io.File;
5+
import java.io.FileNotFoundException;
6+
import java.io.FileReader;
7+
import java.io.IOException;
8+
import java.util.HashMap;
9+
import java.util.List;
10+
import java.util.Map;
11+
import java.util.Set;
12+
import java.util.TreeSet;
13+
14+
import liftoverutils.*;
15+
16+
public class BedUtils {
17+
18+
public static void main(String[] args) {
19+
20+
File f = new File("C:\\Users\\manojkumar_bhosale\\Desktop\\ForIntervalMerging\\DEF_662-1322126769L_AllTracks_amplicons.bed");
21+
File f1 = new File("C:\\Users\\manojkumar_bhosale\\Desktop\\d\\CancerAll-In-OneLung_hg38Mut.bed");
22+
//mergeBedFiles(f,null);
23+
Set<BedInterval> intersectBedFiles = mergeBedFiles(f);
24+
25+
intersectBedFiles.forEach(System.out::println);
26+
27+
}
28+
29+
public static Set<BedInterval> intersectBedFiles(File one, File two){
30+
31+
32+
//read a bed file record by record
33+
//query another files tree with record
34+
// intersect the resulting inervals each other
35+
//add the interval to resutl
36+
37+
Map<String, IntervalTree<String>> bedMapTwo = getIntervalTree(two);
38+
Map<String, IntervalTree<String>> bedMap1 = getIntervalTree(one);
39+
40+
41+
Set<BedInterval> result1 = mergeSingleFile(bedMap1, one);
42+
43+
44+
Set<BedInterval> result = new TreeSet<>();
45+
46+
for(BedInterval inter : result1) {
47+
48+
49+
IntervalTree<String> intervalTree = bedMapTwo.get(inter.getChromosome());
50+
List<Interval<String>> intervals = intervalTree.getIntervals(inter.getStart(), inter.getStop());
51+
if(!intervals.isEmpty()) {
52+
intervals.add(new Interval<String>(inter.getStart(), inter.getStop(),""));
53+
Interval<String> mergedInterval = IntervalUtils.intersectIntervals(intervals);
54+
result.add(new BedInterval(inter.getChromosome(),mergedInterval.getStart(),mergedInterval.getEnd()));
55+
}
56+
}
57+
58+
59+
60+
return result;
61+
62+
63+
}
64+
65+
public static Set<BedInterval> mergeBedFiles(File one){
66+
67+
Set<BedInterval> result1 = new TreeSet<BedInterval>();
68+
Set<BedInterval> result = new TreeSet<BedInterval>();
69+
Map<String, IntervalTree<String>> bedMap = getIntervalTree(one);
70+
result1 = mergeSingleFile(bedMap, one);
71+
boolean checkFirst = true;
72+
BedInterval first = null;
73+
for(BedInterval inter : result1) {
74+
75+
if(checkFirst == true) {
76+
first = inter;
77+
checkFirst = false;
78+
continue;
79+
}
80+
81+
if(first.intersects(inter)) {
82+
try {
83+
inter = BedInterval.mergeOverlappingIntervals(first, inter);
84+
} catch (Exception e) {
85+
// TODO Auto-generated catch block
86+
e.printStackTrace();
87+
}
88+
first = inter;
89+
}else {
90+
result.add(first);
91+
first = inter;
92+
}
93+
94+
}
95+
result.add(first);
96+
97+
return result;
98+
99+
}
100+
101+
102+
103+
public static Set<BedInterval> mergeBedFiles(File one, File two){
104+
105+
Set<BedInterval> result = new TreeSet<>();
106+
//Map<String, IntervalTree<String>> bedMap12 = getIntervalTree(one,two);
107+
Map<String, IntervalTree<String>> bedMap1 = getIntervalTree(one);
108+
Map<String, IntervalTree<String>> bedMap2 = getIntervalTree(two);
109+
110+
Set<BedInterval> result1 = mergeSingleFile(bedMap1, one);
111+
Set<BedInterval> result2 = mergeSingleFile(bedMap2, two);
112+
113+
result1.addAll(result2);
114+
115+
for(BedInterval interval : result1) {
116+
117+
IntervalTree<String> intervalTree = bedMap1.get(interval.getChromosome());
118+
List<Interval<String>> interList = intervalTree.get(interval.getStart(), interval.getStop());
119+
if(interList.isEmpty()) {
120+
result.add(interval);
121+
System.out.println("Manoj");
122+
continue;
123+
}
124+
Interval<String> mergedInterval = IntervalUtils.mergeIntervals(interList);
125+
result.add(new BedInterval(interval.getChromosome(),mergedInterval.getStart(),mergedInterval.getEnd()));
126+
127+
}
128+
129+
130+
return result;
131+
132+
}
133+
134+
135+
static Set<BedInterval> mergeSingleFile(Map<String, IntervalTree<String>> bedMap, File one) {
136+
Set<BedInterval> result = new TreeSet<>();
137+
try(BufferedReader br = new BufferedReader(new FileReader(one))){
138+
139+
String line= "";
140+
while((line = br.readLine()) != null) {
141+
if(line.startsWith("#")) {
142+
continue;
143+
}
144+
String[] splited = line.split("\t");
145+
String chromosome = splited[0];
146+
long start = Integer.parseInt(splited[1]);
147+
long end = Integer.parseInt(splited[2]);
148+
149+
BedInterval interval = new BedInterval(chromosome,start,end);
150+
IntervalTree<String> intervalTree = bedMap.get(chromosome);
151+
List<Interval<String>> intervals = intervalTree.getIntervals(start, end);
152+
//System.out.println(interval+" : "+ intervals);
153+
if(!intervals.isEmpty()) {
154+
Interval<String> mergedInterval = IntervalUtils.mergeIntervals(intervals);
155+
result.add(new BedInterval(chromosome,mergedInterval.getStart(),mergedInterval.getEnd()));
156+
}
157+
}
158+
159+
160+
} catch (FileNotFoundException e) {
161+
// TODO Auto-generated catch block
162+
e.printStackTrace();
163+
} catch (IOException e) {
164+
// TODO Auto-generated catch block
165+
e.printStackTrace();
166+
}
167+
168+
return result;
169+
170+
}
171+
172+
public BedInterval mergeIntervals(List<BedInterval> intervals) {
173+
BedInterval first = intervals.get(0);
174+
for(BedInterval inter : intervals) {
175+
try {
176+
first = BedInterval.mergeOverlappingIntervals(first, inter);
177+
} catch (Exception e) {
178+
// TODO Auto-generated catch block
179+
e.printStackTrace();
180+
}
181+
}
182+
return first;
183+
}
184+
185+
static void sortBedFile(File bedFile) {
186+
187+
188+
189+
}
190+
191+
static void addBedInterval(Map<String, IntervalTree<String>> bedMap, BedInterval interval) {
192+
193+
if(bedMap.containsKey(interval.getChromosome())) {
194+
IntervalTree<String> intervalTree = bedMap.get(interval.getChromosome());
195+
Interval<String> inter = new Interval(interval.getStart(), interval.getStop(),"");
196+
intervalTree.addInterval(inter);
197+
}else {
198+
IntervalTree<String> intervalTree = new IntervalTree<>();
199+
Interval<String> inter = new Interval(interval.getStart(), interval.getStop(),"");
200+
intervalTree.addInterval(inter);
201+
bedMap.put(interval.getChromosome(), intervalTree);
202+
}
203+
}
204+
205+
public static Map<String, IntervalTree<String>> getIntervalTree(File bedFile){
206+
Map<String, IntervalTree<String>> bedMap = new HashMap<>();
207+
try(BufferedReader br = new BufferedReader(new FileReader(bedFile))){
208+
209+
String line= "";
210+
while((line = br.readLine()) != null) {
211+
if(line.startsWith("#")) {
212+
continue;
213+
}
214+
String[] splited = line.split("\t");
215+
String chromosome = splited[0];
216+
long start = Integer.parseInt(splited[1]);
217+
long end = Integer.parseInt(splited[2]);
218+
219+
BedInterval interval = new BedInterval(chromosome,start,end);
220+
addBedInterval(bedMap, interval);
221+
222+
223+
}
224+
225+
226+
} catch (FileNotFoundException e) {
227+
// TODO Auto-generated catch block
228+
e.printStackTrace();
229+
} catch (IOException e) {
230+
// TODO Auto-generated catch block
231+
e.printStackTrace();
232+
}
233+
234+
return bedMap;
235+
236+
}
237+
238+
public static Map<String, IntervalTree<String>> getIntervalTree(File bedFile1, File bedFile2){
239+
Map<String, IntervalTree<String>> bedMap = new HashMap<>();
240+
try(BufferedReader br = new BufferedReader(new FileReader(bedFile1));BufferedReader br1 = new BufferedReader(new FileReader(bedFile2))){
241+
242+
String line= "";
243+
while((line = br.readLine()) != null) {
244+
if(line.startsWith("#")) {
245+
continue;
246+
}
247+
String[] splited = line.split("\t");
248+
String chromosome = splited[0];
249+
long start = Integer.parseInt(splited[1]);
250+
long end = Integer.parseInt(splited[2]);
251+
252+
BedInterval interval = new BedInterval(chromosome,start,end);
253+
addBedInterval(bedMap, interval);
254+
255+
256+
}
257+
258+
while((line = br1.readLine()) != null) {
259+
if(line.startsWith("#")) {
260+
continue;
261+
}
262+
String[] splited = line.split("\t");
263+
String chromosome = splited[0];
264+
long start = Integer.parseInt(splited[1]);
265+
long end = Integer.parseInt(splited[2]);
266+
267+
BedInterval interval = new BedInterval(chromosome,start,end);
268+
addBedInterval(bedMap, interval);
269+
270+
271+
}
272+
273+
274+
} catch (FileNotFoundException e) {
275+
// TODO Auto-generated catch block
276+
e.printStackTrace();
277+
} catch (IOException e) {
278+
// TODO Auto-generated catch block
279+
e.printStackTrace();
280+
}
281+
282+
return bedMap;
283+
284+
}
285+
286+
287+
}

src/bedUtils/Chromosome.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
package bedUtils;
2+
3+
public class Chromosome implements Comparable<Chromosome>{
4+
5+
String chromName;
6+
7+
@Override
8+
public int compareTo(Chromosome arg0) {
9+
10+
String noChr = chromName.replace("chr", "");
11+
12+
13+
return 0;
14+
}
15+
16+
17+
18+
}

src/gatkUtils/GatkThread.java

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,14 @@ public void run() {
6767
runAndGobbleCommand(mdupCommand, r);
6868

6969
stepFileName = newStepFileName;
70-
} else if (obj1.keySet().contains("BuildBamIndex")) {
70+
} else if (obj1.keySet().contains("BuildBamIndex") && ((JSONObject)obj1.get("BuildBamIndex")).values().contains("true")) {
7171
String bamIndexCommand = "C:\\Program Files\\Java\\jdk1.8.0_192\\bin\\java.exe "
7272
+ GatkUtils.maxHeapSpace + " -jar " + GatkUtils.picardPath + " BuildBamIndex " + " I="
7373
+ inputFileLocation.resolve(stepFileName);
7474
System.out.println(bamIndexCommand);
7575

7676
runAndGobbleCommand(bamIndexCommand, r);
77-
} else if (obj1.keySet().contains("RealignerTargetCreator")) {
77+
} else if (obj1.keySet().contains("RealignerTargetCreator") && ((JSONObject)obj1.get("RealignerTargetCreator")).values().contains("true")) {
7878
listFileName = inputFileLocation.resolve(FilenameUtils.getBaseName(stepFileName)).toString() + ".list";
7979
String realignerCommand = "C:\\Program Files\\Java\\jdk1.8.0_192\\bin\\java.exe "
8080
+ GatkUtils.maxHeapSpace + " -jar " + GatkUtils.gatkPath + " -T RealignerTargetCreator "
@@ -83,7 +83,7 @@ public void run() {
8383
System.out.println(realignerCommand);
8484
runAndGobbleCommand(realignerCommand, r);
8585

86-
} else if (obj1.keySet().contains("IndelRealigner")) {
86+
} else if (obj1.keySet().contains("IndelRealigner") && ((JSONObject)obj1.get("IndelRealigner")).values().contains("true")) {
8787
String newStepFileName = inputFileLocation.resolve(FilenameUtils.getBaseName(stepFileName)).toString()
8888
+ ".realigned.bam";
8989
String indelRealignerCommand = "C:\\Program Files\\Java\\jdk1.8.0_192\\bin\\java.exe "
@@ -96,7 +96,7 @@ public void run() {
9696
stepFileName = newStepFileName;
9797
}
9898

99-
else if (obj1.keySet().contains("FixMateInformation")) {
99+
else if (obj1.keySet().contains("FixMateInformation") && ((JSONObject)obj1.get("FixMateInformation")).values().contains("true")) {
100100
// java -Xmx1g -jar
101101
// tools\picard-tools-1.82\picard-tools-1.82\FixMateInformation.jar
102102
// INPUT=%sampleName%.sorted.realigned.bam
@@ -112,8 +112,15 @@ else if (obj1.keySet().contains("FixMateInformation")) {
112112
runAndGobbleCommand(mateFixCommand, r);
113113

114114
stepFileName = newStepFileName;
115+
116+
String bamIndexCommand = "C:\\Program Files\\Java\\jdk1.8.0_192\\bin\\java.exe "
117+
+ GatkUtils.maxHeapSpace + " -jar " + GatkUtils.picardPath + " BuildBamIndex " + " I="
118+
+ inputFileLocation.resolve(stepFileName);
119+
System.out.println(bamIndexCommand);
120+
runAndGobbleCommand(bamIndexCommand, r);
121+
115122

116-
} else if (obj1.keySet().contains("BaseRecalibrator")) {
123+
} else if (obj1.keySet().contains("BaseRecalibrator") && ((JSONObject)obj1.get("BaseRecalibrator")).values().contains("true")) {
117124
recalGrpFile = inputFileLocation.resolve(FilenameUtils.getBaseName(stepFileName)).toString()
118125
+ ".recal.grp";
119126
String baseRecalCommand = "C:\\Program Files\\Java\\jdk1.8.0_192\\bin\\java.exe "
@@ -122,7 +129,7 @@ else if (obj1.keySet().contains("FixMateInformation")) {
122129
+ inputFileLocation.resolve(recalGrpFile) + " -bqsrBAQGOP 40 ";
123130
runAndGobbleCommand(baseRecalCommand, r);
124131

125-
} else if (obj1.keySet().contains("PrintReads")) {
132+
} else if (obj1.keySet().contains("PrintReads") && ((JSONObject)obj1.get("PrintReads")).values().contains("true")) {
126133
// java -Xmx1g -jar tools\GenomeAnalysisTK.jar -T PrintReads -R %genomeRef% -I
127134
// %sampleName%.sorted.realigned.fixed.bam -BQSR %sampleName%.recal.grp -o
128135
// %sampleName%.sorted.realigned.fixed.bqsr.bam
@@ -136,16 +143,20 @@ else if (obj1.keySet().contains("FixMateInformation")) {
136143
runAndGobbleCommand(printReadCommand, r);
137144

138145
stepFileName = newStepFile;
139-
} else if (obj1.keySet().contains("UnifiedGenotyper")) {
146+
} else if (obj1.keySet().contains("UnifiedGenotyper") && ((JSONObject)obj1.get("UnifiedGenotyper")).values().contains("true")) {
140147
// java -Xmx1g -jar tools\GenomeAnalysisTK.jar -R %genomeRef% -T
141148
// UnifiedGenotyper -I %sampleName%.sorted.realigned.fixed.bqsr.bam -mbq 13 -glm
142149
// BOTH -indelGCP 20 -indelGOP 40 -o %sampleName%_snps.raw.vcf -L %targetBed%
143150
String resultVcf = inputFileLocation.resolve(inputFileNameNoExt + ".raw.vcf").toString();
144151
String UnifiedGenotyperCommand = "C:\\Program Files\\Java\\jdk1.8.0_192\\bin\\java.exe "
145152
+ GatkUtils.maxHeapSpace + " -jar " + GatkUtils.gatkPath + " -T UnifiedGenotyper " + " -R "
146153
+ GatkUtils.referencePath + " -I " + inputFileLocation.resolve(stepFileName) + " -o "
147-
+ inputFileLocation.resolve(resultVcf) + " -BQSR " + inputFileLocation.resolve(recalGrpFile)
154+
+ inputFileLocation.resolve(resultVcf)
148155
+ " -mbq 13 -glm BOTH -indelGCP 20 -indelGOP 40 " + " -L " + GatkUtils.targetBedFile;
156+
if(obj1.keySet().contains("BaseRecalibrator") && ((JSONObject)obj1.get("BaseRecalibrator")).values().contains("true")) {
157+
UnifiedGenotyperCommand = UnifiedGenotyperCommand+" -BQSR " + inputFileLocation.resolve(recalGrpFile);
158+
}
159+
149160
runAndGobbleCommand(UnifiedGenotyperCommand, r);
150161

151162
}

0 commit comments

Comments
 (0)