jts · hasindu2008 · Jun 7, 2018 · Jun 7, 2018 · Jun 11, 2018 · Jun 11, 2018
diff --git a/Makefile b/Makefile
@@ -142,6 +142,10 @@ EXE_SRC = src/main/nanopolish.cpp src/test/nanopolish_test.cpp
 CPP_OBJ = $(CPP_SRC:.cpp=.o)
 C_OBJ = $(C_SRC:.c=.o)
 
+ifdef cuda # enabled by `make cuda=1`; ifdef is true for any non-empty value of cuda
+include cuda.mk # CUDA toolchain variables, .cu object list and the .cu compile rule
+endif # cuda
+
 # Generate dependencies
 .PHONY: depend
 depend: .depend
@@ -172,4 +176,6 @@ test: $(TEST_PROGRAM)
 .PHONY: clean
+# Remove every object under src/cuda_kernels (not one named file) so clean is complete even without cuda=1.
 clean:
 	rm -f $(PROGRAM) $(TEST_PROGRAM) $(CPP_OBJ) $(C_OBJ) \
+		src/cuda_kernels/*.o \
 		src/main/nanopolish.o src/test/nanopolish_test.o
diff --git a/README.md b/README.md
@@ -131,6 +131,14 @@ Then you can run nanopolish from the image:
 docker run -v /path/to/local/data/data/:/data/ -it :image_id  ./nanopolish eventalign -r /data/reads.fa -b /data/alignments.sorted.bam -g /data/ref.fa
 ```
 
+## GPU acceleration
+
+The nanopolish consensus improvement algorithm can be run faster using CUDA-enabled GPU acceleration. This is an experimental feature; to try it, run with the `--gpu=1` flag, e.g.:
+```
+nanopolish variants --consensus polished_gpu.fa -w "tig00000001:200000-230000" -r reads.fasta -b reads.sorted.bam -g draft.fa --threads=8 --gpu=1
+```
+Note that this feature requires nanopolish to be compiled with `make cuda=1`. You should have the [CUDA toolkit installed and configured](https://docs.nvidia.com/cuda/cuda-quick-start-guide/). If your CUDA installation is not in the default location, you can pass the relevant paths to make, e.g. `make cuda=1 NVCC=/path/to/nvidia_c_compiler CUDA_LIB=/path/to/cuda/lib CUDA_INCLUDE=/path/to/cuda/include`.
+
 ## Credits and Thanks
 
 The fast table-driven logsum implementation was provided by Sean Eddy as public domain code. This code was originally part of [hmmer3](http://hmmer.janelia.org/). Nanopolish also includes code from Oxford Nanopore's [scrappie](https://github.com/nanoporetech/scrappie) basecaller. This code is licensed under the MPL.
diff --git a/cuda.mk b/cuda.mk
@@ -0,0 +1,27 @@
+# Makefile options for CUDA support.
+# Each ?= default below yields to a value given on the make command line or in the environment.
+NVCC ?= nvcc
+CUDA_ROOT ?= /usr/local/cuda
+CUDA_LIB ?= $(CUDA_ROOT)/lib64
+CUDA_INCLUDE ?= $(CUDA_ROOT)/include
+CURTFLAGS = -L$(CUDA_LIB) -lcudart_static -lrt
+NVCCFLAGS ?= -g -lineinfo -std=c++11 -I. -I$(CUDA_INCLUDE) -O3 -use_fast_math --default-stream per-thread -restrict
+
+# Add the CUDA header directory to the preprocessor flags and define HAVE_CUDA=1.
+CPPFLAGS += -I$(CUDA_INCLUDE) -DHAVE_CUDA=1
+
+# Register the CUDA kernel directory in the list of source sub-directories.
+SUBDIRS += src/cuda_kernels
+# Collect every .cu file found directly inside those sub-directories.
+CU_SRC := $(wildcard $(addsuffix /*.cu,$(SUBDIRS)))
+# One .o per .cu source, appended to CPP_OBJ; CURTFLAGS is appended to the link flags.
+CU_OBJ = $(patsubst %.cu,%.o,$(CU_SRC))
+CPP_OBJ += $(CU_OBJ)
+LDFLAGS += $(CURTFLAGS)
+
+# Compile each CUDA source to an object with nvcc. This is a pattern rule, not
+# a suffix rule, so it does not depend on .cu and .o being in the known-suffix
+# list (that list is empty under `make -r`, which disables suffix rules).
+%.o: %.cu
+	$(NVCC) -o $@ -c $(NVCCFLAGS) $(CPPFLAGS) $<
+