Skip to content

Commit 5e01120

Browse files
authored
Merge pull request #17 from refresh-bio/update-to-2.3.0
Update to 2.3.0
2 parents fef3a3e + 303138d commit 5e01120

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+1691
-5663
lines changed

.dockerignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,7 @@
66
!/src/splash.py
77
!/libs
88
!Makefile
9-
!download_kmc.sh
9+
!download_kmc.py
1010
!hash.git
11+
12+

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ src/common/kmc_api/*.o
66
src/satc_merge/*.o
77
src/satc_dump/*.o
88
src/compactors/*.o
9+
src/dsv_manip/*.o
910
*.fastq
1011
*.fastq.gz
1112
*.tsv

.gitmodules

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[submodule "libs/cloudflare-zlib"]
2+
path = libs/cloudflare-zlib
3+
url = https://github.com/refresh-bio/dependencies-zlib/
4+
[submodule "libs/zstd"]
5+
path = libs/zstd
6+
url = https://github.com/refresh-bio/dependencies-zstd/

COMPACTORS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Options:
1818
* `--beta <real>` - beta parameter for active set generation, lower values increase sensitivity (default: 5)
1919
* `--lower_bound <int>` - minimum kmer abundance to add it to an active set, lower values increase sensitivity (default: 10)
2020
* `--max_mismatch <int>` - maximum mismatch count for compactor candidates (default: 4)
21+
* `--all_anchors` - find all anchors' occurrences in a read, not just the first one (default: off)
2122

2223
* `--no_extension` - disable recursive extension (default: enabled)
2324
* `--max_length <int>` - maximum compactor length in bases (used only with recursion; default: 2000)

Makefile

Lines changed: 108 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,116 @@
1-
all: satc satc_dump satc_merge sig_anch compactors download_kmc splash supervised_test
1+
all: satc satc_dump satc_merge sig_anch compactors download_kmc splash supervised_test dsv_manip
2+
3+
dummy := $(shell git submodule update --init --recursive)
24

35
SPLASH_LIBS_DIR = libs
46
# Library search directory (alternative: /usr/local/lib). The note lives on its
# own line because make keeps whitespace that precedes an inline comment as part
# of the variable's value.
LIBS_DIR = .
57
INCLUDE_DIR= libs
8+
ZLIB_INCLUDE_DIR= libs/cloudflare-zlib
69

710
SATC_MAIN_DIR=src/satc
811
SATC_MERGE_MAIN_DIR=src/satc_merge
912
SATC_DUMP_MAIN_DIR=src/satc_dump
1013
SIG_ANCH_MAIN_DIR=src/sig_anch
14+
DSV_MANIP_MAIN_DIR=src/dsv_manip
1115
COMMON_DIR=src/common
12-
1316
COMPACTORS_MAIN_DIR=src/compactors
14-
OUT_BIN_DIR=bin
1517

16-
CC = g++
17-
CFLAGS = -fPIC -Wall -O3 -m64 -std=c++17 -pthread -I $(INCLUDE_DIR) -fpermissive
18-
CLINK = -lm -std=c++17 -lpthread -static-libstdc++
18+
OUT_BIN_DIR=bin
1919

20-
release: CLINK = -lm -std=c++17 -static -Wl,--whole-archive -lpthread -Wl,--no-whole-archive
21-
release: CFLAGS = -fPIC -Wall -O3 -DNDEBUG -m64 -std=c++17 -pthread -I $(INCLUDE_DIR) -fpermissive
22-
release: all
2320

24-
debug: CFLAGS = -fPIC -Wall -O0 -g -m64 -std=c++17 -pthread -I $(INCLUDE_DIR) -fpermissive
25-
debug: all
2621

2722
ifdef MSVC # Avoid the MingW/Cygwin sections
28-
uname_S := Windows
23+
UNAME_S := Windows
2924
else # If uname not available => 'not'
30-
uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
25+
UNAME_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
26+
UNAME_M := $(shell uname -m)
3127
endif
32-
ifeq ($(uname_S),Linux)
33-
CLINK+=-fabi-version=6
34-
LIB_ZLIB=cloudflare-zlib/libz.a
35-
LIB_ZSTD=zstd/linux/libzstd.a
28+
29+
D_OS =
30+
D_ARCH =
31+
32+
ifeq ($(UNAME_S),Darwin)
33+
D_OS=MACOS
34+
ifeq ($(UNAME_M),arm64)
35+
D_ARCH=ARM64
36+
else
37+
D_ARCH=X64
38+
endif
39+
else
40+
D_OS=LINUX
41+
D_ARCH=X64
42+
ifeq ($(UNAME_M),arm64)
43+
D_ARCH=ARM64
44+
endif
45+
ifeq ($(UNAME_M),aarch64)
46+
D_ARCH=ARM64
47+
endif
3648
endif
3749

38-
ifeq ($(uname_S),Darwin)
39-
LIB_ZLIB=cloudflare-zlib/libz.mac.a
40-
LIB_ZSTD=zstd/mac/libzstd.a
50+
CPU_FLAGS =
51+
STATIC_LFLAGS =
52+
PLATFORM_SPECIFIC_FLAGS =
53+
54+
# In some cases results on ARM differ from other platforms, so floating-point contraction is disabled there (-ffp-contract=off).
55+
# This is presumably the same issue as described here: https://bugs.mysql.com/bug.php?id=82760
56+
ifeq ($(D_ARCH),ARM64)
57+
PLATFORM_SPECIFIC_FLAGS = -ffp-contract=off
58+
endif
59+
60+
ifeq ($(D_OS),MACOS)
4161
CC = g++-11
62+
63+
ifeq ($(D_ARCH),ARM64)
64+
CPU_FLAGS = -march=armv8.4-a
65+
else
66+
CPU_FLAGS = -m64
67+
endif
68+
STATIC_LFLAGS = -static-libgcc -static-libstdc++ -pthread
69+
else
70+
CC = g++
71+
72+
ifeq ($(D_ARCH),ARM64)
73+
CPU_FLAGS = -march=armv8-a
74+
STATIC_LFLAGS = -static-libgcc -static-libstdc++ -lpthread
75+
else
76+
CPU_FLAGS = -m64
77+
STATIC_LFLAGS = -static -Wl,--whole-archive -lpthread -Wl,--no-whole-archive
78+
endif
79+
endif
80+
81+
82+
CFLAGS = -fPIC -Wall -O3 $(PLATFORM_SPECIFIC_FLAGS) $(CPU_FLAGS) -std=c++17 -pthread -I $(INCLUDE_DIR) -I $(ZLIB_INCLUDE_DIR) -fpermissive
83+
CLINK = -lm -std=c++17 -lpthread
84+
85+
release: CLINK = -lm -std=c++17 $(STATIC_LFLAGS)
86+
release: CLINK = -lm -std=c++17 $(STATIC_LFLAGS)
87+
88+
release: CFLAGS = -fPIC -Wall -O3 -DNDEBUG $(PLATFORM_SPECIFIC_FLAGS) $(CPU_FLAGS) -std=c++17 -pthread -I $(INCLUDE_DIR) -I $(ZLIB_INCLUDE_DIR) -fpermissive
89+
release: all
90+
91+
debug: CFLAGS = -fPIC -Wall -O0 -g $(PLATFORM_SPECIFIC_FLAGS) $(CPU_FLAGS) -std=c++17 -pthread -I $(INCLUDE_DIR) -I $(ZLIB_INCLUDE_DIR) -fpermissive
92+
debug: all
93+
94+
ifeq ($(UNAME_S),Linux)
95+
CLINK+=-fabi-version=6
4296
endif
4397

98+
99+
LIB_ZLIB=$(SPLASH_LIBS_DIR)/cloudflare-zlib/libz.a
100+
LIB_ZSTD=$(SPLASH_LIBS_DIR)/zstd/lib/libzstd.a
101+
44102
# default install location (binary placed in the /bin folder)
45103

46104
ifeq ($(PREFIX),)
47105
PREFIX ?= /usr/local
48106
endif
49107

108+
$(LIB_ZLIB):
109+
# Configure cloudflare-zlib, then build only the static archive. '&&' stops the
# recipe if the directory is missing instead of running configure in the wrong
# place; the build goes through the MAKE variable so the sub-make is the same
# make program and inherits its flags.
cd $(SPLASH_LIBS_DIR)/cloudflare-zlib && ./configure
$(MAKE) -C $(SPLASH_LIBS_DIR)/cloudflare-zlib libz.a
110+
111+
$(LIB_ZSTD):
112+
# Build the zstd submodule in parallel. '&&' prevents the build from starting in
# the wrong directory when the cd fails; the MAKE variable is used instead of a
# literal make so the sub-make is the same make program as the parent.
cd $(SPLASH_LIBS_DIR)/zstd && $(MAKE) -j
113+
50114
%.o: %.cpp
51115
$(CC) $(CFLAGS) -c $< -o $@
52116

@@ -56,29 +120,31 @@ $(OUT_BIN_DIR)/satc: $(SATC_MAIN_DIR)/satc.o \
56120
$(COMMON_DIR)/kmc_api/kmc_file.o \
57121
$(COMMON_DIR)/kmc_api/mmer.o \
58122
$(COMMON_DIR)/kmc_api/kmer_api.o \
59-
$(COMMON_DIR)/illumina_adapters_static.o
123+
$(COMMON_DIR)/illumina_adapters_static.o \
124+
$(LIB_ZSTD)
60125
-mkdir -p $(OUT_BIN_DIR)
61126
$(CC) -o $@ $^ \
62-
$(SPLASH_LIBS_DIR)/$(LIB_ZSTD) \
127+
$(LIB_ZSTD) \
63128
$(CLINK)
64129

65130
satc_merge: $(OUT_BIN_DIR)/satc_merge
66131

67132
$(OUT_BIN_DIR)/satc_merge: $(SATC_MERGE_MAIN_DIR)/satc_merge.o \
68133
$(SATC_MERGE_MAIN_DIR)/pvals.o \
69134
$(SATC_MERGE_MAIN_DIR)/anchor.o \
70-
$(SATC_MERGE_MAIN_DIR)/extra_stats.o
135+
$(SATC_MERGE_MAIN_DIR)/extra_stats.o \
136+
$(LIB_ZSTD)
71137
-mkdir -p $(OUT_BIN_DIR)
72138
$(CC) -o $@ $^ \
73-
$(SPLASH_LIBS_DIR)/$(LIB_ZSTD) \
139+
$(LIB_ZSTD) \
74140
$(CLINK)
75141

76142
satc_dump: $(OUT_BIN_DIR)/satc_dump
77143

78-
$(OUT_BIN_DIR)/satc_dump: $(SATC_DUMP_MAIN_DIR)/satc_dump.o
144+
$(OUT_BIN_DIR)/satc_dump: $(SATC_DUMP_MAIN_DIR)/satc_dump.o $(LIB_ZSTD)
79145
-mkdir -p $(OUT_BIN_DIR)
80146
$(CC) -o $@ $^ \
81-
$(SPLASH_LIBS_DIR)/$(LIB_ZSTD) \
147+
$(LIB_ZSTD) \
82148
$(CLINK)
83149

84150
sig_anch: $(OUT_BIN_DIR)/sig_anch
@@ -99,15 +165,26 @@ $(OUT_BIN_DIR)/compactors: $(COMPACTORS_MAIN_DIR)/main.o \
99165
$(COMPACTORS_MAIN_DIR)/read_select.o \
100166
$(COMMON_DIR)/edit_distance.o \
101167
$(COMMON_DIR)/illumina_adapters_static.o \
168+
$(LIB_ZLIB) \
102169
$(SPLASH_LIBS_DIR)/cdflib/cdflib.o
170+
103171
-mkdir -p $(OUT_BIN_DIR)
104172
$(CC) -o $@ $^ \
105-
$(SPLASH_LIBS_DIR)/$(LIB_ZLIB) \
173+
$(LIB_ZLIB) \
106174
$(CLINK)
107175

176+
dsv_manip: $(OUT_BIN_DIR)/dsv_manip
177+
178+
$(OUT_BIN_DIR)/dsv_manip: $(DSV_MANIP_MAIN_DIR)/dsv_manip.o \
179+
$(DSV_MANIP_MAIN_DIR)/dsv_common.o \
180+
$(DSV_MANIP_MAIN_DIR)/limit_mode.o \
181+
$(DSV_MANIP_MAIN_DIR)/sort_mode.o
182+
-mkdir -p $(OUT_BIN_DIR)
183+
$(CC) -o $@ $^ \
184+
$(CLINK)
108185
download_kmc:
109186
-mkdir -p $(OUT_BIN_DIR)
110-
./download_kmc.sh $(OUT_BIN_DIR)
187+
./download_kmc.py $(OUT_BIN_DIR)
111188

112189
splash:
113190
-mkdir -p $(OUT_BIN_DIR)
@@ -130,6 +207,7 @@ uninstall:
130207
-rm $(PREFIX)/bin/splash
131208
-rm $(PREFIX)/bin/kmc
132209
-rm $(PREFIX)/bin/kmc_tools
210+
-rm $(PREFIX)/bin/dsv_manip
133211
-rm $(PREFIX)/bin/supervised_test.R
134212

135213
clean:
@@ -140,4 +218,6 @@ clean:
140218
-rm $(COMPACTORS_MAIN_DIR)/*.o
141219
-rm $(COMMON_DIR)/*.o
142220
-rm $(SIG_ANCH_MAIN_DIR)/*.o
221+
-rm $(DSV_MANIP_MAIN_DIR)/*.o
143222
-rm -rf $(OUT_BIN_DIR)
223+
-rm $(SPLASH_LIBS_DIR)/cdflib/*.o

build_docker.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import os
44

55
if __name__ == "__main__":
6+
build_release.run_cmd("git submodule init")
7+
build_release.run_cmd("git submodule update")
68
# https://stackoverflow.com/questions/15715825/how-do-you-get-the-git-repositorys-name-in-some-git-repository
79
repo = build_release.run_cmd_get_stdout("basename -s .git `git config --get remote.origin.url`").strip().lower()
810
with open("hash.git", "w") as f:

build_release.py

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,37 @@ def get_ver(splash_path):
2020
print("Error: cannot read SPLASH_VERSION")
2121
sys.exit(1)
2222

23-
def run_cmd(cmd):
23+
def get_os():
24+
if os.name == 'nt':
25+
return 'windows'
26+
elif os.name == 'posix':
27+
if os.uname()[0] == 'Linux':
28+
return 'linux'
29+
elif os.uname()[0] == 'Darwin':
30+
return 'mac'
31+
else:
32+
print("Error: unknown os", os.uname()[0])
33+
sys.exit(1)
34+
else:
35+
print("Error: unknown os.name", os.name)
36+
sys.exit(1)
37+
38+
def get_hardware():
39+
if os.name == 'nt':
40+
return 'x64' # TODO: do a real check and support ARM also...
41+
elif os.name == 'posix':
42+
if os.uname()[4] == 'x86_64':
43+
return 'x64'
44+
elif os.uname()[4] == 'aarch64' or os.uname()[4] == 'arm64':
45+
return 'arm64'
46+
else:
47+
print("Error: unknown hardware", os.uname()[4])
48+
sys.exit(1)
49+
else:
50+
print("Error: unknown os.name", os.name)
51+
sys.exit(1)
52+
53+
def run_cmd(cmd):
2454
p = subprocess.Popen(cmd, shell=True)
2555
p.communicate()
2656

@@ -29,6 +59,9 @@ def run_cmd_get_stdout(cmd):
2959
return p.stdout.decode('utf-8')
3060

3161
if __name__ == "__main__":
62+
system = get_os()
63+
hardware = get_hardware()
64+
3265
run_cmd("git submodule init")
3366
run_cmd("git submodule update")
3467
run_cmd("make clean")
@@ -51,5 +84,5 @@ def run_cmd_get_stdout(cmd):
5184

5285
ver = get_ver("bin/splash")
5386

54-
run_cmd(f"cd bin; tar -c * | pigz > ../splash-{ver}.linux.x64.tar.gz; cd ..;")
87+
run_cmd(f"cd bin; tar -c * | pigz > ../splash-{ver}.{system}.{hardware}.tar.gz; cd ..;")
5588
run_cmd("rm -rf bin")

download_kmc.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/usr/bin/env python3
2+
import os
3+
import sys
4+
import subprocess
5+
6+
bin_dir=sys.argv[1]
7+
8+
os.chdir(bin_dir)
9+
10+
if os.path.exists("kmc") and os.path.exists("kmc_tools"):
11+
sys.exit(0)
12+
13+
def get_os():
14+
if os.name == 'nt':
15+
return 'windows'
16+
elif os.name == 'posix':
17+
if os.uname()[0] == 'Linux':
18+
return 'linux'
19+
elif os.uname()[0] == 'Darwin':
20+
return 'mac'
21+
else:
22+
print("Error: unknown os", os.uname()[0])
23+
sys.exit(1)
24+
else:
25+
print("Error: unknown os.name", os.name)
26+
sys.exit(1)
27+
28+
def get_hardware():
29+
if os.name == 'nt':
30+
return 'x64' # TODO: do a real check and support ARM also...
31+
elif os.name == 'posix':
32+
if os.uname()[4] == 'x86_64':
33+
return 'x64'
34+
elif os.uname()[4] == 'aarch64' or os.uname()[4] == 'arm64':
35+
return 'arm64'
36+
else:
37+
print("Error: unknown hardware", os.uname()[4])
38+
sys.exit(1)
39+
else:
40+
print("Error: unknown os.name", os.name)
41+
sys.exit(1)
42+
43+
def run_cmd(cmd):
44+
p = subprocess.Popen(cmd, shell=True)
45+
p.communicate()
46+
47+
system = get_os()
48+
hardware = get_hardware()
49+
50+
tar_name=f"KMC3.2.2.{system}.{hardware}.tar.gz"
51+
URL=f"https://github.com/refresh-bio/KMC/releases/download/v3.2.2/{tar_name}"
52+
53+
run_cmd(f"wget {URL}")
54+
run_cmd(f"tar -xvf {tar_name}")
55+
run_cmd("mv bin/kmc .")
56+
run_cmd("mv bin/kmc_tools .")
57+
run_cmd("rm -rf bin")
58+
run_cmd("rm -rf include")
59+
run_cmd(f"rm {tar_name}")

download_kmc.sh

Lines changed: 0 additions & 15 deletions
This file was deleted.

libs/cloudflare-zlib

Submodule cloudflare-zlib added at 3cccc96

libs/cloudflare-zlib/libz.a

-149 KB
Binary file not shown.

0 commit comments

Comments
 (0)