@@ -19,9 +19,13 @@ project(Tokenizers)
19
19
# User-facing build switches. All three default to OFF.
option(TOKENIZERS_BUILD_TEST "Build tests" OFF)
option(TOKENIZERS_BUILD_TOOLS "Build tools" OFF)
# Gates the PCRE2-based regex_lookahead library configured later in this file.
option(
  SUPPORT_REGEX_LOOKAHEAD
  "Support regex lookahead patterns (requires PCRE2)"
  OFF
)
23
24
25
+ # Include CMakePackageConfigHelpers for configure_package_config_file
26
+ include (CMakePackageConfigHelpers )
24
27
include (Utils.cmake )
28
+
25
29
# Ignore weak attribute warning: append -Wno-attributes to the C++ flags used
# by this directory and everything added beneath it.
string(APPEND CMAKE_CXX_FLAGS " -Wno-attributes")
27
31
@@ -47,25 +51,31 @@ set(tokenizers_source_files
47
51
${CMAKE_CURRENT_SOURCE_DIR} /src/regex.cpp
48
52
${CMAKE_CURRENT_SOURCE_DIR} /src/sentencepiece.cpp
49
53
${CMAKE_CURRENT_SOURCE_DIR} /src/tiktoken.cpp
50
- ${CMAKE_CURRENT_SOURCE_DIR} /src/token_decoder.cpp )
54
+ ${CMAKE_CURRENT_SOURCE_DIR} /src/token_decoder.cpp
55
+ )
51
56
52
57
# Pick up every .cpp in the vendored llama.cpp-unicode source directory. The
# glob runs at configure time, so files added later need a re-configure.
file(
  GLOB
  unicode_source_files
  ${CMAKE_CURRENT_SOURCE_DIR}/third-party/llama.cpp-unicode/src/*.cpp
)

# The tokenizers static library: the project sources listed in
# tokenizers_source_files plus the globbed unicode sources.
add_library(
  tokenizers
  STATIC
  ${tokenizers_source_files}
  ${unicode_source_files}
)
56
63
57
64
# GNUInstallDirs defines CMAKE_INSTALL_INCLUDEDIR, which the install-interface
# entry below needs. Including the module more than once is harmless.
include(GNUInstallDirs)

# Using abseil from sentencepiece/third_party
#
# BUILD_INTERFACE entries are only visible to targets built in this tree and
# are dropped from the exported target. The INSTALL_INTERFACE entry gives the
# installed tokenizers target an include directory (relative to the install
# prefix) matching where this file's install rules copy the public headers.
# Without it, consumers of the installed package would have to add the include
# path by hand.
target_include_directories(
  tokenizers
  PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third-party/sentencepiece>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third-party/sentencepiece/src>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third-party/re2>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third-party/json/single_include>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third-party/llama.cpp-unicode/include>
    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
)

# Both dependencies are PUBLIC, so they propagate to anything linking
# tokenizers.
target_link_libraries(tokenizers PUBLIC sentencepiece-static re2::re2)
67
76
68
- if (SUPPORT_REGEX_LOOKAHEAD OR TOKENIZERS_BUILD_TEST )
77
+ if (SUPPORT_REGEX_LOOKAHEAD )
78
+ set (PCRE2_STATIC_PIC ON )
69
79
set (PCRE2_BUILD_PCRE2_8 ON )
70
80
set (PCRE2_BUILD_PCRE2_16 OFF )
71
81
set (PCRE2_BUILD_PCRE2_32 OFF )
@@ -76,55 +86,85 @@ if(SUPPORT_REGEX_LOOKAHEAD OR TOKENIZERS_BUILD_TEST)
76
86
set (PCRE2_BUILD_DOCS OFF )
77
87
set (PCRE2_BUILD_LIBPCRE2_PDB OFF )
78
88
add_subdirectory (${CMAKE_CURRENT_SOURCE_DIR} /third-party/pcre2 )
89
+
90
# Overwrite (not append to) pcre2-8-static's interface include directories
# with a single build-tree-only entry for the vendored PCRE2 sources.
set_property(
  TARGET pcre2-8-static
  PROPERTY
    INTERFACE_INCLUDE_DIRECTORIES
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third-party/pcre2/src>
)

# Static library built from the three regex source files below.
add_library(
  regex_lookahead
  STATIC
  ${CMAKE_CURRENT_SOURCE_DIR}/src/pcre2_regex.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/src/regex_lookahead.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/src/std_regex.cpp
)
target_link_libraries(regex_lookahead PUBLIC pcre2-8)
target_include_directories(
  regex_lookahead
  PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/third-party/pcre2/src>
)
# NOTE(review): target_link_options_shared_lib is not defined in this file;
# presumably it comes from Utils.cmake -- confirm.
target_link_options_shared_lib(regex_lookahead)

# Make the regex library part of tokenizers' public link interface.
target_link_libraries(tokenizers PUBLIC regex_lookahead)
90
- endif ()
91
-
92
- # Build test
93
- if (TOKENIZERS_BUILD_TEST )
94
- enable_testing ()
95
- include (FetchContent )
96
- # CMAKE
97
- FetchContent_Declare (
98
- googletest
99
- # Specify the commit you depend on and update it regularly.
100
- URL https://github.com/google/googletest/archive/5376968f6948923e2411081fd9372e71a59d8e77.zip
111
# Load GNUInstallDirs before the destinations below are expanded, so that
# CMAKE_INSTALL_LIBDIR and CMAKE_INSTALL_BINDIR are guaranteed to be defined
# at this point. Repeated includes of the module are harmless.
include(GNUInstallDirs)

# Install the regex library and the PCRE2 static library, and record both in
# the tokenizers-targets export set.
install(
  TARGETS regex_lookahead pcre2-8-static
  EXPORT tokenizers-targets
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)
102
- set (gtest_force_shared_crt
103
- ON
104
- CACHE BOOL "" FORCE )
105
- FetchContent_MakeAvailable (googletest )
106
-
107
- file (GLOB test_source_files ${CMAKE_CURRENT_SOURCE_DIR} /test/test_*.cpp )
108
-
109
- set (test_env "RESOURCES_PATH=${CMAKE_CURRENT_SOURCE_DIR} /test/resources" )
110
- foreach (test_source_file ${test_source_files} )
111
- get_filename_component (test_name ${test_source_file} NAME_WE )
112
- message (STATUS "Configuring unit test ${test_name} " )
113
- add_executable (${test_name} ${test_source_file} )
114
- target_include_directories (
115
- ${test_name}
116
- PRIVATE GTEST_INCLUDE_PATH
117
- ${CMAKE_CURRENT_SOURCE_DIR} /include
118
- ${CMAKE_CURRENT_SOURCE_DIR} /third-party/sentencepiece
119
- ${CMAKE_CURRENT_SOURCE_DIR} /third-party/re2
120
- ${CMAKE_CURRENT_SOURCE_DIR} /third-party/json/single_include )
121
- target_link_libraries (${test_name} gtest_main GTest::gmock tokenizers )
122
- add_test (${test_name} "${test_name} " )
123
- set_tests_properties (${test_name} PROPERTIES ENVIRONMENT ${test_env} )
124
- endforeach ()
125
118
endif ()
126
119
127
120
# Build tools: add the examples/tokenize_tool subdirectory only when
# TOKENIZERS_BUILD_TOOLS is enabled.
if(TOKENIZERS_BUILD_TOOLS)
  add_subdirectory(examples/tokenize_tool)
endif()
124
+
125
# Installation rules. GNUInstallDirs supplies the CMAKE_INSTALL_* destination
# variables used by the install() calls.
include(GNUInstallDirs)

# Install tokenizers along with the re2 and sentencepiece-static targets, and
# record all three in the tokenizers-targets export set.
install(
  TARGETS
    tokenizers
    re2
    sentencepiece-static
  EXPORT tokenizers-targets
  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
136
+
137
# Project headers: copy the *.h files under include/ into the install include
# directory. The trailing slash copies the directory's contents rather than
# the directory itself.
install(
  DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
  FILES_MATCHING PATTERN "*.h"
)

# Vendored JSON headers: copy the *.hpp files under
# third-party/json/single_include/ into the same install include directory.
install(
  DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/third-party/json/single_include/
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
  FILES_MATCHING PATTERN "*.hpp"
)
151
+
152
# Every CMake package file is installed into this one directory.
set(tokenizers_cmake_install_dir ${CMAKE_INSTALL_LIBDIR}/cmake/tokenizers)

# Write the export set out as tokenizers-targets.cmake, with each target
# prefixed by the tokenizers:: namespace.
install(
  EXPORT tokenizers-targets
  NAMESPACE tokenizers::
  FILE tokenizers-targets.cmake
  DESTINATION ${tokenizers_cmake_install_dir}
)

# Generate tokenizers-config.cmake in the build tree from the template under
# cmake/. CMAKE_INSTALL_INCLUDEDIR is passed as a path variable so the template
# can refer to it relative to the install prefix.
configure_package_config_file(
  ${CMAKE_CURRENT_SOURCE_DIR}/cmake/tokenizers-config.cmake.in
  ${CMAKE_CURRENT_BINARY_DIR}/tokenizers-config.cmake
  INSTALL_DESTINATION ${tokenizers_cmake_install_dir}
  PATH_VARS CMAKE_INSTALL_INCLUDEDIR
)

# Install the generated config file next to the exported targets file.
install(
  FILES ${CMAKE_CURRENT_BINARY_DIR}/tokenizers-config.cmake
  DESTINATION ${tokenizers_cmake_install_dir}
)
0 commit comments