Skip to content

Commit dcffce9

Browse files
committed
Merge pull request #22 from bashtage/dSFMT
FIX: Fix alignment to enable SSE2 for dSFMT
2 parents 8ecbf26 + 504e14f commit dcffce9

File tree

3 files changed

+41
-13
lines changed

3 files changed

+41
-13
lines changed

appveyor.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ build_script:
2424

2525
test_script:
2626
- cd ..
27-
- nosetests randomstate
27+
- python -c "import randomstate.prng.dsfmt as d;d.RandomState(1)"
28+
- nosetests -vv randomstate
2829

2930
on_success:
3031
- cd %GIT_DIR%\randomstate

randomstate/interface.pyx

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ except:
1111
import numpy as np
1212
cimport numpy as np
1313
cimport cython
14-
from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t
14+
from libc.stdint cimport (uint8_t, uint16_t, uint32_t, uint64_t, int8_t,
15+
int16_t, int32_t, int64_t, intptr_t)
1516
from cpython cimport Py_INCREF
17+
from cpython.mem cimport PyMem_Malloc, PyMem_Free
1618

1719
import randomstate
1820
from binomial cimport binomial_t
@@ -129,29 +131,49 @@ cdef double kahan_sum(double *darr, np.npy_intp n):
129131
cdef class RandomState:
130132
CLASS_DOCSTRING
131133

134+
# cdef rng_t rng
135+
cdef void *rng_loc
132136
cdef binomial_t binomial_info
133-
cdef rng_t rng
134137
cdef aug_state rng_state
135138
cdef object lock
136139
poisson_lam_max = POISSON_LAM_MAX
137140
__MAXSIZE = <uint64_t>sys.maxsize
138141

139142
IF RNG_SEED==1:
140143
def __init__(self, seed=None):
141-
self.rng_state.rng = &self.rng
144+
IF RNG_MOD_NAME == 'dsfmt':
145+
cdef int8_t *iptr
146+
cdef int8_t offset = 0
147+
cdef intptr_t alignment = 0
148+
self.rng_loc = PyMem_Malloc(sizeof(rng_t))
149+
self.rng_state.rng = <rng_t *>self.rng_loc
150+
alignment = <intptr_t>(&(self.rng_state.rng.status[0].u32[0]))
151+
if (alignment % 16) != 0:
152+
iptr = <int8_t *>self.rng_state.rng
153+
offset = 16 - (alignment % 16)
154+
if offset < 0:
155+
offset += 16
156+
self.rng_state.rng = <rng_t *>(iptr + offset)
157+
ELSE:
158+
self.rng_loc = PyMem_Malloc(sizeof(rng_t))
159+
self.rng_state.rng = <rng_t *>self.rng_loc
160+
142161
self.rng_state.binomial = &self.binomial_info
143162
self._reset_state_variables()
144163
self.lock = Lock()
145164
self.seed(seed)
146165

147166
ELSE:
148167
def __init__(self, seed=None, inc=None):
149-
self.rng_state.rng = &self.rng
168+
self.rng_state.rng = <rng_t *>PyMem_Malloc(sizeof(rng_t)) # &self.rng
150169
self.rng_state.binomial = &self.binomial_info
151170
self._reset_state_variables()
152171
self.lock = Lock()
153172
self.seed(seed, inc)
154173

174+
def __dealloc__(self):
175+
PyMem_Free(self.rng_loc)
176+
155177
# Pickling support:
156178
def __getstate__(self):
157179
return self.get_state()

setup.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@
1313
mod_dir = './randomstate'
1414
configs = []
1515

16-
rngs = ['RNG_MLFG_1279_861', 'RNG_PCG32', 'RNG_PCG64', 'RNG_DSFMT',
17-
'RNG_MT19937', 'RNG_XORSHIFT128', 'RNG_XORSHIFT1024', 'RNG_MRG32K3A']
16+
rngs = ['RNG_DSFMT', 'RNG_MLFG_1279_861', 'RNG_PCG32', 'RNG_PCG64', 'RNG_MT19937',
17+
'RNG_XORSHIFT128', 'RNG_XORSHIFT1024', 'RNG_MRG32K3A']
1818

19-
compile_rngs = rngs[:]
19+
compile_rngs = rngs[:] # ['RNG_DSFMT'] # rngs[:]
2020

2121
extra_defs = []
2222
extra_link_args = ['Advapi32.lib', 'Kernel32.lib'] if os.name == 'nt' else []
23-
extra_compile_args = [] if os.name == 'nt' else ['-std=c99']
23+
base_extra_compile_args = [] if os.name == 'nt' else ['-std=c99']
2424

2525

2626
def write_config(file_name, config):
@@ -48,6 +48,7 @@ def write_config(file_name, config):
4848
join(mod_dir, 'src', 'common', 'entropy.c'),
4949
join(mod_dir, 'distributions.c')]
5050
include_dirs = base_include_dirs[:]
51+
extra_compile_args = base_extra_compile_args[:]
5152

5253
if rng == 'RNG_PCG32':
5354
sources += [join(mod_dir, 'src', 'pcg', 'pcg32.c')]
@@ -117,15 +118,19 @@ def write_config(file_name, config):
117118
sources += [join(mod_dir, 'shims', 'dSFMT', 'dSFMT-shim.c')]
118119
# TODO: HAVE_SSE2 should only be for platforms that have SSE2
119120
# TODO: But how to reliably detect?
120-
defs = [('DSFMT_RNG', '1'),('DSFMT_MEXP','19937')] # ('HAVE_SSE2', '1'),
121+
defs = [('DSFMT_RNG', '1'),('DSFMT_MEXP','19937')]
122+
defs += [('HAVE_SSE2', '1')]
123+
if os.name == 'nt':
124+
extra_compile_args = base_extra_compile_args + ['/arch:SSE2']
121125

122126
include_dirs += [join(mod_dir, 'src', 'dSFMT')]
123127

124128
config = {'file_name': file_name,
125129
'sources': sources,
126130
'include_dirs': include_dirs,
127131
'defs': defs,
128-
'flags': dict([(k, v) for k, v in flags.items()])
132+
'flags': dict([(k, v) for k, v in flags.items()]),
133+
'compile_args': extra_compile_args
129134
}
130135

131136
configs.append(config)
@@ -136,7 +141,7 @@ def write_config(file_name, config):
136141
join(mod_dir, 'src', 'common', 'entropy.c')],
137142
include_dirs=base_include_dirs,
138143
define_macros=extra_defs,
139-
extra_compile_args=extra_compile_args,
144+
extra_compile_args=base_extra_compile_args,
140145
extra_link_args=extra_link_args)]
141146

142147
for config in configs:
@@ -157,7 +162,7 @@ def write_config(file_name, config):
157162
sources=config['sources'],
158163
include_dirs=config['include_dirs'],
159164
define_macros=config['defs'] + extra_defs,
160-
extra_compile_args=extra_compile_args,
165+
extra_compile_args=config['compile_args'],
161166
extra_link_args=extra_link_args)
162167
extensions.append(ext)
163168

0 commit comments

Comments
 (0)