Skip to content

Commit 31d4204

Browse files
committed
Merge branch 'release/3.1'
2 parents bfc437b + 93493ca commit 31d4204

36 files changed

+1043
-965
lines changed

appveyor.yml renamed to .appveyor.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ environment:
2020
test_script:
2121
# - ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
2222
- cd build
23-
- cmake . -DBUILD_PYTHON=OFF -DBUILD_NUMPY_SUPPORT=OFF
23+
- cmake . -DBUILD_PYTHON=OFF -DBUILD_NUMPY=OFF
2424
-DBOOST_ROOT="%BOOST_ROOT%" -DBoost_USE_STATIC_LIBS="ON"
2525
- cmake --build .
2626
- ctest -V

.travis.yml

Lines changed: 38 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -16,65 +16,52 @@ branches:
1616
- master
1717
- develop
1818

19-
# addons:
20-
# apt:
21-
# sources: deadsnakes
22-
# packages:
23-
# - python2.7
24-
# - python3.5
25-
# - python3-pip
26-
# - libpython2.7-dev
27-
# - libpython3.5-dev
28-
2919
matrix:
3020
include:
31-
- os: linux # minimum gcc
32-
env:
33-
CC=gcc CXX=g++ PYTHON_VERSION=2.7
34-
BUILD_PYTHON=OFF
35-
BUILD_NUMPY=OFF
36-
BUILD_SERIALIZATION=OFF
37-
- os: linux # maximum gcc
38-
env:
39-
CC=gcc CXX=g++ PYTHON_VERSION=2.7
40-
BUILD_PYTHON=ON
41-
BUILD_NUMPY=ON
42-
BUILD_SERIALIZATION=ON
43-
- os: linux # maximum gcc
44-
env:
45-
CC=gcc CXX=g++ PYTHON_VERSION=3.6
46-
BUILD_PYTHON=ON
47-
BUILD_NUMPY=OFF
48-
BUILD_SERIALIZATION=ON
49-
- os: linux # maximum clang
50-
env:
51-
CC=clang CXX=clang++
52-
BUILD_PYTHON=ON PYTHON_VERSION=2.7
53-
BUILD_NUMPY=ON
54-
BUILD_SERIALIZATION=ON
55-
- os: linux # coverage gcc
56-
env:
57-
CC=gcc CXX=g++ GCOV=gcov PYTHON_VERSION=2.7
58-
CMAKE_BUILD_TYPE=coverage
21+
- os: linux # gcc minimum
22+
env: PYVER=2.7 CC=gcc CXX=g++ PY=OFF NUMPY=OFF SERIAL=OFF
23+
- os: linux # gcc py27 w/o numpy
24+
env: PYVER=2.7 CC=gcc CXX=g++ PY=ON NUMPY=OFF SERIAL=ON
25+
- os: linux # gcc py27
26+
env: PYVER=2.7 CC=gcc CXX=g++ PY=ON NUMPY=ON SERIAL=ON
27+
- os: linux # gcc py36
28+
env: PYVER=3.6 CC=gcc CXX=g++ PY=ON NUMPY=ON SERIAL=ON
29+
- os: linux # clang py36
30+
env: PYVER=3.6 CC=clang CXX=clang++ PY=ON NUMPY=ON SERIAL=ON
31+
- os: linux # coverage py27
32+
env: PYVER=2.7 CC=gcc CXX=g++ GCOV=gcov
33+
- os: osx # minimum osx Xcode 8.3
34+
osx_image: xcode8.3
35+
env: PY=OFF NUMPY=OFF SERIAL=OFF
36+
allow_failures:
37+
- os: osx
5938

6039
git:
61-
depth: 1
40+
depth: 10
6241

63-
# Install packages (pre-installed: pytest numpy)
42+
# Install packages (pre-installed: pytest)
6443
install:
65-
- pyenv versions
66-
- pyenv global ${PYTHON_VERSION}
44+
- if [[ ${TRAVIS_OS_NAME} == "osx" ]]; then
45+
export PATH="/usr/local/opt/python/libexec/bin:$PATH";
46+
else
47+
pyenv versions;
48+
pyenv global ${PYVER};
49+
fi
50+
51+
- python --version
52+
- python build/get_python_include.py
53+
- python build/get_python_library.py
6754

68-
- pip install --user numpy
55+
- pip install --upgrade numpy # update numpy to avoid segfaults later
6956
- source build/travis_install_boost.sh
7057

71-
- if [ "${CMAKE_BUILD_TYPE}" = "coverage" ]; then
72-
pip install --user cpp-coveralls urllib3[secure];
58+
- if [ -n "$GCOV" ]; then
59+
pip install cpp-coveralls urllib3[secure];
7360
fi
7461

7562
script:
7663
- cd build
77-
- if [ "${CMAKE_BUILD_TYPE}" = "coverage" ]; then
64+
- if [ -n "$GCOV" ]; then
7865
cmake . -DBOOST_ROOT=${BOOST_DIR}
7966
-DBUILD_PYTHON=OFF
8067
-DBUILD_SERIALIZATION=ON
@@ -95,17 +82,17 @@ script:
9582
ctest;
9683
else
9784
cmake . -DBOOST_ROOT=${BOOST_DIR}
98-
-DBUILD_PYTHON=${BUILD_PYTHON}
99-
-DBUILD_NUMPY=${BUILD_NUMPY}
100-
-DBUILD_SERIALIZATION=${BUILD_SERIALIZATION}
101-
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} &&
85+
-DBUILD_PYTHON=${PY}
86+
-DBUILD_NUMPY=${NUMPY}
87+
-DBUILD_SERIALIZATION=${SERIAL}
88+
-DCMAKE_BUILD_TYPE=Debug &&
10289
make -j2 &&
10390
ctest -V;
10491
fi
10592

10693
# Calculate coverage
10794
after_success:
108-
if [ "${CMAKE_BUILD_TYPE}" = "coverage" ]; then
95+
if [ -n "$GCOV" ]; then
10996
coveralls -r .. -b . --verbose --exclude ${TRAVIS_BUILD_DIR}/deps --gcov=`which ${GCOV}` --gcov-options '\-lpbc';
11097
fi
11198

build/CMakeLists.txt

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,7 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
5050
endif()
5151

5252
if(TRACE_ALLOCS)
53-
if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
54-
add_compile_options(/DBOOST_HISTOGRAM_TRACE_ALLOCS)
55-
else()
56-
add_compile_options(-DBOOST_HISTOGRAM_TRACE_ALLOCS)
57-
endif()
53+
add_definitions(-DBOOST_HISTOGRAM_TRACE_ALLOCS)
5854
endif()
5955

6056
if(BUILD_PYTHON)
@@ -109,9 +105,9 @@ else()
109105
# serialization only required for tests
110106
if (BUILD_SERIALIZATION)
111107
find_package(Boost ${MIN_BOOST_VERSION} REQUIRED serialization)
112-
add_definitions(-DHAVE_SERIALIZATION)
113108
else ()
114109
find_package(Boost ${MIN_BOOST_VERSION} REQUIRED)
110+
add_definitions(-DBOOST_HISTOGRAM_NO_SERIALIZATION)
115111
endif()
116112
set(LIBRARIES ${Boost_LIBRARIES})
117113
endif()
@@ -191,12 +187,6 @@ file(GLOB_RECURSE
191187
../test/*_test.cpp ../include/*.hpp
192188
)
193189

194-
add_custom_target(clf
195-
COMMAND clang-format
196-
-i
197-
${ALL_SOURCE_FILES}
198-
)
199-
200190
get_property(INCLUDE_DIRS DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES)
201191
set(TIDY_INCLUDE)
202192
foreach(x ${INCLUDE_DIRS})

build/get_python_library.py

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,37 @@
11
from distutils import sysconfig
2-
import os.path
2+
import os
33
import sys
4-
import glob
4+
from pprint import pprint
5+
from glob import glob
56
pj = os.path.join
67

7-
pyver = sysconfig.get_config_var('VERSION')
8-
getvar = sysconfig.get_config_var
8+
LIB_KEYS = ('LIBDEST', 'LIBDIR', 'LIBPL')
99

10-
libname = "python" + pyver
10+
if sys.platform == "darwin":
11+
so_ext = "dylib"
12+
elif sys.platform.startswith("linux"):
13+
so_ext = "so"
14+
else:
15+
so_ext = "dll"
1116

12-
for libvar in ('LIBDIR', 'LIBPL'):
13-
for ext in ('so', 'dylib', 'dll'):
14-
match = pj(getvar(libvar), "*" + libname + "*." + ext)
15-
lib = glob.glob(match)
16-
if lib:
17-
assert len(lib) == 1
18-
sys.stdout.write(lib[0])
19-
raise SystemExit
17+
config = sysconfig.get_config_vars()
18+
19+
library = "*python%s*%s" % (sysconfig.get_python_version(), so_ext)
20+
for libpath in LIB_KEYS:
21+
p = pj(config[libpath], library)
22+
cand = glob(p)
23+
if cand and len(cand) == 1:
24+
sys.stdout.write(cand[0])
25+
raise SystemExit
26+
27+
pprint("no library found, dumping library pattern, config, and directory contents:")
28+
pprint(library)
29+
pprint(config)
30+
31+
for libpath in LIB_KEYS:
32+
pprint(libpath)
33+
p = config[libpath]
34+
if os.path.exists(p):
35+
pprint(os.listdir(p))
36+
37+
raise SystemExit(1)

build/make_user_config.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import sys
2+
from distutils import sysconfig
3+
4+
s = "using python : {version} : {prefix} : {inc} ;\n".format(
5+
version=sysconfig.get_python_version(),
6+
prefix=sysconfig.get_config_var("prefix"),
7+
inc=sysconfig.get_python_inc())
8+
9+
sys.stdout.write(s)

build/travis_install_boost.sh

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,16 @@ if [[ -z "${TRAVIS_BUILD_DIR}" ]]; then
77
fi
88
PYVER=$(python -c 'import sys; sys.stdout.write("%i"%sys.version_info.major)')
99
BOOST_DIR=${TRAVIS_BUILD_DIR}/deps/boost-${BOOST_VERSION}-py${PYVER}
10+
PROJECT_DIR=$(pwd)
1011
echo "Boost: ${BOOST_DIR}"
1112
mkdir -p ${BOOST_DIR}
12-
BOOSTRAP_PATCH_REGEX="s|\( *using python.*\);|\1: $(python build/get_python_include.py) ;|"
1313
if [[ -z "$(ls -A ${BOOST_DIR})" ]]; then
1414
BOOST_URL="http://sourceforge.net/projects/boost/files/boost/${BOOST_VERSION}/boost_${BOOST_VERSION//\./_}.tar.gz"
1515
{ wget --quiet -O - ${BOOST_URL} | tar --strip-components=1 -xz -C ${BOOST_DIR}; } || exit 1
16-
(cd ${BOOST_DIR} && ./bootstrap.sh > /dev/null && \
17-
sed -i "${BOOSTRAP_PATCH_REGEX}" project-config.jam && \
18-
./b2 install --prefix=${BOOST_DIR} --with-serialization --with-iostreams --with-python | grep -v -e common\.copy -e common\.mkdir)
16+
( cd ${BOOST_DIR}
17+
./bootstrap.sh > /dev/null
18+
python ${PROJECT_DIR}/build/make_user_config.py > $HOME/user-config.jam
19+
cat $HOME/user-config.jam
20+
(./b2 install --prefix=${BOOST_DIR} --with-serialization --with-iostreams --with-python | grep -v -e common\.copy -e common\.mkdir) )
1921
fi
20-
ls ${BOOST_DIR}/lib | grep libboost
22+
ls ${BOOST_DIR}/lib | grep libboost || exit 1

doc/changelog.qbk

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,19 @@
22

33
[master]
44

5+
[heading 3.1 (not in boost)]
6+
7+
* Renamed `bincount` method to `size`
8+
* Support for axes with only overflow and no underflow bin
9+
* category axis now by default has a bin for "other" input that does not fall
10+
into the predefined categories, making it consistent with other axes
11+
* NaN is now consistently put into overflow bin for all axes
12+
* Eliminated warnings about safe internal conversions on MSVC
13+
* Established a cpp house style with corresponding .clang-format file
14+
* Better detection of Python library on all systems
15+
* Improved code coverage by testing more input errors
16+
* Raise ValueError instead of generic RuntimeError in Python on input errors
17+
518
[heading 3.0 (not in boost)]
619

720
* Support for efficient adding of multiple histograms and scaling

doc/guide.qbk

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,11 @@ This guide covers the basic and more advanced usage of the library. It is design
44

55
[section Introduction]
66

7-
This library provides a templated [@https://en.wikipedia.org/wiki/Histogram histogram] class for multi-dimensional data. A histogram consists a number of non-overlapping cells in the data space, called *bins*. When a value tuple is passed to the histogram, the corresponding bin that envelopes the value tuple is found and a counter associated to the bin is incremented by one. Keeping the bin counts in memory for analysis requires fewer resources than keeping all the original value tuples around. If the bins are small enough[footnote What small enough means has to be decided case by case.], they still represent the original information in the data distribution. A histogram is therefore a useful lossy compression. It is also often used as a simple estimator for the [@https://en.wikipedia.org/wiki/Probability_density_function probability density function] of the input data. More complex density estimators exist, but histograms have the appeal that they are easy to reason about.
7+
This library provides a templated [@https://en.wikipedia.org/wiki/Histogram histogram] class for multi-dimensional data. A histogram consists of a number of non-overlapping consecutive cells in data space, called *bins*. When a value is passed to the histogram, the corresponding bin that envelops the value is found and an associated counter is incremented. In large data sets, keeping the bin counts in memory for analysis requires fewer resources than keeping the original value tuples. If the bins are small enough[footnote What small enough means has to be decided case by case.], they still represent the original information in the data distribution. A histogram is therefore a useful lossy compression. It is also often used as a simple estimator for the [@https://en.wikipedia.org/wiki/Probability_density_function probability density function] of the input data. More complex density estimators exist, but histograms are easy to reason about.
88

9-
Input for the histogram can be one- or multi-dimensional. In the multi-dimensional case, the input consist of tuples of values which belong together, describing different aspects of the same entity. A point in space is an example. You need three coordinate values to describe a point. The entity here is the point, and to fully characterize a point distribution in space you need three values and therefore a three-dimensional (3d) histogram.
9+
Input for the histogram can be one- or multi-dimensional. In the multi-dimensional case, the input consists of tuples of values which belong together, describing different aspects of the same entity. A point in space is an example. You need three coordinate values to describe a point. The entity here is the point, and to fully characterize a point distribution in space you need three values and therefore a three-dimensional (3d) histogram. The advantage of using a 3d histogram over three separate 1d histograms, one for each coordinate, is that the 3d histogram is able to capture more information. For example, you could have a point distribution that looks like a checker board in three dimensions (a checker cube): high and low densities are alternating along each coordinate. Then the 1d histograms for each separate coordinate would look like flat distributions, completely hiding the complex structure, while the 3d histogram would retain the structure for further analysis.
1010

11-
The advantage of using a 3d histogram over three separate 1d histograms, one for each coordinate, is that the 3d histogram is able to capture more information. For example, you could have a point distribution that looks like a checker board in three dimensions (a checker cube): high and low densities are alternating along each coordinate. Then the 1d histograms for each separate coordinate would look like flat distributions, completely hiding the complex structure, while the 3d histogram would retain the structure for further analysis.
12-
13-
The term /histogram/ is usually strictly used for something with bins over continuous data. The histogram class in this library generalize this concept. It can also process categorical variables and it even allows for non-consecutive bins. There is no restriction to numbers as input. Any type can be fed into the histogram, if there is a specialized axis object that maps values of this type to a bin index. The only remaining restriction is that bins are non-overlapping, since there must be a unique mapping from input value to bin.
11+
The term /histogram/ is usually strictly used for something with bins over discrete or continuous data. The histogram class can also process categorical variables and it even allows for non-consecutive bins if that is desired. There is no restriction to numbers as input. Any type can be fed into the histogram, if the user provides a specialized axis class that maps values of this type to a bin index. The only remaining restriction is that bins are non-overlapping, since there must be a unique mapping from input value to bin. The library is not able to automatically ensure this for user-provided axis classes, so the responsibility is on the implementer.
1412

1513
[endsect]
1614

@@ -42,7 +40,7 @@ When you work with dynamic histograms, you can also create a sequence of axes at
4240

4341
[funcref boost::histogram::make_static_histogram make_static_histogram] cannot handle this case because a static histogram can only be constructed when the number and types of all axes are known already at compile time. While strictly speaking that is also true in this example, you could have filled the vector also at run-time, based on run-time user input.
4442

45-
[note Memory for bin counters is allocated lazily, because if the default storage policy [classref boost::histogram::adaptive_storage adaptive_storage] is used. Allocation is deferred to the first time, when input values are passed to the histogram. Therefore memory allocation exceptions are not thrown when the histogram is created, but possibly later. This gives you a chance to check how much memory the histogram will allocate and possible give a warning if that amount is excessively large. Use the method `histogram::bincount()` to see how many bins your axis layout requires. At the first fill, that many bytes will be allocated. The allocated amount of memory may grow further later when the capacity of the bin counters needs to grow.]
43+
[note Memory for bin counters is allocated lazily if the default storage policy [classref boost::histogram::adaptive_storage adaptive_storage] is used. Allocation is deferred until the first time input values are passed to the histogram. Therefore memory allocation exceptions are not thrown when the histogram is created, but possibly later. This gives you a chance to check how much memory the histogram will allocate and possibly give a warning if that amount is excessively large. Use the method `histogram::size()` to see how many bins your axis layout requires. At the first fill, that many bytes will be allocated. The allocated amount of memory may grow further later when the capacity of the bin counters needs to grow.]
4644

4745
[endsect]
4846

@@ -70,7 +68,7 @@ By default, additional under- and overflow bins are added automatically for each
7068

7169
We use an [classref boost::histogram::axis::integer integer axis] here, because the input values are integers and we want one bin for each eye value.
7270

73-
[note The [classref boost::histogram::axis::circular circular axis] never creates under- and overflow bins. The highest bin wraps around to the lowest bin and vice versa, so there is no possibility for overflow. Similarly, the [classref boost::histogram::axis::category category axis] comes without under- and overflow bins, because these terms have no meaning for categorical variables.]
71+
[note The [classref boost::histogram::axis::circular circular axis] never creates under- and overflow bins. The highest bin wraps around to the lowest bin and vice versa, so there is no possibility for overflow. The [classref boost::histogram::axis::category category axis] comes only with an "overflow" bin, which counts all types of categorical input that was not recognized.]
7472

7573
[endsect]
7674

doc/rationale.qbk

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,15 +88,15 @@ In a sense, [classref boost::histogram::adaptive_storage adaptive_storage] is th
8888

8989
[section:uoflow Under- and overflow bins]
9090

91-
Axis instances by default add extra bins that count values which fall below or above the range covered by the axis (for those types where that makes sense). These extra bins are called under- and overflow bins, respectively. The extra bins can be turned off individually for each axis to conserve memory, but it is generally recommended to keep them. The extra bins do not interfere with normal bin counting. On an axis with `n` bins, the first bin has the index `0`, the last bin `n-1`, while the under- and overflow bins are accessible at the indices `-1` and `n`, respectively.
91+
Axis instances by default add extra bins that count values which fall below or above the range covered by the axis (for those types where that makes sense). These extra bins are called under- and overflow bins, respectively. The extra bins can be turned off individually for each axis to conserve memory, but it is generally recommended to have them. The extra bins do not interfere with normal bin counting. On an axis with `n` bins, the first bin has the index `0`, the last bin `n-1`, while the under- and overflow bins are accessible at the indices `-1` and `n`, respectively.
9292

9393
Under- and overflow bins are useful in one-dimensional histograms, and nearly essential in multi-dimensional histograms. Here are the advantages:
9494

95-
* No loss: The total sum over all bin counts is strictly equal to the number of times `fill(...)` was called. Even NaN values are counted, they end up in the underflow bin by convention.
95+
* No loss: The total sum over all bin counts is strictly equal to the number of times the histogram was filled. Even NaN values are counted; they are put in the overflow bin by convention.
9696

9797
* Diagnosis: Unexpected extreme values show up in the extra bins, which otherwise may be overlooked.
9898

99-
* Reducibility: In multi-dimensional histograms, an out-of-range value along one axis may be paired with an in-range value along another axis. If under- and overflow bins are missing, such a value pair is lost completely. If you apply a `reduce` operation on a histogram, which removes somes axes by resummation of the bin counts, this would lead to distortions of the histogram along the remaining axes. When under- and overflow bins are present, the `reduce` operation always produces the same sub-histogram that would have been obtained if it was filled from scratch with the original data.
99+
* Ability to reduce histograms: In multi-dimensional histograms, an out-of-range value along one axis may be paired with an in-range value along another axis. If under- and overflow bins are missing, such a value pair is lost completely. If you apply a `reduce` operation on a histogram, which removes some axes by summing counts over that dimension, this would lead to distortions of the histogram along the remaining axes. When under- and overflow bins are present, the `reduce` operation always produces a sub-histogram identical to one obtained if it was filled from scratch with the original data.
100100

101101
[endsect]
102102

0 commit comments

Comments
 (0)