version increment to SLiM 4.1

bhaller · bhaller · commit 45b0e9ba1fd6 · 2023-12-04T23:04:59.000-05:00
diff --git a/EidosScribe/EidosScribe-Info.plist b/EidosScribe/EidosScribe-Info.plist
@@ -17,7 +17,7 @@
 	<key>CFBundlePackageType</key>
 	<string>APPL</string>
 	<key>CFBundleShortVersionString</key>
-	<string>3.0.1</string>
+	<string>3.1</string>
 	<key>CFBundleSignature</key>
 	<string>????</string>
 	<key>CFBundleVersion</key>
diff --git a/EidosScribe/EidosScribe_multi-Info.plist b/EidosScribe/EidosScribe_multi-Info.plist
@@ -17,7 +17,7 @@
 	<key>CFBundlePackageType</key>
 	<string>APPL</string>
 	<key>CFBundleShortVersionString</key>
-	<string>3.0.1</string>
+	<string>3.1</string>
 	<key>CFBundleSignature</key>
 	<string>????</string>
 	<key>CFBundleVersion</key>
diff --git a/PARALLEL b/PARALLEL
@@ -0,0 +1,86 @@
+This file contains unreleased/unannounced changes for parallelization, in addition to those listed in VERSIONS.  It will be merged into VERSIONS once parallel SLiM is released publicly.
+
+PARALLEL changes (now in the master branch, but disabled):
+	TO DO: the openmp branch had changes to travis.yml to test the openmp branch, which should be replicated somehow for GitHub Actions:
+		using macos xcode11.6/clang and linux bionic/gcc
+		on osx, "brew install libomp;" "ls -l /usr/local/opt/libomp/include/omp.h;" "ls -l /usr/local/include;"
+		note the paths for the headers/lib are likely different, so that will require a bit of legwork
+	updated SLiM.xcodeproj for OpenMP configuration:
+		duplicate the entire SLiM directory to SLiM_temp, open the SLiM.xcodeproj file in the copy, rename it in the Project Navigator to "SLiM_OpenMP", copy it back into the SLiM folder and delete the rest of the copy
+		use grep to find "SLiM_temp" inside files in the new SLiM_OpenMP.xcodeproj and fix them to refer to the correct directory (might be just one, or none)
+		add system header search path for targets eidos_multi and SLiM_multi, in Build Settings: /usr/local/include/ (non-recursive)
+		add library search path for targets eidos_multi and SLiM_multi, in Build Settings: /usr/local/lib/ (non-recursive)
+		add link to binary library, under "Build Phases" (Link Binary With Libraries) on targets eidos_multi and SLiM_multi, by dragging in /usr/local/lib/libomp.dylib
+		add to Other C Flags and Other C++ Flags for targets eidos_multi and SLiM_multi, in Build Settings: -Xclang and -fopenmp
+		disable library validation for targets eidos_multi and SLiM_multi, in Signing & Capabilities: https://developer.apple.com/documentation/bundleresources/entitlements/com_apple_security_cs_disable-library-validation
+		set Enable Index-While-Building Functionality to NO in Build Settings on targets eidos_multi and SLiM_multi
+		disable Link-Time Optimization, which appears to be incompatible with OpenMP
+		switched "Build Active Architecture Only" in Build Settings to YES for Release builds, for targets eidos_multi and SLiM_multi, because the older libomp.dylib builds do not have arm64 in them
+		see http://antonmenshov.com/2017/09/09/clang-openmp-setup-in-xcode/
+		added -Rpass="loop|vect" -Rpass-missed="loop|vect" -Rpass-analysis="loop|vect" to Other C++ Flags for eidos_multi, to aid in optimization work
+		added "-fno-math-errno" to the project-level Other C++ Flags, to match the main project
+		added $(OTHER_CPLUSPLUSFLAGS) to that build setting for slim_multi and eidos_multi, to inherit from the project's settings
+		require 10.13 or later for eidos_multi and slim_multi, since that's the macOS version that the R libomp packages target
+		added SLiMguiLegacy_multi and EidosScribe_multi targets/schemes to the project for debugging/testing purposes; not intended for end users!
+	modify CMakeLists.txt to add a "PARALLEL" flag that can be set to "ON" to enable OpenMP with the -fopenmp flag
+	modify CMakeLists.txt to add /usr/local/include to all targets (through GSL_INCLUDES, which should maybe get cleaned up somehow)
+	modify CMakeLists.txt to build executables eidos_multi and slim_multi when PARALLEL is ON, and to link in libomp
+	disable parallel builds of EidosScribe, SLiMgui, and SLiMguiLegacy with #error directives; parallelization is for the command line only (active threads peg the CPU, passive threads are slower than single-threaded)
+	add a command-line option, -maxThreads <n>, to control the maximum number of threads used in OpenMP
+	add a header eidos_openmp.h that should be used instead of #include "omp.h", providing various useful definitions etc.
+	add an initialization function for OpenMP, Eidos_WarmUpOpenMP(), that must be called when warming up; sets various options, logs diagnostic info
+	set default values for OMP_WAIT_POLICY (active) and OMP_PROC_BIND (false) and OMP_DYNAMIC (false) for slim and eidos
+	add THREAD_SAFETY_CHECK() convention for marking areas that are known not to be thread-safe, such as due to use of statics or globals
+	add gEidosMaxThreads global that has the max number of threads for the session (for pre-allocating per-thread data structures)
+	create per-thread SparseVector pools to allow high-scalability parallelization of spatial interactions
+	add parallel sorting code - quicksort and mergesort, rather experimental at this point, just early work
+	create per-thread random number generators to allow parallelization of code that involves RNG usage
+	parallelize dnorm(), rbinom(), rdunif(), rexp(), rnorm(), rpois(), runif()
+	parallelize tallying mutation refcounts, for the optimized fast case
+	parallelize fitness evaluation for mutations, when there are no mutationEffect() or fitnessEffect() callbacks
+	parallelize non-mutation-based fitness evaluation (i.e., only fitnessScaling values)
+	parallelize some minor tick cycle bookkeeping: incrementing individual ages, clearing the migrant flag
+	parallelize the viability/survival tick cycle stage's survival evaluation (without callbacks)
+	parallelize localPopulationDensity(), clippedIntegral(), and neighborCount()
+	add gEidosNumThreads global that has the currently requested number of threads (usually equal to gEidosMaxThreads)
+	add functions to Eidos to support parallel execution: parallelGetNumThreads(), parallelGetMaxThreads(), and parallelSetNumThreads()
+	add unit tests for parallelized functions in Eidos, comparing against the same functions non-parallel
+	add nowait clause on parallel for loops that would otherwise have a double barrier
+	shift to allocating per-thread RNGs in parallelized allocations, so "first touch" places each RNG in memory close to that thread's core
+	switch to dynamic scheduling for rbinom() and rdunif(), which seem to need it for unclear reasons
+	use a minimum chunk size of 16 for all dynamic schedules; problems that parallelize should always be at least that big, I imagine
+		the exception is countOfMutationsOfType() / sumOfMutationsOfType() and similar, because they do so much work per iteration that they can be worth going parallel even for two iterations
+		note that we never use schedule(guided) because its implementation in GCC is brain-dead; see https://stackoverflow.com/a/43047074/2752221
+	switch to OMP_PROC_BIND == true by default; keep threads with the data they're working on, recommended practice
+	parallelize more Eidos math functions: abs(float), ceil(), exp(float), floor(), log(float), log10(float), log2(float), round(), sqrt(float), trunc()
+	parallelize Eidos min/max functions: max(int), max(float), min(int), min(float), pmax(int), pmax(float), pmin(int), pmin(float)
+	parallelize match(), tabulate(), sample (int/float/object, replace=T, weights=NULL), mean(l/i/f)
+	parallelize Individual -relatedness(), -countOfMutationsOfType(), -setSpatialPosition(); Species -individualsWithPedigreeIDs()
+	parallelize Subpopulation -pointInBounds(), -pointReflected(), -pointStopped(), pointPeriodic(), pointUniform()
+	parallelize Genome -containsMarkerMutation()
+	change how RNG seeding works, for single-threaded and multi-threaded runs: use /dev/random to generate initial seeds, and for multi-threaded runs, use /dev/random for seeds for all threads beyond thread 0
+	parallelize Genome -countOfMutationsOfType(), Subpopulation -spatialMapValue() and -sampleIndividuals()
+	add unit tests for various SLiM methods that have been parallelized
+	parallelize InteractionType nearestNeighbors(), nearestInteractingNeighbors(), drawByStrength() by adding a returnDict parameter that lets the user get a Dictionary as a result, with one entry per receiver
+		their "receiver" parameter is no longer required to be singleton (when returnDict == T), and their return value is now just "object" since it can be Individual or Dictionary
+	overhaul the internal MutationRun design to allow it to be used multithreaded - get rid of refcounting, switch to usage tallying
+	parallelize offspring generation in WF models, when no callbacks (mateChoice(), modifyChild(), recombination(), mutation()) are active; no tree-seq, no cloning
+	parallelize offspring generation in WF model: add support for cloning
+	parallelize tallying of mutation runs, clearing of parental genomes at the end of WF model ticks, and mutation run uniquing
+	add support for parallel reproduction when a non-default stacking policy is in use
+	add support for parallel reproduction with tree-sequence recording
+	add parallel reproduction in nonWF models, with no callbacks (recombination(), mutation()) active - modifyChild() callbacks are legal but cannot access child genomes since they are deferred
+		this is achieved by passing defer=T to addCrossed(), addCloned(), addSelfed(), or addRecombinant()
+	thread-safety work - break in backward reproducibility for scripts that use a type 's' DFE, because the code path for that shifted
+	algorithm change for nearestNeighbors() and nearestNeighborsOfPoint(), when count is >1 and <N; the old algorithm was not thread-safe, and was inefficient; breaks backward reproducibility for the relevant case
+		the break in backward reproducibility is because the old algorithm returned the neighbors in a scrambled order, whereas the new algorithm returns them in sorted order; the order is not documented, so both are compliant
+	add max thread counts for each parallel loop, allowing flexible customization both by Eidos and by the user
+	tweak the semantics of parallelSetNumThreads(): an integer requests that exact number of threads, whereas NULL requests "up to" the max number of threads (the default behavior)
+		add gEidosNumThreadsOverride flag indicating whether the number of threads has been overridden explicitly
+	add parallelSetTaskThreadCounts() function to set per-task thread counts, and parallelGetTaskThreadCounts() to get them
+	add recipe 22.7, Parallelizing nonWF reproduction and spatial interactions
+	add internal EidosBenchmark facility for timing of internal loops, for benchmarking of parallelization scaling
+	add a command-line option, -perTaskThreads, to let the user control how many threads to use for different tasks (if not overridden)
+	parallelize sorting, where possible/efficient, add requisite per-task thread count keys
+	parallelize edge table sorting during simplification, add requisite per-task thread count keys
+
diff --git a/QtSLiM/QtSLiM.pro b/QtSLiM/QtSLiM.pro
@@ -18,7 +18,7 @@ QMAKE_INFO_PLIST = QtSLiM_Info.plist
 ICON = QtSLiM_AppIcon.icns
 QMAKE_TARGET_BUNDLE_PREFIX = "org.messerlab"
 QMAKE_BUNDLE = "SLiMgui"		# This governs the location of our prefs, which we keep under org.messerlab.SLiMgui
-VERSION = 4.0.1
+VERSION = 4.1
 
 docIconFiles.files = $$PWD/QtSLiM_DocIcon.icns
 docIconFiles.path = Contents/Resources
diff --git a/SLiM.xcodeproj/project.pbxproj b/SLiM.xcodeproj/project.pbxproj
@@ -1888,6 +1888,7 @@
 		985A11861BBA07CB009EE1FF /* eidos_object_pool.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = eidos_object_pool.h; sourceTree = "<group>"; };
 		985D1D892808B84F00461CFA /* sparse_vector.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = sparse_vector.cpp; sourceTree = "<group>"; };
 		985D1D8A2808B84F00461CFA /* sparse_vector.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sparse_vector.h; sourceTree = "<group>"; };
+		985DCFD52B1ED7340025B0D5 /* PARALLEL */ = {isa = PBXFileReference; lastKnownFileType = text; path = PARALLEL; sourceTree = "<group>"; };
 		985F3EE924BA27EC00E712E0 /* slim_test_genetics.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = slim_test_genetics.cpp; sourceTree = "<group>"; };
 		985F3EEC24BA2A5D00E712E0 /* eidos_test_operators_other.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = eidos_test_operators_other.cpp; sourceTree = "<group>"; };
 		985F3EF124BA2A8C00E712E0 /* eidos_test_functions_other.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = eidos_test_functions_other.cpp; sourceTree = "<group>"; };
@@ -2417,6 +2418,7 @@
 		982663471A3BABD300A0CBBF = {
 			isa = PBXGroup;
 			children = (
+				985DCFD52B1ED7340025B0D5 /* PARALLEL */,
 				98E9A6B21A3CE299000AD4FC /* TO_DO */,
 				986021081A3BB504001BDCFE /* LICENSE */,
 				98AF84641CB307300030D1EB /* VERSIONS */,
diff --git a/SLiMgui/SLiMguiLegacy-Info.plist b/SLiMgui/SLiMguiLegacy-Info.plist
@@ -88,7 +88,7 @@
 	<key>CFBundlePackageType</key>
 	<string>APPL</string>
 	<key>CFBundleShortVersionString</key>
-	<string>4.0.1</string>
+	<string>4.1</string>
 	<key>CFBundleSignature</key>
 	<string>????</string>
 	<key>CFBundleVersion</key>
diff --git a/SLiMgui/SLiMguiLegacy_multi-Info.plist b/SLiMgui/SLiMguiLegacy_multi-Info.plist
@@ -88,7 +88,7 @@
 	<key>CFBundlePackageType</key>
 	<string>APPL</string>
 	<key>CFBundleShortVersionString</key>
-	<string>4.0.1</string>
+	<string>4.1</string>
 	<key>CFBundleSignature</key>
 	<string>????</string>
 	<key>CFBundleVersion</key>
diff --git a/VERSIONS b/VERSIONS
diff --git a/core/slim_globals.h b/core/slim_globals.h
diff --git a/eidos/eidos_globals.h b/eidos/eidos_globals.h