feat: double precision for ML backends (#701)

lukasheinrich · web-flow · commit 73fd15e97e28 · 2019-12-22T16:30:11.000+01:00
* Add option to use double precision in ML backends
diff --git a/src/pyhf/cli/infer.py b/src/pyhf/cli/infer.py
@@ -66,11 +66,11 @@ def cls(
 
     # set the backend if not NumPy
     if backend in ['pytorch', 'torch']:
-        set_backend(tensor.pytorch_backend())
+        set_backend(tensor.pytorch_backend(float='float64'))
     elif backend in ['tensorflow', 'tf']:
         from tensorflow.compat.v1 import Session
 
-        set_backend(tensor.tensorflow_backend(session=Session()))
+        set_backend(tensor.tensorflow_backend(session=Session(), float='float64'))
     tensorlib, _ = get_backend()
 
     optconf = {k: v for item in optconf for k, v in item.items()}
diff --git a/src/pyhf/optimize/opt_tflow.py b/src/pyhf/optimize/opt_tflow.py
@@ -38,10 +38,10 @@ def setup_minimize(
         variable_bounds = [par_bounds[i] for i in variable_idx]
 
         data_placeholder = tf.placeholder(
-            tf.float32, (pdf.config.nmaindata + pdf.config.nauxdata,)
+            tensorlib.dtypemap['float'], (pdf.config.nmaindata + pdf.config.nauxdata,)
         )
         variable_pars_placeholder = tf.placeholder(
-            tf.float32, (pdf.config.npars - len(fixed_vals),)
+            tensorlib.dtypemap['float'], (pdf.config.npars - len(fixed_vals),)
         )
 
         tv = _TensorViewer([fixed_idx, variable_idx])
diff --git a/src/pyhf/tensor/numpy_backend.py b/src/pyhf/tensor/numpy_backend.py
@@ -1,3 +1,4 @@
+"""NumPy Tensor Library Module."""
 import numpy as np
 import logging
 from scipy.special import gammaln
diff --git a/src/pyhf/tensor/pytorch_backend.py b/src/pyhf/tensor/pytorch_backend.py
@@ -1,3 +1,4 @@
+"""PyTorch Tensor Library Module."""
 import torch
 import torch.autograd
 import logging
@@ -10,6 +11,11 @@ class pytorch_backend(object):
 
     def __init__(self, **kwargs):
         self.name = 'pytorch'
+        self.dtypemap = {  # torch dtypes, overridable via 'float' / 'int' kwargs
+            'float': getattr(torch, kwargs.get('float', 'float32')),
+            'int': getattr(torch, kwargs.get('int', 'int32')),
+            'bool': torch.bool,
+        }
 
     def clip(self, tensor_in, min_value, max_value):
         """
@@ -100,9 +106,8 @@ def astensor(self, tensor_in, dtype='float'):
         Returns:
             torch.Tensor: A multi-dimensional matrix containing elements of a single data type.
         """
-        dtypemap = {'float': torch.float, 'int': torch.int, 'bool': torch.bool}
         try:
-            dtype = dtypemap[dtype]
+            dtype = self.dtypemap[dtype]
         except KeyError:
             log.error('Invalid dtype: dtype must be float, int, or bool.')
             raise
@@ -141,10 +146,10 @@ def abs(self, tensor):
         return torch.abs(tensor)
 
     def ones(self, shape):
-        return torch.Tensor(torch.ones(shape))
+        return torch.ones(shape, dtype=self.dtypemap['float'])
 
     def zeros(self, shape):
-        return torch.Tensor(torch.zeros(shape))
+        return torch.zeros(shape, dtype=self.dtypemap['float'])
 
     def power(self, tensor_in_1, tensor_in_2):
         return torch.pow(tensor_in_1, tensor_in_2)
diff --git a/src/pyhf/tensor/tensorflow_backend.py b/src/pyhf/tensor/tensorflow_backend.py
@@ -1,3 +1,4 @@
+"""Tensorflow Tensor Library Module."""
 import logging
 import tensorflow as tf
 import tensorflow_probability as tfp
@@ -11,13 +12,17 @@ class tensorflow_backend(object):
     def __init__(self, **kwargs):
         self.session = kwargs.get('session')
         self.name = 'tensorflow'
+        self.dtypemap = {
+            'float': getattr(tf, kwargs.get('float', 'float32')),
+            'int': getattr(tf, kwargs.get('int', 'int32')),
+            'bool': tf.bool,
+        }
 
     def clip(self, tensor_in, min_value, max_value):
         """
         Clips (limits) the tensor values to be within a specified min and max.
 
         Example:
-
             >>> import pyhf
             >>> import tensorflow as tf
             >>> sess = tf.compat.v1.Session()
@@ -36,6 +41,7 @@ def clip(self, tensor_in, min_value, max_value):
 
         Returns:
             TensorFlow Tensor: A clipped `tensor`
+
         """
         if min_value is None:
             min_value = tf.reduce_min(tensor_in)
@@ -48,7 +54,6 @@ def tile(self, tensor_in, repeats):
         Repeat tensor data along a specific dimension
 
         Example:
-
             >>> import pyhf
             >>> import tensorflow as tf
             >>> sess = tf.compat.v1.Session()
@@ -67,6 +72,7 @@ def tile(self, tensor_in, repeats):
 
         Returns:
             TensorFlow Tensor: The tensor with repeated axes
+
         """
         return tf.tile(tensor_in, repeats)
 
@@ -75,7 +81,6 @@ def conditional(self, predicate, true_callable, false_callable):
         Runs a callable conditional on the boolean value of the evaluation of a predicate
 
         Example:
-
             >>> import pyhf
             >>> import tensorflow as tf
             >>> sess = tf.compat.v1.Session()
@@ -97,6 +102,7 @@ def conditional(self, predicate, true_callable, false_callable):
 
         Returns:
             TensorFlow Tensor: The output of the callable that was evaluated
+
         """
         return tf.cond(predicate, true_callable, false_callable)
 
@@ -157,10 +163,10 @@ def astensor(self, tensor_in, dtype='float'):
 
         Returns:
             `tf.Tensor`: A symbolic handle to one of the outputs of a `tf.Operation`.
+
         """
-        dtypemap = {'float': tf.float32, 'int': tf.int32, 'bool': tf.bool}
         try:
-            dtype = dtypemap[dtype]
+            dtype = self.dtypemap[dtype]
         except KeyError:
             log.error('Invalid dtype: dtype must be float, int, or bool.')
             raise
@@ -198,10 +204,10 @@ def abs(self, tensor):
         return tf.abs(tensor)
 
     def ones(self, shape):
-        return tf.ones(shape)
+        return tf.ones(shape, dtype=self.dtypemap['float'])
 
     def zeros(self, shape):
-        return tf.zeros(shape)
+        return tf.zeros(shape, dtype=self.dtypemap['float'])
 
     def power(self, tensor_in_1, tensor_in_2):
         return tf.pow(tensor_in_1, tensor_in_2)
@@ -249,7 +255,6 @@ def simple_broadcast(self, *args):
         Broadcast a sequence of 1 dimensional arrays.
 
         Example:
-
             >>> import pyhf
             >>> import tensorflow as tf
             >>> sess = tf.compat.v1.Session()
@@ -266,6 +271,7 @@ def simple_broadcast(self, *args):
 
         Returns:
             list of Tensors: The sequence broadcast together.
+
         """
         max_dim = max(map(lambda arg: arg.shape[0], args))
         try:
@@ -308,7 +314,6 @@ def poisson_logpdf(self, n, lam):
         at :code:`n` given the parameter :code:`lam`.
 
         Example:
-
             >>> import pyhf
             >>> import tensorflow as tf
             >>> sess = tf.compat.v1.Session()
@@ -343,7 +348,6 @@ def poisson(self, n, lam):
         at :code:`n` given the parameter :code:`lam`.
 
         Example:
-
             >>> import pyhf
             >>> import tensorflow as tf
             >>> sess = tf.compat.v1.Session()
@@ -378,7 +382,6 @@ def normal_logpdf(self, x, mu, sigma):
         of :code:`sigma`.
 
         Example:
-
             >>> import pyhf
             >>> import tensorflow as tf
             >>> sess = tf.compat.v1.Session()
@@ -414,7 +417,6 @@ def normal(self, x, mu, sigma):
         of :code:`sigma`.
 
         Example:
-
             >>> import pyhf
             >>> import tensorflow as tf
             >>> sess = tf.compat.v1.Session()
@@ -443,12 +445,11 @@ def normal(self, x, mu, sigma):
         normal = tfp.distributions.Normal(mu, sigma)
         return normal.prob(x)
 
-    def normal_cdf(self, x, mu=0, sigma=1):
+    def normal_cdf(self, x, mu=0.0, sigma=1):
         """
-        The cumulative distribution function for the Normal distribution
+        Compute the value of cumulative distribution function for the Normal distribution at x.
 
         Example:
-
             >>> import pyhf
             >>> import tensorflow as tf
             >>> sess = tf.compat.v1.Session()
@@ -472,15 +473,16 @@ def normal_cdf(self, x, mu=0, sigma=1):
         Returns:
             TensorFlow Tensor: The CDF
         """
-        normal = tfp.distributions.Normal(mu, sigma)
+        normal = tfp.distributions.Normal(
+            self.astensor(mu, dtype='float')[0], self.astensor(sigma, dtype='float')[0],
+        )
         return normal.cdf(x)
 
     def poisson_dist(self, rate):
         r"""
-        The Poisson distribution with rate parameter :code:`rate`.
+        Construct a Poisson distribution with rate parameter :code:`rate`.
 
         Example:
-
             >>> import pyhf
             >>> import tensorflow as tf
             >>> sess = tf.compat.v1.Session()
@@ -505,10 +507,9 @@ def poisson_dist(self, rate):
 
     def normal_dist(self, mu, sigma):
         r"""
-        The Normal distribution with mean :code:`mu` and standard deviation :code:`sigma`.
+        Construct a Normal distribution with mean :code:`mu` and standard deviation :code:`sigma`.
 
         Example:
-
             >>> import pyhf
             >>> import tensorflow as tf
             >>> sess = tf.compat.v1.Session()
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -44,13 +44,14 @@ def reset_backend():
     params=[
         (pyhf.tensor.numpy_backend(), None),
         (pyhf.tensor.pytorch_backend(), None),
+        (pyhf.tensor.pytorch_backend(float='float64', int='int64'), None),
         (pyhf.tensor.tensorflow_backend(session=tf.compat.v1.Session()), None),
         (
             pyhf.tensor.numpy_backend(poisson_from_normal=True),
             pyhf.optimize.minuit_optimizer(),
         ),
     ],
-    ids=['numpy', 'pytorch', 'tensorflow', 'numpy_minuit'],
+    ids=['numpy', 'pytorch', 'pytorch64', 'tensorflow', 'numpy_minuit'],
 )
 def backend(request):
     # a better way to get the id? all the backends we have so far for testing

Original file line number	Diff line number	Diff line change
`@@ -38,10 +38,10 @@ def setup_minimize(`
`38`	`38`	`variable_bounds = [par_bounds[i] for i in variable_idx]`
`39`	`39`
`40`	`40`	`data_placeholder = tf.placeholder(`
`41`		`- tf.float32, (pdf.config.nmaindata + pdf.config.nauxdata,)`
	`41`	`+ tensorlib.dtypemap['float'], (pdf.config.nmaindata + pdf.config.nauxdata,)`
`42`	`42`	`)`
`43`	`43`	`variable_pars_placeholder = tf.placeholder(`
`44`		`- tf.float32, (pdf.config.npars - len(fixed_vals),)`
	`44`	`+ tensorlib.dtypemap['float'], (pdf.config.npars - len(fixed_vals),)`
`45`	`45`	`)`
`46`	`46`
`47`	`47`	`tv = _TensorViewer([fixed_idx, variable_idx])`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+"""NumPy Tensor Library Module."""`
`1`	`2`	`import numpy as np`
`2`	`3`	`import logging`
`3`	`4`	`from scipy.special import gammaln`