From c6f751d83db5d7311113b7f06c8dcdec2a384228 Mon Sep 17 00:00:00 2001
From: Shyam Sundar Sankaran <shyamss.1995@gmail.com>
Date: Fri, 22 Sep 2017 11:08:07 +0530
Subject: [PATCH 1/8] WIP Poisson solver debugging

---
 .../EM_fields_solver/electrostatic.py         |  2 +-
 .../test_compute_electrostatic_fields.py      | 55 +++++++++++-
 .../tests/test_fft_poisson.py                 | 86 ++++++++++++++++---
 3 files changed, 127 insertions(+), 16 deletions(-)

diff --git a/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py b/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py
index 3eb651c2..b23ac42d 100644
--- a/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py
+++ b/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py
@@ -152,7 +152,7 @@ def fft_poisson(self, performance_test_flag = False):
     else:
         N_g = self.N_ghost
         rho =   self.physical_system.params.charge_electron \
-              * self.compute_moments('density')[N_g:-N_g, N_g:-N_g]
+              * (self.compute_moments('density')[N_g:-N_g, N_g:-N_g])
 
         k_q1 = fftfreq(rho.shape[0], self.dq1)
         k_q2 = fftfreq(rho.shape[1], self.dq2)
diff --git a/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py b/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py
index 4cf9b2a2..e38b1198 100644
--- a/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py
+++ b/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py
@@ -18,6 +18,20 @@
 from bolt.lib.nonlinear_solver.EM_fields_solver.electrostatic \
     import compute_electrostatic_fields
 
+def compute_moments_sinusoidal(self, *args):
+    return (1 + af.sin(2 * np.pi * self.q1 + 4 * np.pi * self.q2))
+
+def compute_moments_gaussian(self, *args):
+    q2_minus = 0.25
+    q2_plus  = 0.75
+
+    regulator = 20  # larger value makes the transition sharper
+
+    rho = 1 + 0.5 * (  af.tanh(( self.q2 - q2_minus)*regulator) 
+                     - af.tanh(( self.q2 - q2_plus )*regulator)
+                    )
+
+    return(rho)
 
 class test(object):
     def __init__(self, N):
@@ -77,11 +91,9 @@ def __init__(self, N):
                                             stencil_type=1,
                                           )
 
-    def compute_moments(self, *args):
-        return (1 + af.sin(2 * np.pi * self.q1 + 4 * np.pi * self.q2))
-
+    compute_moments = compute_moments_sinusoidal
 
-def test_compute_electrostatic_fields():
+def test_compute_electrostatic_fields_1():
 
     error_E1 = np.zeros(5)
     error_E2 = np.zeros(5)
@@ -119,3 +131,38 @@ def test_compute_electrostatic_fields():
 
     assert (abs(poly_E1[0] + 2) < 0.2)
     assert (abs(poly_E2[0] + 2) < 0.2)
+
+def test_compute_electrostatic_fields_2():
+
+    error_E1 = np.zeros(5)
+    error_E2 = np.zeros(5)
+
+    N = 2**np.arange(5, 10)
+
+    for i in range(N.size):
+        obj = test(N[i])
+        compute_electrostatic_fields(obj)
+
+        E1_expected = 0
+
+        E2_expected = -0.5/20 * (  af.log(af.cosh(( obj.q2 - q2_minus)*20)) 
+                                 - af.log(af.cosh(( obj.q2 - q2_plus )*20))
+                                ) 
+
+        N_g = obj.N_ghost
+
+        error_E1[i] = af.sum(af.abs(  obj.E1[N_g:-N_g, N_g:-N_g]
+                                    - E1_expected[N_g:-N_g, N_g:-N_g]
+                                   )
+                            ) / (obj.E1[N_g:-N_g, N_g:-N_g].elements())
+
+        error_E2[i] = af.sum(af.abs(  obj.E2[N_g:-N_g, N_g:-N_g]
+                                    - E2_expected[N_g:-N_g, N_g:-N_g]
+                                   )
+                            ) / (obj.E2[N_g:-N_g, N_g:-N_g].elements())
+
+    poly_E1 = np.polyfit(np.log10(N), np.log10(error_E1), 1)
+    poly_E2 = np.polyfit(np.log10(N), np.log10(error_E2), 1)
+
+    assert (abs(poly_E1[0] + 2) < 0.2)
+    assert (abs(poly_E2[0] + 2) < 0.2)
diff --git a/bolt/lib/nonlinear_solver/tests/test_fft_poisson.py b/bolt/lib/nonlinear_solver/tests/test_fft_poisson.py
index a9605ff0..30098b5a 100644
--- a/bolt/lib/nonlinear_solver/tests/test_fft_poisson.py
+++ b/bolt/lib/nonlinear_solver/tests/test_fft_poisson.py
@@ -12,11 +12,33 @@
 
 import numpy as np
 import arrayfire as af
+af.set_backend('cpu')
+import pylab as pl
 from petsc4py import PETSc
 
 from bolt.lib.nonlinear_solver.EM_fields_solver.electrostatic import fft_poisson
 from bolt.lib.nonlinear_solver.communicate import communicate_fields
 
+def compute_moments_sinusoidal(self, *args):
+    return (1 + af.sin(2 * np.pi * self.q1 + 4 * np.pi * self.q2))
+
+def compute_moments_hyperbolic(self, *args):
+    q2_minus = 0.25
+    q2_plus  = 0.75
+
+    regulator = 20 # larger value makes the transition sharper
+
+    rho = 1 + 0.5 * (  af.tanh(( self.q2 - q2_minus)*regulator) 
+                     - af.tanh(( self.q2 - q2_plus )*regulator)
+                    )
+
+    rho[:self.N_ghost]  = rho[-2*self.N_ghost:-self.N_ghost]
+    rho[-self.N_ghost:] = rho[self.N_ghost:2*self.N_ghost]
+
+    rho[:, :self.N_ghost]  = rho[:, -2*self.N_ghost:-self.N_ghost]
+    rho[:, -self.N_ghost:] = rho[:, self.N_ghost:2*self.N_ghost]
+    
+    return(rho)
 
 class test(object):
     def __init__(self):
@@ -85,26 +107,68 @@ def __init__(self):
         self._local_value_fields = self._da_fields.getVecArray(self._local_fields)
         self._glob_value_fields  = self._da_fields.getVecArray(self._glob_fields)
 
-
-    def compute_moments(self, *args):
-        return (af.sin(2 * np.pi * self.q1 + 4 * np.pi * self.q2))
-
     _communicate_fields = communicate_fields
-
+    compute_moments     = compute_moments_hyperbolic
 
 def test_fft_poisson():
     obj = test()
     fft_poisson(obj)
 
-    E1_expected = (0.1 / np.pi) * af.cos(  2 * np.pi * obj.q1
-                                         + 4 * np.pi * obj.q2
-                                        )
+    # E1_expected = (0.1 / np.pi) * af.cos(  2 * np.pi * obj.q1
+    #                                      + 4 * np.pi * obj.q2
+    #                                     )
+
+    # E2_expected = (0.2 / np.pi) * af.cos(  2 * np.pi * obj.q1
+    #                                      + 4 * np.pi * obj.q2
+    #                                     )
+
+    E1_expected = 0
 
-    E2_expected = (0.2 / np.pi) * af.cos(  2 * np.pi * obj.q1
-                                         + 4 * np.pi * obj.q2
-                                        )
+    E2_expected = -0.5/20 * (   af.log(af.cosh(( obj.q2 - 0.25)*20)) 
+                              - af.log(af.cosh(( obj.q2 - 0.75)*20))
+                            ) 
+
+    # E1_expected = 0
+
+    # E2_expected = -0.5/5 * 0.5 * (  af.exp(-10*( obj.q2 - 0.25)**2) 
+    #                               - af.exp(-10*( obj.q2 - 0.75)**2)
+    #                              )
+
+    # E1_expected = -0.5 * af.log(1+obj.q1+obj.q2)
+
+    # E2_expected = -0.5 * af.log(1+obj.q1+obj.q2) 
+
+    N_g = obj.N_ghost
+
+    pl.contourf(-np.array(obj.compute_moments('density') - 1)[N_g:-N_g, N_g:-N_g], 100)
+    pl.colorbar()
+    pl.show()
+    pl.clf()
+
+    pl.contourf(np.array(af.convolve2_separable(af.Array([0, 1, 0]), 
+                                                af.Array([1, 0, -1]),
+                                                E2_expected
+                                               )
+                        )[N_g:-N_g, N_g:-N_g]/(2*obj.dq2), 
+                100
+               )
+    
+    pl.colorbar()
+    pl.show()
+    pl.clf()
+
+    pl.contourf(np.array(E2_expected)[N_g:-N_g, N_g:-N_g], 100)
+    pl.colorbar()
+    pl.show()
+    pl.clf()
+
+    pl.contourf(np.array(obj.E2)[N_g:-N_g, N_g:-N_g], 100)
+    pl.colorbar()
+    pl.show()
 
     error_E1 = af.sum(af.abs(obj.E1 - E1_expected)) / (obj.E1.elements())
     error_E2 = af.sum(af.abs(obj.E2 - E2_expected)) / (obj.E2.elements())
 
     assert (error_E1 < 1e-14 and error_E2 < 1e-14)
+
+test_fft_poisson()
\ No newline at end of file

From e3ada3fd4ddc02e3dbc1803245809e80302757c4 Mon Sep 17 00:00:00 2001
From: Mani Chandra <manic@illinois.edu>
Date: Tue, 26 Sep 2017 20:09:39 +0530
Subject: [PATCH 2/8] 1) Added option to get ksp to read command line
 arguments. Needed to add additional headers in test files for petsc to read
 command line arguments. 2) test_compute_electrostatic_fields.py is in flux

Run with : python test_compute_electrostatic_fields.py -ksp_monitor
---
 .../EM_fields_solver/electrostatic.py                 |  1 +
 .../tests/test_compute_electrostatic_fields.py        | 11 ++++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py b/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py
index b23ac42d..69edd214 100644
--- a/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py
+++ b/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py
@@ -90,6 +90,7 @@ def compute_electrostatic_fields(self, performance_test_flag = False):
                       )
            )
 
+    ksp.setFromOptions()
     ksp.solve(rho, phi)
 
     num_tries = 0
diff --git a/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py b/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py
index e38b1198..f7bc7234 100644
--- a/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py
+++ b/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py
@@ -13,8 +13,11 @@
 
 import numpy as np
 import arrayfire as af
+import petsc4py, sys
+petsc4py.init(sys.argv)
 from petsc4py import PETSc
 
+
 from bolt.lib.nonlinear_solver.EM_fields_solver.electrostatic \
     import compute_electrostatic_fields
 
@@ -143,7 +146,10 @@ def test_compute_electrostatic_fields_2():
         obj = test(N[i])
         compute_electrostatic_fields(obj)
 
-        E1_expected = 0
+        E1_expected = 0 * obj.q1
+        
+        q2_minus = 0.25
+        q2_plus  = 0.75
 
         E2_expected = -0.5/20 * (  af.log(af.cosh(( obj.q2 - q2_minus)*20)) 
                                  - af.log(af.cosh(( obj.q2 - q2_plus )*20))
@@ -166,3 +172,6 @@ def test_compute_electrostatic_fields_2():
 
     assert (abs(poly_E1[0] + 2) < 0.2)
     assert (abs(poly_E2[0] + 2) < 0.2)
+
+
+test_compute_electrostatic_fields_2()

From 3a7d3534d841ab473176505fc95af4687505e67b Mon Sep 17 00:00:00 2001
From: Mani Chandra <manic@illinois.edu>
Date: Wed, 27 Sep 2017 15:00:26 +0530
Subject: [PATCH 3/8] * Using SNES to solve Poisson equation * First attempt:
 managed to solve x^2 - 2 = 0 over [63, 63] grid using SNES

---
 .../EM_fields_solver/electrostatic.py         | 196 +++++++++---------
 bolt/lib/nonlinear_solver/nonlinear_solver.py |  26 ++-
 .../test_compute_electrostatic_fields.py      |  77 +++----
 3 files changed, 154 insertions(+), 145 deletions(-)

diff --git a/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py b/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py
index 69edd214..8c3146d3 100644
--- a/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py
+++ b/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py
@@ -7,7 +7,7 @@
 from numpy.fft import fftfreq
 
 
-class Poisson2D(object):
+class poisson_eqn(object):
     """
     This user class is an application context for the problem at hand;
     It contains some parametes and frames the matrix system depending on
@@ -16,38 +16,21 @@ class Poisson2D(object):
     using the PETSc's KSP solver methods
     """
 
-    def __init__(self, obj):
-        self.da     = obj._da_ksp
-        self.obj    = obj
-        self.localX = self.da.createLocalVec()
+    def __init__(self, nonlinear_solver_obj):
+        self.da        = nonlinear_solver_obj._da_snes
+        self.obj       = nonlinear_solver_obj
+        self.local_phi = self.da.createLocalVec() # phi with ghost zones
 
-    def RHS(self, rho, rho_array):
-        rho_val    = self.da.getVecArray(rho)
-        rho_val[:] = rho_array
+    def compute_residual(self, snes, phi, residual):
 
-    def mult(self, mat, X, Y):
-
-        self.da.globalToLocal(X, self.localX)
-
-        x = self.da.getVecArray(self.localX)
-        y = self.da.getVecArray(Y)
-
-        (q1_start, q1_end), (q2_start, q2_end) = self.da.getRanges()
-
-        for j in range(q1_start, q1_end):
-            for i in range(q2_start, q2_end):
-                u = x[j, i]  # center
-
-                u_w = x[j, i - 1]  # west
-                u_e = x[j, i + 1]  # east
-                u_s = x[j - 1, i]  # south
-                u_n = x[j + 1, i]  # north
-
-                u_xx = (-u_e + 2 * u - u_w) / self.obj.dq2**2
-                u_yy = (-u_n + 2 * u - u_s) / self.obj.dq1**2
-
-                y[j, i] = u_xx + u_yy
+        #self.da.globalToLocal(phi, self.local_phi)
 
+        #phi_array      = self.local_phi.getArray(readonly=1)
+        phi_array      = phi.getArray(readonly=1)
+        residual_array = residual.getArray(readonly=0)
+    
+        residual_array[:] = phi_array**2. - 2.
+        return
 
 def compute_electrostatic_fields(self, performance_test_flag = False):
     
@@ -57,76 +40,89 @@ def compute_electrostatic_fields(self, performance_test_flag = False):
     # Obtaining the left-bottom corner coordinates
     # (lowest values of the canonical coordinates in the local zone)
     # Additionally, we also obtain the size of the local zone
-    ((i_q1_lowest, i_q2_lowest), (N_q1_local,N_q2_local)) = self._da_ksp.getCorners()
-
-    pde = Poisson2D(self)
-    phi = self._da_ksp.createGlobalVec()
-    rho = self._da_ksp.createGlobalVec()
-
-    phi_local = self._da_ksp.createLocalVec()
-
-    A = PETSc.Mat().createPython([phi.getSizes(), rho.getSizes()],
-                                 comm=self._da_ksp.comm
-                                )
-    A.setPythonContext(pde)
-    A.setUp()
-
-    ksp = PETSc.KSP().create()
-
-    ksp.setOperators(A)
-    ksp.setType('cg')
-
-    pc = ksp.getPC()
-    pc.setType('none')
-
-    N_g = self.N_ghost
-    ksp.setTolerances(atol=1e-7)
-    pde.RHS(rho,
-              self.physical_system.params.charge_electron
-            * np.array(self.compute_moments('density')[N_g:-N_g,
-                                                       N_g:-N_g
-                                                      ]
-                       - 1
-                      )
-           )
-
-    ksp.setFromOptions()
-    ksp.solve(rho, phi)
-
-    num_tries = 0
-    while(ksp.converged is not True):
-        
-        ksp.setTolerances(atol = 10**(-6+num_tries), rtol = 10**(-6+num_tries))
-        ksp.solve(rho, phi)
-        num_tries += 1
-        
-        if(num_tries == 5):
-            raise Exception('KSP solver diverging!')
-
-    self._da_ksp.globalToLocal(phi, phi_local)
-
-    # Since rho was defined at (i + 0.5, j + 0.5)
-    # Electric Potential returned will also be at (i + 0.5, j + 0.5)
-    electric_potential = af.to_array(np.swapaxes(phi_local[:].
-                                                 reshape(  N_q2_local
-                                                         + 2 * self.N_ghost,
-                                                           N_q1_local +
-                                                         + 2 * self.N_ghost
-                                                        ),
-                                                 0, 1
-                                                )
-                                    )
-
-    # Obtaining the values at (i+0.5, j+0.5):
-    self.E1 = -(  af.shift(electric_potential, -1)
-                - af.shift(electric_potential,  1)
-               ) / (2 * self.dq1)
-
-    self.E2 = -(  af.shift(electric_potential, 0, -1)
-                - af.shift(electric_potential, 0,  1)
-               ) / (2 * self.dq2)
-
-    af.eval(self.E1, self.E2)
+    ((i_q1_start, i_q2_start), (N_q1_local, N_q2_local)) = self._da_snes.getCorners()
+
+    snes = PETSc.SNES().create()
+    pde  = poisson_eqn(self)
+    snes.setFunction(pde.compute_residual, self.glob_residual)
+    
+    snes.setDM(self._da_snes)
+    snes.setFromOptions()
+    snes.solve(None, self.glob_phi)
+    
+    phi_array = self.glob_phi.getArray()
+    print("phi = ", phi_array)
+
+    
+
+#    pde = Poisson2D(self)
+#    phi = self._da_ksp.createGlobalVec()
+#    rho = self._da_ksp.createGlobalVec()
+#
+#    phi_local = self._da_ksp.createLocalVec()
+#
+#    A = PETSc.Mat().createPython([phi.getSizes(), rho.getSizes()],
+#                                 comm=self._da_ksp.comm
+#                                )
+#    A.setPythonContext(pde)
+#    A.setUp()
+#
+#    ksp = PETSc.KSP().create()
+#
+#    ksp.setOperators(A)
+#    ksp.setType('cg')
+#
+#    pc = ksp.getPC()
+#    pc.setType('none')
+#
+#    N_g = self.N_ghost
+#    ksp.setTolerances(atol=1e-7)
+#    pde.RHS(rho,
+#              self.physical_system.params.charge_electron
+#            * np.array(self.compute_moments('density')[N_g:-N_g,
+#                                                       N_g:-N_g
+#                                                      ]
+#                       - 1
+#                      )
+#           )
+#
+#    ksp.setFromOptions()
+#    ksp.solve(rho, phi)
+#
+#    num_tries = 0
+#    while(ksp.converged is not True):
+#        
+#        ksp.setTolerances(atol = 10**(-6+num_tries), rtol = 10**(-6+num_tries))
+#        ksp.solve(rho, phi)
+#        num_tries += 1
+#        
+#        if(num_tries == 5):
+#            raise Exception('KSP solver diverging!')
+#
+#    self._da_ksp.globalToLocal(phi, phi_local)
+
+#    # Since rho was defined at (i + 0.5, j + 0.5)
+#    # Electric Potential returned will also be at (i + 0.5, j + 0.5)
+#    electric_potential = af.to_array(np.swapaxes(phi_local[:].
+#                                                 reshape(  N_q2_local
+#                                                         + 2 * self.N_ghost,
+#                                                           N_q1_local +
+#                                                         + 2 * self.N_ghost
+#                                                        ),
+#                                                 0, 1
+#                                                )
+#                                    )
+#
+#    # Obtaining the values at (i+0.5, j+0.5):
+#    self.E1 = -(  af.shift(electric_potential, -1)
+#                - af.shift(electric_potential,  1)
+#               ) / (2 * self.dq1)
+#
+#    self.E2 = -(  af.shift(electric_potential, 0, -1)
+#                - af.shift(electric_potential, 0,  1)
+#               ) / (2 * self.dq2)
+#
+#    af.eval(self.E1, self.E2)
 
     if(performance_test_flag == True):
         af.sync()
diff --git a/bolt/lib/nonlinear_solver/nonlinear_solver.py b/bolt/lib/nonlinear_solver/nonlinear_solver.py
index 677ff0ed..dc6a5871 100644
--- a/bolt/lib/nonlinear_solver/nonlinear_solver.py
+++ b/bolt/lib/nonlinear_solver/nonlinear_solver.py
@@ -143,16 +143,16 @@ def __init__(self, physical_system):
 
         # Additionally, a DA object also needs to be created for the KSP solver
         # with a DOF of 1:
-        self._da_ksp = PETSc.DMDA().create([self.N_q1, self.N_q2],
-                                            stencil_width = self.N_ghost,
-                                            boundary_type = (self.bc_in_q1,
-                                                             self.bc_in_q2
-                                                            ),
-                                            proc_sizes    = (PETSc.DECIDE,
-                                                             PETSc.DECIDE
-                                                            ),
-                                            stencil_type  = 1,
-                                            comm          = self._comm)
+        self._da_snes = PETSc.DMDA().create([self.N_q1, self.N_q2],
+                                             stencil_width = self.N_ghost,
+                                             boundary_type = (self.bc_in_q1,
+                                                              self.bc_in_q2
+                                                             ),
+                                             proc_sizes    = (PETSc.DECIDE,
+                                                              PETSc.DECIDE
+                                                             ),
+                                             stencil_type  = 1,
+                                             comm          = self._comm)
 
         self._da_dump_moments = PETSc.DMDA().create([self.N_q1, self.N_q2],
                                                     dof        = len(self.
@@ -174,6 +174,12 @@ def __init__(self, physical_system):
         # the communication routines for EM fields
         self._glob_fields  = self._da_fields.createGlobalVec()
         self._local_fields = self._da_fields.createLocalVec()
+    
+        self.glob_phi      = self._da_snes.createGlobalVec()
+        self.glob_residual = self._da_snes.createGlobalVec()
+        self.glob_phi.set(0.)
+        self.glob_residual.set(0.)
+
 
         # The following vector is used to dump the data to file:
         self._glob_moments = self._da_dump_moments.createGlobalVec()
diff --git a/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py b/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py
index f7bc7234..167dec30 100644
--- a/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py
+++ b/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py
@@ -57,7 +57,8 @@ def __init__(self, N):
         self.dq1 = (self.q1_end - self.q1_start) / self.N_q1
         self.dq2 = (self.q2_end - self.q2_start) / self.N_q2
 
-        self.N_ghost = np.random.randint(3, 5)
+        #self.N_ghost = np.random.randint(3, 5)
+        self.N_ghost = 3
 
         self.q1 = self.q1_start \
                   + (0.5 + np.arange(-self.N_ghost,
@@ -87,12 +88,15 @@ def __init__(self, N):
 
         self._comm = PETSc.COMM_WORLD.tompi4py()
 
-        self._da_ksp = PETSc.DMDA().create([self.N_q1, self.N_q2],
+        self._da_snes = PETSc.DMDA().create([self.N_q1, self.N_q2],
                                             stencil_width=self.N_ghost,
                                             boundary_type=('periodic',
                                                            'periodic'),
                                             stencil_type=1,
-                                          )
+                                           )
+        self.glob_residual = self._da_snes.createGlobalVec()
+        self.glob_phi  = self._da_snes.createGlobalVec()
+        self.glob_phi.set(0.)
 
     compute_moments = compute_moments_sinusoidal
 
@@ -140,38 +144,41 @@ def test_compute_electrostatic_fields_2():
     error_E1 = np.zeros(5)
     error_E2 = np.zeros(5)
 
-    N = 2**np.arange(5, 10)
-
-    for i in range(N.size):
-        obj = test(N[i])
-        compute_electrostatic_fields(obj)
-
-        E1_expected = 0 * obj.q1
-        
-        q2_minus = 0.25
-        q2_plus  = 0.75
-
-        E2_expected = -0.5/20 * (  af.log(af.cosh(( obj.q2 - q2_minus)*20)) 
-                                 - af.log(af.cosh(( obj.q2 - q2_plus )*20))
-                                ) 
-
-        N_g = obj.N_ghost
-
-        error_E1[i] = af.sum(af.abs(  obj.E1[N_g:-N_g, N_g:-N_g]
-                                    - E1_expected[N_g:-N_g, N_g:-N_g]
-                                   )
-                            ) / (obj.E1[N_g:-N_g, N_g:-N_g].elements())
-
-        error_E2[i] = af.sum(af.abs(  obj.E2[N_g:-N_g, N_g:-N_g]
-                                    - E2_expected[N_g:-N_g, N_g:-N_g]
-                                   )
-                            ) / (obj.E2[N_g:-N_g, N_g:-N_g].elements())
-
-    poly_E1 = np.polyfit(np.log10(N), np.log10(error_E1), 1)
-    poly_E2 = np.polyfit(np.log10(N), np.log10(error_E2), 1)
-
-    assert (abs(poly_E1[0] + 2) < 0.2)
-    assert (abs(poly_E2[0] + 2) < 0.2)
+    obj = test(49)
+    compute_electrostatic_fields(obj)
+
+#    N = 2**np.arange(5, 10)
+#
+#    for i in range(N.size):
+#        obj = test(N[i])
+#        compute_electrostatic_fields(obj)
+#
+#        E1_expected = 0 * obj.q1
+#        
+#        q2_minus = 0.25
+#        q2_plus  = 0.75
+#
+#        E2_expected = -0.5/20 * (  af.log(af.cosh(( obj.q2 - q2_minus)*20)) 
+#                                 - af.log(af.cosh(( obj.q2 - q2_plus )*20))
+#                                ) 
+#
+#        N_g = obj.N_ghost
+#
+#        error_E1[i] = af.sum(af.abs(  obj.E1[N_g:-N_g, N_g:-N_g]
+#                                    - E1_expected[N_g:-N_g, N_g:-N_g]
+#                                   )
+#                            ) / (obj.E1[N_g:-N_g, N_g:-N_g].elements())
+#
+#        error_E2[i] = af.sum(af.abs(  obj.E2[N_g:-N_g, N_g:-N_g]
+#                                    - E2_expected[N_g:-N_g, N_g:-N_g]
+#                                   )
+#                            ) / (obj.E2[N_g:-N_g, N_g:-N_g].elements())
+#
+#    poly_E1 = np.polyfit(np.log10(N), np.log10(error_E1), 1)
+#    poly_E2 = np.polyfit(np.log10(N), np.log10(error_E2), 1)
+#
+#    assert (abs(poly_E1[0] + 2) < 0.2)
+#    assert (abs(poly_E2[0] + 2) < 0.2)
 
 
 test_compute_electrostatic_fields_2()

From e18df04cbcf035702352533de0b702b6197d7c4a Mon Sep 17 00:00:00 2001
From: Mani Chandra <manic@illinois.edu>
Date: Fri, 29 Sep 2017 03:32:06 +0530
Subject: [PATCH 4/8] Got residual assembly for f(x) = x^2 - 2. over [N_q1,
 N_q2] working with both numpy and arrarfire arrays

---
 .../EM_fields_solver/electrostatic.py         |  46 +++++++-
 bolt/lib/nonlinear_solver/nonlinear_solver.py |   1 +
 .../test_compute_electrostatic_fields.py      | 104 +++++++++++-------
 3 files changed, 104 insertions(+), 47 deletions(-)

diff --git a/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py b/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py
index 8c3146d3..d78509b6 100644
--- a/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py
+++ b/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py
@@ -5,6 +5,7 @@
 import arrayfire as af
 import numpy as np
 from numpy.fft import fftfreq
+import sys
 
 
 class poisson_eqn(object):
@@ -20,16 +21,51 @@ def __init__(self, nonlinear_solver_obj):
         self.da        = nonlinear_solver_obj._da_snes
         self.obj       = nonlinear_solver_obj
         self.local_phi = self.da.createLocalVec() # phi with ghost zones
+        self.N_ghost   = self.obj.N_ghost
+
+        ((i_q1_start, i_q2_start), (N_q1_local, N_q2_local)) = \
+                self.da.getCorners()
+        
+        self.N_q1_local = N_q1_local
+        self.N_q2_local = N_q2_local
 
     def compute_residual(self, snes, phi, residual):
 
-        #self.da.globalToLocal(phi, self.local_phi)
+        self.da.globalToLocal(phi, self.local_phi)
+
+        N_g = self.N_ghost
+
+	# Residual assembly using numpy
+        phi_array = self.local_phi.getArray(readonly=1)
+        phi_array = phi_array.reshape([self.N_q2_local + 2*N_g, \
+                                       self.N_q1_local + 2*N_g, 1], \
+                                      order='A'
+                                     )
 
-        #phi_array      = self.local_phi.getArray(readonly=1)
-        phi_array      = phi.getArray(readonly=1)
         residual_array = residual.getArray(readonly=0)
-    
-        residual_array[:] = phi_array**2. - 2.
+        residual_array = residual_array.reshape([self.N_q2_local, \
+                                                 self.N_q1_local, 1], \
+                                                order='A'
+                                               )
+
+        residual_array[:, :, :] = phi_array[N_g:-N_g, N_g:-N_g, :]**2. - 2.
+
+	# Residual assembly using arrayfire
+#        phi_array    = self.local_phi.getArray(readonly=1)
+#        phi_af_array = af.to_array(phi_array)
+#        phi_af_array = af.moddims(phi_af_array,
+#                                  self.N_q1_local + 2*N_g,
+#                                  self.N_q2_local + 2*N_g
+#                                 )
+#
+#        residual_af_array = phi_af_array[N_g:-N_g, N_g:-N_g]**2. - 2.
+#        residual_af_array = af.moddims(residual_af_array,
+#                                         self.N_q1_local
+#                                       * self.N_q2_local
+#                                      )
+#        residual_array    = residual.getArray(readonly=0)
+#        residual_array[:] = residual_af_array.to_ndarray()
+
         return
 
 def compute_electrostatic_fields(self, performance_test_flag = False):
diff --git a/bolt/lib/nonlinear_solver/nonlinear_solver.py b/bolt/lib/nonlinear_solver/nonlinear_solver.py
index dc6a5871..c95f50cf 100644
--- a/bolt/lib/nonlinear_solver/nonlinear_solver.py
+++ b/bolt/lib/nonlinear_solver/nonlinear_solver.py
@@ -152,6 +152,7 @@ def __init__(self, physical_system):
                                                               PETSc.DECIDE
                                                              ),
                                              stencil_type  = 1,
+                                             dof = 1,
                                              comm          = self._comm)
 
         self._da_dump_moments = PETSc.DMDA().create([self.N_q1, self.N_q2],
diff --git a/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py b/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py
index 167dec30..d679ff63 100644
--- a/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py
+++ b/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py
@@ -37,7 +37,7 @@ def compute_moments_gaussian(self, *args):
     return(rho)
 
 class test(object):
-    def __init__(self, N):
+    def __init__(self, N_q1, N_q2):
         # Creating an object with necessary attributes:
         self.physical_system = type('obj', (object, ),
                                     {'params': type('obj', (object, ),
@@ -51,8 +51,8 @@ def __init__(self, N):
         self.q1_end = 1
         self.q2_end = 1
 
-        self.N_q1 = N
-        self.N_q2 = N
+        self.N_q1 = N_q1
+        self.N_q2 = N_q2
 
         self.dq1 = (self.q1_end - self.q1_start) / self.N_q1
         self.dq2 = (self.q2_end - self.q2_start) / self.N_q2
@@ -93,59 +93,79 @@ def __init__(self, N):
                                             boundary_type=('periodic',
                                                            'periodic'),
                                             stencil_type=1,
+                                            dof = 1
                                            )
         self.glob_residual = self._da_snes.createGlobalVec()
         self.glob_phi  = self._da_snes.createGlobalVec()
         self.glob_phi.set(0.)
 
-    compute_moments = compute_moments_sinusoidal
-
-def test_compute_electrostatic_fields_1():
-
-    error_E1 = np.zeros(5)
-    error_E2 = np.zeros(5)
-
-    N = 2**np.arange(5, 10)
-
-    for i in range(N.size):
-        obj = test(N[i])
-        compute_electrostatic_fields(obj)
-
-        E1_expected =    (0.1 / np.pi) \
-                       * af.cos(  2 * np.pi * obj.q1
-                                + 4 * np.pi * obj.q2
-                               )
-
-        E2_expected =   (0.2 / np.pi) \
-                      * af.cos(  2 * np.pi * obj.q1
-                               + 4 * np.pi * obj.q2
-                              )
-
-        N_g = obj.N_ghost
-
-        error_E1[i] = af.sum(af.abs(  obj.E1[N_g:-N_g, N_g:-N_g]
-                                    - E1_expected[N_g:-N_g, N_g:-N_g]
-                                   )
-                            ) / (obj.E1[N_g:-N_g, N_g:-N_g].elements())
-
-        error_E2[i] = af.sum(af.abs(  obj.E2[N_g:-N_g, N_g:-N_g]
-                                    - E2_expected[N_g:-N_g, N_g:-N_g]
-                                   )
-                            ) / (obj.E2[N_g:-N_g, N_g:-N_g].elements())
+        self._da_test = PETSc.DMDA().create([self.N_q1, self.N_q2],
+                                            stencil_width=self.N_ghost,
+                                            boundary_type=('ghosted',
+                                                           'ghosted'),
+                                            stencil_type=1,
+                                            dof = 1
+                                           )
+        self.glob_vec  = self._da_test.createGlobalVec()
 
-    poly_E1 = np.polyfit(np.log10(N), np.log10(error_E1), 1)
-    poly_E2 = np.polyfit(np.log10(N), np.log10(error_E2), 1)
+    compute_moments = compute_moments_sinusoidal
 
-    assert (abs(poly_E1[0] + 2) < 0.2)
-    assert (abs(poly_E2[0] + 2) < 0.2)
+#def test_compute_electrostatic_fields_1():
+#
+#    error_E1 = np.zeros(5)
+#    error_E2 = np.zeros(5)
+#
+#    N = 2**np.arange(5, 10)
+#
+#    for i in range(N.size):
+#        obj = test(N[i])
+#        compute_electrostatic_fields(obj)
+#
+#        E1_expected =    (0.1 / np.pi) \
+#                       * af.cos(  2 * np.pi * obj.q1
+#                                + 4 * np.pi * obj.q2
+#                               )
+#
+#        E2_expected =   (0.2 / np.pi) \
+#                      * af.cos(  2 * np.pi * obj.q1
+#                               + 4 * np.pi * obj.q2
+#                              )
+#
+#        N_g = obj.N_ghost
+#
+#        error_E1[i] = af.sum(af.abs(  obj.E1[N_g:-N_g, N_g:-N_g]
+#                                    - E1_expected[N_g:-N_g, N_g:-N_g]
+#                                   )
+#                            ) / (obj.E1[N_g:-N_g, N_g:-N_g].elements())
+#
+#        error_E2[i] = af.sum(af.abs(  obj.E2[N_g:-N_g, N_g:-N_g]
+#                                    - E2_expected[N_g:-N_g, N_g:-N_g]
+#                                   )
+#                            ) / (obj.E2[N_g:-N_g, N_g:-N_g].elements())
+#
+#    poly_E1 = np.polyfit(np.log10(N), np.log10(error_E1), 1)
+#    poly_E2 = np.polyfit(np.log10(N), np.log10(error_E2), 1)
+#
+#    assert (abs(poly_E1[0] + 2) < 0.2)
+#    assert (abs(poly_E2[0] + 2) < 0.2)
 
 def test_compute_electrostatic_fields_2():
 
     error_E1 = np.zeros(5)
     error_E2 = np.zeros(5)
 
-    obj = test(49)
+    obj = test(14, 7)
     compute_electrostatic_fields(obj)
+#    print(obj.glob_phi.getArray().reshape([49, 35], order='A').strides)
+#    print(obj.glob_vec.getArray().reshape([49, 35, 10], order='A').strides)
+
+#    obj.glob_vec.set(3.)
+#    local_vec = obj._da_test.createLocalVec()
+#    obj._da_test.globalToLocal(obj.glob_vec, local_vec)
+#    local_vec_array_direct = local_vec.getArray().reshape([7 + 6, 14+6, 2],
+#            order='c')
+#    print(local_vec_array_direct)
+#    print(local_vec_array_direct.strides)
 
 #    N = 2**np.arange(5, 10)
 #

From 1a2961e1fd72c3fce7bfeb628dc4362963defae0 Mon Sep 17 00:00:00 2001
From: Mani Chandra <manic@illinois.edu>
Date: Fri, 6 Oct 2017 16:51:55 +0530
Subject: [PATCH 5/8] Poisson solver now works with SNES.   * Periodic BCs work
 once background density has been subtracted correctly

---
 .../EM_fields_solver/electrostatic.py         | 210 ++++++++++++------
 .../test_compute_electrostatic_fields.py      | 148 ++++++------
 2 files changed, 215 insertions(+), 143 deletions(-)

diff --git a/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py b/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py
index d78509b6..87364fd8 100644
--- a/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py
+++ b/bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py
@@ -5,8 +5,36 @@
 import arrayfire as af
 import numpy as np
 from numpy.fft import fftfreq
-import sys
-
+import pylab as pl
+
+pl.rcParams['figure.figsize']  = 17, 7.5
+pl.rcParams['figure.dpi']      = 150
+pl.rcParams['image.cmap']      = 'jet'
+pl.rcParams['lines.linewidth'] = 1.5
+pl.rcParams['font.family']     = 'serif'
+pl.rcParams['font.weight']     = 'bold'
+pl.rcParams['font.size']       = 20
+pl.rcParams['font.sans-serif'] = 'serif'
+pl.rcParams['text.usetex']     = True
+pl.rcParams['axes.linewidth']  = 1.5
+pl.rcParams['axes.titlesize']  = 'medium'
+pl.rcParams['axes.labelsize']  = 'medium'
+
+pl.rcParams['xtick.major.size'] = 8
+pl.rcParams['xtick.minor.size'] = 4
+pl.rcParams['xtick.major.pad']  = 8
+pl.rcParams['xtick.minor.pad']  = 8
+pl.rcParams['xtick.color']      = 'k'
+pl.rcParams['xtick.labelsize']  = 'medium'
+pl.rcParams['xtick.direction']  = 'in'
+
+pl.rcParams['ytick.major.size'] = 8
+pl.rcParams['ytick.minor.size'] = 4
+pl.rcParams['ytick.major.pad']  = 8
+pl.rcParams['ytick.minor.pad']  = 8
+pl.rcParams['ytick.color']      = 'k'
+pl.rcParams['ytick.labelsize']  = 'medium'
+pl.rcParams['ytick.direction']  = 'in'
 
 class poisson_eqn(object):
     """
@@ -22,6 +50,9 @@ def __init__(self, nonlinear_solver_obj):
         self.obj       = nonlinear_solver_obj
         self.local_phi = self.da.createLocalVec() # phi with ghost zones
         self.N_ghost   = self.obj.N_ghost
+        self.dq1       = self.obj.dq1
+        self.dq2       = self.obj.dq2
+        self.density   = 0.
 
         ((i_q1_start, i_q2_start), (N_q1_local, N_q2_local)) = \
                 self.da.getCorners()
@@ -36,7 +67,7 @@ def compute_residual(self, snes, phi, residual):
         N_g = self.N_ghost
 
 	# Residual assembly using numpy
-        phi_array = self.local_phi.getArray(readonly=1)
+        phi_array = self.local_phi.getArray(readonly=0)
         phi_array = phi_array.reshape([self.N_q2_local + 2*N_g, \
                                        self.N_q1_local + 2*N_g, 1], \
                                       order='A'
@@ -48,7 +79,37 @@ def compute_residual(self, snes, phi, residual):
                                                 order='A'
                                                )
 
-        residual_array[:, :, :] = phi_array[N_g:-N_g, N_g:-N_g, :]**2. - 2.
+        #phi_array[:N_g,                 :] = 0.
+        #phi_array[self.N_q2_local+N_g:, :] = 0.
+        #phi_array[:,                 :N_g] = 0.
+        #phi_array[:, self.N_q1_local+N_g:] = 0.
+
+        phi_plus_x  = np.roll(phi_array, shift=-1, axis=1)
+        phi_minus_x = np.roll(phi_array, shift=1,  axis=1)
+        phi_plus_y  = np.roll(phi_array, shift=-1, axis=0)
+        phi_minus_y = np.roll(phi_array, shift=1,  axis=0)
+
+        d2phi_dx2   = (phi_minus_x - 2.*phi_array + phi_plus_x)/self.dq1**2.
+        d2phi_dy2   = (phi_minus_y - 2.*phi_array + phi_plus_y)/self.dq2**2.
+	
+        laplacian_phi = d2phi_dx2 + d2phi_dy2
+
+        density_af = af.moddims(self.density,
+                                  (self.N_q1_local+2*N_g)
+                                * (self.N_q2_local+2*N_g)
+                               )
+        density_np = density_af.to_ndarray()
+        density_np = density_np.reshape([self.N_q2_local + 2*N_g, \
+                                         self.N_q1_local + 2*N_g, 1], \
+                                        order='A'
+                                       )
+
+        residual_array[:, :] = \
+            (laplacian_phi + density_np)[N_g:-N_g, N_g:-N_g]
+
+#        residual_array[:, :] = \
+#            (laplacian_phi + self.density)[N_g:-N_g, N_g:-N_g]
+
 
 	# Residual assembly using arrayfire
 #        phi_array    = self.local_phi.getArray(readonly=1)
@@ -77,6 +138,8 @@ def compute_electrostatic_fields(self, performance_test_flag = False):
     # (lowest values of the canonical coordinates in the local zone)
     # Additionally, we also obtain the size of the local zone
     ((i_q1_start, i_q2_start), (N_q1_local, N_q2_local)) = self._da_snes.getCorners()
+    
+    N_g = self.N_ghost
 
     snes = PETSc.SNES().create()
     pde  = poisson_eqn(self)
@@ -84,81 +147,80 @@ def compute_electrostatic_fields(self, performance_test_flag = False):
     
     snes.setDM(self._da_snes)
     snes.setFromOptions()
+    pde.density = self.compute_moments('density')
     snes.solve(None, self.glob_phi)
     
-    phi_array = self.glob_phi.getArray()
-    print("phi = ", phi_array)
-
-    
-
-#    pde = Poisson2D(self)
-#    phi = self._da_ksp.createGlobalVec()
-#    rho = self._da_ksp.createGlobalVec()
-#
-#    phi_local = self._da_ksp.createLocalVec()
-#
-#    A = PETSc.Mat().createPython([phi.getSizes(), rho.getSizes()],
-#                                 comm=self._da_ksp.comm
-#                                )
-#    A.setPythonContext(pde)
-#    A.setUp()
-#
-#    ksp = PETSc.KSP().create()
-#
-#    ksp.setOperators(A)
-#    ksp.setType('cg')
-#
-#    pc = ksp.getPC()
-#    pc.setType('none')
-#
-#    N_g = self.N_ghost
-#    ksp.setTolerances(atol=1e-7)
-#    pde.RHS(rho,
-#              self.physical_system.params.charge_electron
-#            * np.array(self.compute_moments('density')[N_g:-N_g,
-#                                                       N_g:-N_g
-#                                                      ]
-#                       - 1
-#                      )
-#           )
-#
-#    ksp.setFromOptions()
-#    ksp.solve(rho, phi)
-#
-#    num_tries = 0
-#    while(ksp.converged is not True):
-#        
-#        ksp.setTolerances(atol = 10**(-6+num_tries), rtol = 10**(-6+num_tries))
-#        ksp.solve(rho, phi)
-#        num_tries += 1
-#        
-#        if(num_tries == 5):
-#            raise Exception('KSP solver diverging!')
-#
-#    self._da_ksp.globalToLocal(phi, phi_local)
-
-#    # Since rho was defined at (i + 0.5, j + 0.5)
-#    # Electric Potential returned will also be at (i + 0.5, j + 0.5)
-#    electric_potential = af.to_array(np.swapaxes(phi_local[:].
-#                                                 reshape(  N_q2_local
-#                                                         + 2 * self.N_ghost,
-#                                                           N_q1_local +
-#                                                         + 2 * self.N_ghost
-#                                                        ),
+    #phi_array = self.glob_phi.getArray()
+    #print("phi = ", phi_array)
+    #phi_array = phi_array.reshape([N_q2_local, \
+    #                               N_q1_local, 1], \
+    #                              order='A'
+    #                             )
+
+    self._da_snes.globalToLocal(self.glob_phi, pde.local_phi)
+    phi_local_array = pde.local_phi.getArray()
+    electric_potential = af.to_array(phi_local_array)
+    phi_local_array = phi_local_array.reshape([N_q2_local + 2*N_g, \
+                                               N_q1_local + 2*N_g], \
+                                             )
+    density_af = af.moddims(pde.density,
+                              (N_q1_local+2*N_g)
+                            * (N_q2_local+2*N_g)
+                           )
+    density_np = density_af.to_ndarray()
+    density_np = density_np.reshape([N_q2_local + 2*N_g, \
+                                     N_q1_local + 2*N_g], \
+                                   )
+    # Since rho was defined at (i + 0.5, j + 0.5)
+    # Electric Potential returned will also be at (i + 0.5, j + 0.5)
+#    electric_potential = af.to_array(np.swapaxes(phi_local_array,
 #                                                 0, 1
 #                                                )
 #                                    )
-#
-#    # Obtaining the values at (i+0.5, j+0.5):
-#    self.E1 = -(  af.shift(electric_potential, -1)
-#                - af.shift(electric_potential,  1)
-#               ) / (2 * self.dq1)
-#
-#    self.E2 = -(  af.shift(electric_potential, 0, -1)
-#                - af.shift(electric_potential, 0,  1)
-#               ) / (2 * self.dq2)
-#
-#    af.eval(self.E1, self.E2)
+
+    electric_potential = af.moddims(electric_potential,
+                                    N_q1_local + 2*N_g,
+                                    N_q2_local + 2*N_g
+                                   )
+
+    # Obtaining the values at (i+0.5, j+0.5):
+    self.E1 = -(  af.shift(electric_potential, -1)
+                - af.shift(electric_potential,  1)
+               ) / (2 * self.dq1)
+
+    self.E2 = -(  af.shift(electric_potential, 0, -1)
+                - af.shift(electric_potential, 0,  1)
+               ) / (2 * self.dq2)
+
+    af.eval(self.E1, self.E2)
+
+    q2_minus = 0.25
+    q2_plus  = 0.75
+
+    E2_expected = -0.5/20 * (  af.log(af.cosh(( self.q2 - q2_minus)*20)) 
+                             - af.log(af.cosh(( self.q2 - q2_plus )*20))
+                            ) 
+
+    pl.subplot(121)
+    pl.contourf(
+                #np.array(self.E2)[N_g:-N_g, N_g:-N_g], 100
+                density_np[N_g:-N_g, N_g:-N_g], 100
+               )
+    pl.colorbar()
+    #pl.axis('equal')
+    pl.title(r'Density')
+    #pl.title(r'$E^2_{numerical}$')
+    pl.subplot(122)
+    pl.contourf(
+                #np.array(E2_expected)[N_g:-N_g, N_g:-N_g], 100
+                phi_local_array[N_g:-N_g, N_g:-N_g], 100
+               )
+    pl.colorbar()
+    pl.title(r'$\phi$')
+    #pl.title(r'$E^2_{analytic}$')
+    #pl.axis('equal')
+    pl.show()
+
 
     if(performance_test_flag == True):
         af.sync()
diff --git a/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py b/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py
index d679ff63..bcb35d2d 100644
--- a/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py
+++ b/bolt/lib/nonlinear_solver/tests/test_compute_electrostatic_fields.py
@@ -16,13 +16,19 @@
 import petsc4py, sys
 petsc4py.init(sys.argv)
 from petsc4py import PETSc
+import pylab as pl
 
 
 from bolt.lib.nonlinear_solver.EM_fields_solver.electrostatic \
     import compute_electrostatic_fields
 
 def compute_moments_sinusoidal(self, *args):
-    return (1 + af.sin(2 * np.pi * self.q1 + 4 * np.pi * self.q2))
+    rho = 1. + 0.1*af.sin(2 * np.pi * self.q1 + 4 * np.pi * self.q2)
+
+    rho_mean = af.mean(rho)
+    print("rho_mean = ", rho_mean)
+
+    return(rho - rho_mean)
 
 def compute_moments_gaussian(self, *args):
     q2_minus = 0.25
@@ -34,7 +40,26 @@ def compute_moments_gaussian(self, *args):
                      - af.tanh(( self.q2 - q2_plus )*regulator)
                     )
 
-    return(rho)
+#    rho =  af.exp(-0.*(self.q1 - 0.5)**2./0.01 - (self.q2 - 0.5)**2./0.01)
+#    rho = (self.q2 - 0.5)**2. + (self.q1 - 0.5)**2.
+
+#    rho = 1. + 0.1*af.sin(2*np.pi*self.q2)
+
+#    sigma = 0.1
+#    rho =  af.exp(-(self.q1)**2./(2.*sigma**2.) -(self.q2)**2./(2.*sigma**2.)) \
+#    	  * 1./ sigma**2. / (2. * np.pi)
+
+    N_g = self.N_ghost
+    net_charge = af.sum(rho[N_g:-N_g, N_g:-N_g]) * self.dq1 * self.dq2
+
+    total_volume =   (self.q1_end - self.q1_start) \
+                   * (self.q2_end - self.q2_start)
+
+    rho_zero_net_charge = rho - net_charge/total_volume
+
+    print("Initial net charge = ", net_charge)
+
+    return(rho_zero_net_charge)
 
 class test(object):
     def __init__(self, N_q1, N_q2):
@@ -45,11 +70,11 @@ def __init__(self, N_q1, N_q2):
                                     }
                                    )
 
-        self.q1_start = 0
-        self.q2_start = 0
+        self.q1_start = 0.
+        self.q2_start = 0.
 
-        self.q1_end = 1
-        self.q2_end = 1
+        self.q1_end = 1.
+        self.q2_end = 1.
 
         self.N_q1 = N_q1
         self.N_q2 = N_q2
@@ -99,26 +124,18 @@ def __init__(self, N_q1, N_q2):
         self.glob_phi  = self._da_snes.createGlobalVec()
         self.glob_phi.set(0.)
 
-        self._da_test = PETSc.DMDA().create([self.N_q1, self.N_q2],
-                                            stencil_width=self.N_ghost,
-                                            boundary_type=('ghosted',
-                                                           'ghosted'),
-                                            stencil_type=1,
-                                            dof = 1
-                                           )
-        self.glob_vec  = self._da_test.createGlobalVec()
+    compute_moments = compute_moments_gaussian
 
-    compute_moments = compute_moments_sinusoidal
+def test_compute_electrostatic_fields_1():
+    obj = test(70, 70)
+    compute_electrostatic_fields(obj)
 
-#def test_compute_electrostatic_fields_1():
-#
-#    error_E1 = np.zeros(5)
-#    error_E2 = np.zeros(5)
-#
-#    N = 2**np.arange(5, 10)
+#    N = 7*np.array([2, 4, 6, 8, 10, 12])
+#    error_E1 = np.zeros(N.size)
+#    error_E2 = np.zeros(N.size)
 #
 #    for i in range(N.size):
-#        obj = test(N[i])
+#        obj = test(N[i], N[i])
 #        compute_electrostatic_fields(obj)
 #
 #        E1_expected =    (0.1 / np.pi) \
@@ -143,6 +160,9 @@ def __init__(self, N_q1, N_q2):
 #                                   )
 #                            ) / (obj.E2[N_g:-N_g, N_g:-N_g].elements())
 #
+#    print("Error E1 = ", error_E1)
+#    print("Error E2 = ", error_E2)
+#
 #    poly_E1 = np.polyfit(np.log10(N), np.log10(error_E1), 1)
 #    poly_E2 = np.polyfit(np.log10(N), np.log10(error_E2), 1)
 #
@@ -151,54 +171,44 @@ def __init__(self, N_q1, N_q2):
 
 def test_compute_electrostatic_fields_2():
 
-    error_E1 = np.zeros(5)
-    error_E2 = np.zeros(5)
+    N = 7*np.array([2, 4, 6, 8, 10, 12])
+    N = 7*np.array([12])
+    error_E1 = np.zeros(N.size)
+    error_E2 = np.zeros(N.size)
 
-    obj = test(14, 7)
-    compute_electrostatic_fields(obj)
-#    print(obj.glob_phi.getArray().reshape([49, 35], order='A').strides)
-#    print(obj.glob_vec.getArray().reshape([49, 35, 10], order='A').strides)
-
-#    obj.glob_vec.set(3.)
-#    local_vec = obj._da_test.createLocalVec()
-#    obj._da_test.globalToLocal(obj.glob_vec, local_vec)
-#    local_vec_array_direct = local_vec.getArray().reshape([7 + 6, 14+6, 2],
-#            order='c')
-#    print(local_vec_array_direct)
-#    print(local_vec_array_direct.strides)
-
-#    N = 2**np.arange(5, 10)
-#
-#    for i in range(N.size):
-#        obj = test(N[i])
-#        compute_electrostatic_fields(obj)
-#
-#        E1_expected = 0 * obj.q1
-#        
-#        q2_minus = 0.25
-#        q2_plus  = 0.75
-#
-#        E2_expected = -0.5/20 * (  af.log(af.cosh(( obj.q2 - q2_minus)*20)) 
-#                                 - af.log(af.cosh(( obj.q2 - q2_plus )*20))
-#                                ) 
-#
-#        N_g = obj.N_ghost
-#
-#        error_E1[i] = af.sum(af.abs(  obj.E1[N_g:-N_g, N_g:-N_g]
-#                                    - E1_expected[N_g:-N_g, N_g:-N_g]
-#                                   )
-#                            ) / (obj.E1[N_g:-N_g, N_g:-N_g].elements())
-#
-#        error_E2[i] = af.sum(af.abs(  obj.E2[N_g:-N_g, N_g:-N_g]
-#                                    - E2_expected[N_g:-N_g, N_g:-N_g]
-#                                   )
-#                            ) / (obj.E2[N_g:-N_g, N_g:-N_g].elements())
-#
-#    poly_E1 = np.polyfit(np.log10(N), np.log10(error_E1), 1)
-#    poly_E2 = np.polyfit(np.log10(N), np.log10(error_E2), 1)
-#
-#    assert (abs(poly_E1[0] + 2) < 0.2)
-#    assert (abs(poly_E2[0] + 2) < 0.2)
+    for i in range(N.size):
+        obj = test(N[i], N[i])
+        compute_electrostatic_fields(obj)
+
+        E1_expected = 0 * obj.q1
+        
+        q2_minus = 0.25
+        q2_plus  = 0.75
+
+        E2_expected = -0.5/20 * (  af.log(af.cosh(( obj.q2 - q2_minus)*20)) 
+                                 - af.log(af.cosh(( obj.q2 - q2_plus )*20))
+                                ) 
+
+        N_g = obj.N_ghost
+
+        error_E1[i] = af.sum(af.abs(  obj.E1[N_g:-N_g, N_g:-N_g]
+                                    - E1_expected[N_g:-N_g, N_g:-N_g]
+                                   )
+                            ) / (obj.E1[N_g:-N_g, N_g:-N_g].elements())
+
+        error_E2[i] = af.sum(af.abs(  obj.E2[N_g:-N_g, N_g:-N_g]
+                                    - E2_expected[N_g:-N_g, N_g:-N_g]
+                                   )
+                            ) / (obj.E2[N_g:-N_g, N_g:-N_g].elements())
+
+    print("Error E1 = ", error_E1)
+    print("Error E2 = ", error_E2)
+
+    poly_E1 = np.polyfit(np.log10(N), np.log10(error_E1), 1)
+    poly_E2 = np.polyfit(np.log10(N), np.log10(error_E2), 1)
+
+    assert (abs(poly_E1[0] + 2) < 0.2)
+    assert (abs(poly_E2[0] + 2) < 0.2)
 
 
 test_compute_electrostatic_fields_2()

From d84a745793906cf1b366eaf05986f176d62db256 Mon Sep 17 00:00:00 2001
From: Mani Chandra <manic@illinois.edu>
Date: Tue, 10 Oct 2017 00:41:14 +0530
Subject: [PATCH 6/8] First iteration of 3D poisson solver + 2D charge density.
 Code in tests/

---
 .../test_3D_poisson_solver_2D_density.py      | 234 ++++++++++++++++++
 1 file changed, 234 insertions(+)
 create mode 100644 bolt/lib/nonlinear_solver/tests/test_3D_poisson_solver_2D_density.py

diff --git a/bolt/lib/nonlinear_solver/tests/test_3D_poisson_solver_2D_density.py b/bolt/lib/nonlinear_solver/tests/test_3D_poisson_solver_2D_density.py
new file mode 100644
index 00000000..00b82598
--- /dev/null
+++ b/bolt/lib/nonlinear_solver/tests/test_3D_poisson_solver_2D_density.py
@@ -0,0 +1,234 @@
+
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import petsc4py, sys
+petsc4py.init(sys.argv)
+from petsc4py import PETSc
+import arrayfire as af
+import numpy as np
+import pylab as pl
+
+pl.rcParams['figure.figsize']  = 17, 7.5
+pl.rcParams['figure.dpi']      = 150
+pl.rcParams['image.cmap']      = 'jet'
+pl.rcParams['lines.linewidth'] = 1.5
+pl.rcParams['font.family']     = 'serif'
+pl.rcParams['font.weight']     = 'bold'
+pl.rcParams['font.size']       = 20
+pl.rcParams['font.sans-serif'] = 'serif'
+pl.rcParams['text.usetex']     = True
+pl.rcParams['axes.linewidth']  = 1.5
+pl.rcParams['axes.titlesize']  = 'medium'
+pl.rcParams['axes.labelsize']  = 'medium'
+
+pl.rcParams['xtick.major.size'] = 8
+pl.rcParams['xtick.minor.size'] = 4
+pl.rcParams['xtick.major.pad']  = 8
+pl.rcParams['xtick.minor.pad']  = 8
+pl.rcParams['xtick.color']      = 'k'
+pl.rcParams['xtick.labelsize']  = 'medium'
+pl.rcParams['xtick.direction']  = 'in'
+
+pl.rcParams['ytick.major.size'] = 8
+pl.rcParams['ytick.minor.size'] = 4
+pl.rcParams['ytick.major.pad']  = 8
+pl.rcParams['ytick.minor.pad']  = 8
+pl.rcParams['ytick.color']      = 'k'
+pl.rcParams['ytick.labelsize']  = 'medium'
+pl.rcParams['ytick.direction']  = 'in'
+
+comm = PETSc.COMM_WORLD.tompi4py()
+
+N_q1_poisson = 70
+N_q2_poisson = 70
+N_q3_poisson = 70
+
+N_q1_density = 35
+N_q2_density = 35
+
+N_ghost = 3
+
+da_3D = PETSc.DMDA().create([N_q1_poisson,
+                             N_q2_poisson,
+                             N_q3_poisson],
+                             stencil_width = N_ghost,
+                             boundary_type = ('periodic',
+                                              'periodic',
+                                              'periodic'
+                                             ),
+                              stencil_type = 1,
+                              dof          = 1,
+                              comm         = comm
+                           )
+
+glob_phi      = da_3D.createGlobalVec()
+local_phi     = da_3D.createLocalVec()
+glob_residual = da_3D.createGlobalVec()
+
+((i_q1_3D_start, i_q2_3D_start, i_q3_3D_start),
+ (N_q1_3D_local, N_q2_3D_local, N_q3_3D_local)
+) = \
+    da_3D.getCorners()
+
+q1_3D_start = -2.; q1_3D_end = 2.
+q2_3D_start = -2.; q2_3D_end = 2.
+q3_3D_start =  0.; q3_3D_end = 2.
+
+dq1_3D = (q1_3D_end - q1_3D_start) / N_q1_poisson
+dq2_3D = (q2_3D_end - q2_3D_start) / N_q2_poisson
+dq3_3D = (q3_3D_end - q3_3D_start) / N_q3_poisson
+
+i_q1_3D = ( (i_q1_3D_start + 0.5)
+           + np.arange(-N_ghost, N_q1_3D_local + N_ghost)
+          )
+
+i_q2_3D = ( (i_q2_3D_start + 0.5)
+           + np.arange(-N_ghost, N_q2_3D_local + N_ghost)
+          )
+
+i_q3_3D = ( (i_q3_3D_start + 0.5)
+           + np.arange(-N_ghost, N_q3_3D_local + N_ghost)
+          )
+
+q1_3D =  q1_3D_start + i_q1_3D * dq1_3D
+q2_3D =  q2_3D_start + i_q2_3D * dq2_3D
+q3_3D =  q3_3D_start + i_q3_3D * dq3_3D
+
+da_2D = PETSc.DMDA().create([N_q1_density,
+                             N_q2_density],
+                             stencil_width = N_ghost,
+                             boundary_type = ('periodic',
+                                              'periodic'
+                                             ),
+                             stencil_type = 1,
+                             dof          = 1,
+                             comm         = comm
+                           )
+
+glob_density  = da_2D.createGlobalVec()
+local_density = da_2D.createLocalVec()
+
+
+((i_q1_2D_start, i_q2_2D_start), 
+ (N_q1_2D_local, N_q2_2D_local)
+) = \
+    da_2D.getCorners()
+
+q1_2D_start = -1.; q1_2D_end = 1.
+q2_2D_start = -1.; q2_2D_end = 1.
+location_in_q3 = 1.
+
+dq1_2D = (q1_2D_end - q1_2D_start) / N_q1_density
+dq2_2D = (q2_2D_end - q2_2D_start) / N_q2_density
+
+i_q1_2D = ( (i_q1_2D_start + 0.5)
+           + np.arange(-N_ghost, N_q1_2D_local + N_ghost)
+          )
+
+i_q2_2D = ( (i_q2_2D_start + 0.5)
+           + np.arange(-N_ghost, N_q2_2D_local + N_ghost)
+          )
+
+q1_2D =  q1_2D_start + i_q1_2D * dq1_2D
+q2_2D =  q2_2D_start + i_q2_2D * dq2_2D
+
+glob_density_array = glob_density.getArray(readonly=0)
+glob_density_array = glob_density_array.reshape([N_q2_2D_local, \
+                                                 N_q1_2D_local, 1], \
+                                               )
+glob_density_array[:] = 1.
+
+da_2D.globalToLocal(glob_density, local_density)
+
+density_array = local_density.getArray(readonly=0)
+density_array = density_array.reshape([N_q2_2D_local + 2*N_ghost, \
+                                       N_q1_2D_local + 2*N_ghost, 1], \
+                                     )
+
+print("rank = ", comm.rank)
+
+# Figure out the coordinates of the 3D phi cube of the current rank
+print("q1_3D_start = ", q1_3D[N_ghost])
+print("q2_3D_start = ", q2_3D[N_ghost])
+print("q3_3D_start = ", q3_3D[N_ghost])
+print(" ")
+print("q1_2D_start = ", q1_2D[N_ghost])
+print("q2_2D_start = ", q2_2D[N_ghost])
+
+q1_2D_in_3D_index_start = np.where(q1_3D > q1_2D[0]  - dq1_3D)[0][0]
+q1_2D_in_3D_index_end   = np.where(q1_3D < q1_2D[-1] + dq1_3D)[0][-1]
+q2_2D_in_3D_index_start = np.where(q2_3D > q2_2D[0]  - dq2_3D)[0][0]
+q2_2D_in_3D_index_end   = np.where(q2_3D < q2_2D[-1] + dq2_3D)[0][-1]
+q3_2D_in_3D_index_start = np.where(q3_3D > location_in_q3 - dq3_3D)[0][0]
+q3_2D_in_3D_index_end   = np.where(q3_3D < location_in_q3 + dq3_3D)[0][-1]
+
+print("q1_2D_in_3D_index_start = ", q1_2D_in_3D_index_start, "q1_3D_start = ", q1_3D[q1_2D_in_3D_index_start])
+print("q1_2D_in_3D_index_end   = ", q1_2D_in_3D_index_end, "q1_3D_end   = ", q1_3D[q1_2D_in_3D_index_end])
+print("q2_2D_in_3D_index_start = ", q2_2D_in_3D_index_start, "q2_3D_start = ", q2_3D[q2_2D_in_3D_index_start])
+print("q2_2D_in_3D_index_end   = ", q2_2D_in_3D_index_end, "q2_3D_end   = ", q2_3D[q2_2D_in_3D_index_end])
+print("q3_2D_in_3D_index_start = ", q3_2D_in_3D_index_start, "q3_3D_start = ", q3_3D[q3_2D_in_3D_index_start])
+print("q3_2D_in_3D_index_end   = ", q3_2D_in_3D_index_end, "q3_3D_end   = ", q3_3D[q3_2D_in_3D_index_end])
+
+class poisson_eqn(object):
+
+    def __init__(self):
+        self.local_phi = local_phi
+
+    def compute_residual(self, snes, phi, residual):
+        da_3D.globalToLocal(phi, local_phi)
+
+        N_g = N_ghost
+
+        phi_array = local_phi.getArray(readonly=0)
+        phi_array = phi_array.reshape([N_q3_3D_local + 2*N_g, \
+                                       N_q2_3D_local + 2*N_g, \
+                                       N_q1_3D_local + 2*N_g, 1
+        			      ]
+                                     )
+    
+        residual_array = residual.getArray(readonly=0)
+        residual_array = residual_array.reshape([N_q3_3D_local, \
+                                                 N_q2_3D_local, \
+                                                 N_q1_3D_local, 1
+        					]
+                                               )
+
+        phi_array[:N_ghost, :, :]               = 0.
+        phi_array[N_q1_3D_local+N_ghost:, :, :] = 0.
+        phi_array[:, :N_ghost, :]               = 0.
+        phi_array[:, N_q2_3D_local+N_ghost:, :] = 0.
+        phi_array[:, :, :N_ghost]               = 0.
+        phi_array[:, :, N_q3_3D_local+N_ghost:] = 0.
+
+        phi_plus_x  = np.roll(phi_array, shift=-1, axis=2)
+        phi_minus_x = np.roll(phi_array, shift=1,  axis=2)
+        phi_plus_y  = np.roll(phi_array, shift=-1, axis=1)
+        phi_minus_y = np.roll(phi_array, shift=1,  axis=1)
+        phi_plus_z  = np.roll(phi_array, shift=-1, axis=0)
+        phi_minus_z = np.roll(phi_array, shift=1,  axis=0)
+
+        d2phi_dx2   = (phi_minus_x - 2.*phi_array + phi_plus_x)/dq1_3D**2.
+        d2phi_dy2   = (phi_minus_y - 2.*phi_array + phi_plus_y)/dq2_3D**2.
+        d2phi_dz2   = (phi_minus_z - 2.*phi_array + phi_plus_z)/dq3_3D**2.
+        
+        laplacian_phi = d2phi_dx2 + d2phi_dy2 + d2phi_dz2
+
+        laplacian_phi[q3_2D_in_3D_index_start:q3_2D_in_3D_index_end,
+                      q2_2D_in_3D_index_start:q2_2D_in_3D_index_end,
+                      q1_2D_in_3D_index_start:q1_2D_in_3D_index_end
+                     ] \
+                     += density_array
+
+        residual_array[:, :, :] = \
+            laplacian_phi[N_g:-N_g, N_g:-N_g, N_g:-N_g]
+
+        return
+
+snes = PETSc.SNES().create()
+pde  = poisson_eqn()
+snes.setFunction(pde.compute_residual, glob_residual)
+
+snes.setDM(da_3D)
+snes.setFromOptions()
+snes.solve(None, glob_phi)

From dababd8fea1eed05be50fa08e0a8958574affff4 Mon Sep 17 00:00:00 2001
From: Mani Chandra <manic@illinois.edu>
Date: Tue, 10 Oct 2017 00:42:33 +0530
Subject: [PATCH 7/8] Added perf test for comm to compare old and new ordering

---
 .../tests/test_af_np_petsc_data_transfer.py   | 286 ++++++++++++++++++
 1 file changed, 286 insertions(+)
 create mode 100644 bolt/lib/nonlinear_solver/tests/test_af_np_petsc_data_transfer.py

diff --git a/bolt/lib/nonlinear_solver/tests/test_af_np_petsc_data_transfer.py b/bolt/lib/nonlinear_solver/tests/test_af_np_petsc_data_transfer.py
new file mode 100644
index 00000000..3b611a5a
--- /dev/null
+++ b/bolt/lib/nonlinear_solver/tests/test_af_np_petsc_data_transfer.py
@@ -0,0 +1,286 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import arrayfire as af
+import petsc4py, sys
+petsc4py.init(sys.argv)
+from petsc4py import PETSc
+
+N_q1    = 64
+N_q2    = 128
+N_q3    = 96
+dof     = 16*16*16
+N_ghost = 3
+print("---------------------")
+print("N_q1 + 2*N_ghost =", N_q1 + 2*N_ghost) 
+print("N_q2 + 2*N_ghost =", N_q2 + 2*N_ghost) 
+print("N_q3 + 2*N_ghost =", N_q3 + 2*N_ghost) 
+print("dof              =", dof)
+print("---------------------\n")
+
+da = PETSc.DMDA().create([N_q1, N_q2],
+                          stencil_width=N_ghost,
+                          boundary_type=('periodic',
+                                         'periodic'),
+                          stencil_type=1,
+                          dof = dof
+                        )
+
+glob_vec  = da.createGlobalVec() # No ghost zones in [N_q1, N_q2]
+local_vec = da.createLocalVec()  # Has ghost zones in [N_q1, N_q2]
+
+# Initialize glob_vec to random numbers
+glob_vec.setRandom()
+
+local_vec_array_old = da.getVecArray(local_vec)
+glob_vec_array_old  = da.getVecArray(glob_vec)
+
+#1d np arrays pointing to respective vecs
+local_vec_array  = local_vec.getArray()
+global_vec_array = glob_vec.getArray()
+
+((i_q1_start, i_q2_start), (N_q1_local, N_q2_local)) = da.getCorners()
+
+af_array_old = af.randu(N_q1_local+2*N_ghost,
+			N_q2_local+2*N_ghost,
+			dof,
+                        dtype=af.Dtype.f64
+		       )
+af.eval(af_array_old)
+
+af_array_new = af.randu(dof, 
+                        N_q1_local+2*N_ghost,
+			N_q2_local+2*N_ghost,
+                        dtype=af.Dtype.f64
+		       )
+af.eval(af_array_new)
+af.sync()
+
+print("af_array_old.shape = ", af_array_old.shape)
+print("af_array_new.shape = ", af_array_new.shape)
+
+def communicate_old(af_array, da, glob_vec, local_vec):
+    # af_array is (N_q2+2*N_ghost, N_q1+2*N_ghost, dof)
+
+    # Global value is non-inclusive of the ghost-zones:
+    glob_vec_array_old[:] = np.array(af_array[N_ghost:-N_ghost, 
+                                              N_ghost:-N_ghost
+                                             ]
+                                    )
+    
+    # The following function takes care of periodic boundary conditions,
+    # and interzonal communications:
+    da.globalToLocal(glob_vec, local_vec)
+
+    # Converting back from PETSc.Vec to af.Array:
+    af_array_after_comm = af.to_array(local_vec_array_old[:])
+
+    af.eval(af_array_after_comm)
+
+    return(af_array_after_comm)
+
+def communicate_new(af_array, da, glob_vec, local_vec):
+    # af_array is (dof, N_q1, N_q2)
+    
+    # First flatten af_array
+    af_array_1d = af.moddims(af_array[:, 
+                                      N_ghost:-N_ghost,
+                                      N_ghost:-N_ghost
+				     ],
+                               N_q1_local 
+			     * N_q2_local
+			     * dof
+                            )
+    
+    # Convert to a np array and copy to petsc
+    global_vec_array[:] = af_array_1d.to_ndarray()
+
+    # Communication: Global -> Local
+    da.globalToLocal(glob_vec, local_vec)
+
+    # loval_vec_array_new now has the data. Need to convert to af
+    af_array_after_comm_1d = af.to_array(local_vec_array)
+    af_array_after_comm    = af.moddims(af_array_after_comm_1d,
+                                        dof,
+                                        N_q1_local + 2*N_ghost,
+                                        N_q2_local + 2*N_ghost
+                                       )
+
+    af.eval(af_array_after_comm)
+
+    return(af_array_after_comm)
+
+communicate_old(af_array_old, da, glob_vec, local_vec)
+communicate_new(af_array_new, da, glob_vec, local_vec)
+af.sync()
+
+iters = 1
+
+tic_old = af.time()
+for n in range(iters):
+    communicate_old(af_array_old, da, glob_vec, local_vec)
+af.sync()
+toc_old = af.time()
+time_old = toc_old - tic_old
+
+tic_new = af.time()
+for n in range(iters):
+    communicate_new(af_array_new, da, glob_vec, local_vec)
+af.sync()
+toc_new = af.time()
+time_new = toc_new - tic_new
+
+print(" ")
+print("comm_old = ", format(time_old/iters, '.4f'), "secs/iter")
+print("comm_new = ", format(time_new/iters, '.4f'), "secs/iter")
+print(" ")
+
+## Now need to transfer data from glob_vec to local_vec
+#da.globalToLocal(glob_vec, local_vec)
+## Communication complete. All interprocessor ghost zones in local_vec are
+## now filled.
+#
+## Problem statement: Accessing local_vec using numpy arrays
+## ---------- Method 1----------- 
+#local_array_method_1 = da.getVecArray(local_vec)
+#
+## Note: da.getVecArray() does NOT return a numpy array
+##       local_array_method_1[:] is a numpy array
+#local_array_method_1_np = local_array_method_1[:]
+#
+#print("---------------------")
+#print("local_array_method_1_np.shape   = ", local_array_method_1_np.shape)
+#print("local_array_method_1_np.strides = ", local_array_method_1_np.strides)
+#print("---------------------\n")
+#
+## ---------- Method 2----------- 
+#local_array_method_2_np = local_vec.getArray()
+#
+## Note: local_vec.getArray() *directly* returns a numpy array of shape 1D and
+## size (N_q1 + 2*N_ghost) x (N_q2 + 2*N_ghost) * dof
+#
+#print("---------------------")
+#print("local_array_method_2_np.shape   = ", local_array_method_2_np.shape)
+#print("local_array_method_2_np.strides = ", local_array_method_2_np.strides)
+#print("---------------------\n")
+#
+## Now need to reshape
+#local_array_method_2_np_reshaped = \
+#        local_array_method_2_np.reshape([N_q2+2*N_ghost,
+#                                         N_q1+2*N_ghost,
+#                                         dof
+#                                        ]
+#                                       )
+#print("---------------------")
+#print("local_array_method_2_np_reshaped.shape   = ", \
+#        local_array_method_2_np_reshaped.shape)
+#print("local_array_method_2_np_reshaped.strides = ", \
+#        local_array_method_2_np_reshaped.strides)
+#print("---------------------\n")
+#
+#print("Conclusion:")
+#print(" * Natural order is set by PETSc where dof is the fastest index and q2 is the slowest index")
+#print(" * Optimal layout for np arrays is (q2, q1, dof)")
+#print(" * Optimal layout for af arrays is (dof, q1, q2)\n")
+#
+## Strategy for efficient af<->PETSc transfer:
+## 1) We need to transfer data from array_af with dims (dof, N_q1, N_q2) to a PETSc vec: glob_vec
+## 2) Reshape it into 1D : array_af_1d
+## 2) Get the associated 1d np array of glob_vec: glob_array = glob_vec.getArray()
+## 3) Fast transfer to np (and hence to PETSc): array_af_1d.to_ndarray(glob_array_np)
+## 4) Done! : glob_vec now has data from af_array
+## 5) Now perform globalToLocal(glob_vec, local_vec)
+## 6) Get 1d np array: local_array = local_vec.getArray()
+## 7) Transfer data from 1d np array to 1d af array: af_array_ghosted_1d = af.to_array(local_array)
+## 8) Reshape af_array_ghosted_1d to dims (dof, N_q1+2*N_ghost, N_q2+2*N_ghost)
+#
+#local_array_method_2_af = af.to_array(local_array_method_2_np_reshaped)
+#print("---------------------")
+#print("local_array_method_2_af.shape   = ", local_array_method_2_af.shape)
+#print("local_array_method_2_af.strides = ", local_array_method_2_af.strides())
+#print("---------------------\n")
+#
+#local_array_method_2_af_reshaped = af.to_array(local_array_method_2_np)
+#local_array_method_2_af_reshaped = af.moddims(local_array_method_2_af_reshaped,
+#                                              dof,
+#                                              N_q1+2*N_ghost,
+#                                              N_q2+2*N_ghost
+#                                             )
+#
+#print("---------------------")
+#print("local_array_method_2_af_reshaped.shape   = ",
+#        local_array_method_2_af_reshaped.shape)
+#print("local_array_method_2_af_reshaped.strides = ",
+#        local_array_method_2_af_reshaped.strides())
+#print("---------------------\n")
+#
+#local_array_af_to_np = local_array_method_2_af.to_ndarray()
+#print("---------------------")
+#print("local_array_af_to_np.shape   = ", local_array_af_to_np.shape)
+#print("local_array_af_to_np.strides = ", local_array_af_to_np.strides)
+#print("---------------------\n")
+
+#da = PETSc.DMDA().create([N_q1, N_q2, N_q3],
+#                          stencil_width=N_ghost,
+#                          boundary_type=('periodic',
+#                                         'periodic',
+#					 'periodic'),
+#                          stencil_type=1,
+#                          dof = dof
+#                        )
+#
+#glob_vec  = da.createGlobalVec() # No ghost zones in [N_q1, N_q2]
+#local_vec = da.createLocalVec()  # Has ghost zones in [N_q1, N_q2]
+#
+## Initialize glob_vec to random numbers
+#glob_vec.setRandom()
+#
+## Now need to transfer data from glob_vec to local_vec
+#da.globalToLocal(glob_vec, local_vec)
+## Communication complete. All interprocessor ghost zones in local_vec are
+## now filled.
+#
+## Problem statement: Accessing local_vec using numpy arrays
+## ---------- Method 1----------- 
+#local_array_method_1 = da.getVecArray(local_vec)
+#
+## Note: da.getVecArray() does NOT return a numpy array
+##       local_array_method_1[:] is a numpy array
+#local_array_method_1_np = local_array_method_1[:]
+#
+#print("---------------------")
+#print("local_array_method_1_np.shape   = ", local_array_method_1_np.shape)
+#print("local_array_method_1_np.strides = ", local_array_method_1_np.strides)
+#print("---------------------\n")
+#
+## ---------- Method 2----------- 
+#local_array_method_2_np = local_vec.getArray()
+#
+## Note: local_vec.getArray() *directly* returns a numpy array of shape 1D and
+## size (N_q1 + 2*N_ghost) x (N_q2 + 2*N_ghost) * dof
+#
+#print("---------------------")
+#print("local_array_method_2_np.shape   = ", local_array_method_2_np.shape)
+#print("local_array_method_2_np.strides = ", local_array_method_2_np.strides)
+#print("---------------------\n")
+#
+## Now need to reshape
+#local_array_method_2_np_reshaped = \
+#        local_array_method_2_np.reshape([N_q3+2*N_ghost,
+#	                                 N_q2+2*N_ghost,
+#                                         N_q1+2*N_ghost,
+#                                         dof
+#                                        ]
+#                                       )
+#print("---------------------")
+#print("local_array_method_2_np_reshaped.shape   = ", \
+#        local_array_method_2_np_reshaped.shape)
+#print("local_array_method_2_np_reshaped.strides = ", \
+#        local_array_method_2_np_reshaped.strides)
+#print("---------------------\n")
+#
+#print("Conclusion:")
+#print(" * Natural order is set by PETSc where dof is the fastest index and q2 is the slowest index")
+#print(" * Optimal layout for np arrays is (q3, q2, q1, dof)")
+#print(" * Optimal layout for af arrays is (dof, q1, q2, q3)\n")

From b6c7b88dece43a364b5dc432fbccfab225a9e84f Mon Sep 17 00:00:00 2001
From: Mani Chandra <manic@illinois.edu>
Date: Tue, 10 Oct 2017 18:08:03 +0530
Subject: [PATCH 8/8] Isolated rate limiting step in comm_new()

---
 .../tests/test_af_np_petsc_data_transfer.py   | 37 +++++++++++++------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/bolt/lib/nonlinear_solver/tests/test_af_np_petsc_data_transfer.py b/bolt/lib/nonlinear_solver/tests/test_af_np_petsc_data_transfer.py
index 3b611a5a..fd2be556 100644
--- a/bolt/lib/nonlinear_solver/tests/test_af_np_petsc_data_transfer.py
+++ b/bolt/lib/nonlinear_solver/tests/test_af_np_petsc_data_transfer.py
@@ -6,6 +6,7 @@
 import petsc4py, sys
 petsc4py.init(sys.argv)
 from petsc4py import PETSc
+af.info()
 
 N_q1    = 64
 N_q2    = 128
@@ -60,6 +61,8 @@
 print("af_array_old.shape = ", af_array_old.shape)
 print("af_array_new.shape = ", af_array_new.shape)
 
+af.print_mem_info()
+
 def communicate_old(af_array, da, glob_vec, local_vec):
     # af_array is (N_q2+2*N_ghost, N_q1+2*N_ghost, dof)
 
@@ -82,6 +85,13 @@ def communicate_old(af_array, da, glob_vec, local_vec):
 
 def communicate_new(af_array, da, glob_vec, local_vec):
     # af_array is (dof, N_q1, N_q2)
+#    af_array_1d = af.moddims(af_array,
+#                               (N_q1_local + 2*N_ghost)
+#			     * (N_q2_local + 2*N_ghost)
+#			     * dof
+#                            )
+#
+#    local_vec_array[:] = af_array_1d.to_ndarray()
     
     # First flatten af_array
     af_array_1d = af.moddims(af_array[:, 
@@ -94,27 +104,30 @@ def communicate_new(af_array, da, glob_vec, local_vec):
                             )
     
     # Convert to a np array and copy to petsc
-    global_vec_array[:] = af_array_1d.to_ndarray()
+    #global_vec_array[:] = af_array_1d.to_ndarray()
+    af_array_1d.to_ndarray(global_vec_array)
 
     # Communication: Global -> Local
     da.globalToLocal(glob_vec, local_vec)
 
-    # loval_vec_array_new now has the data. Need to convert to af
-    af_array_after_comm_1d = af.to_array(local_vec_array)
-    af_array_after_comm    = af.moddims(af_array_after_comm_1d,
-                                        dof,
-                                        N_q1_local + 2*N_ghost,
-                                        N_q2_local + 2*N_ghost
-                                       )
-
-    af.eval(af_array_after_comm)
+    # local_vec_array_new now has the data. Need to convert to af
+    af_array_after_comm_1d = af.interop.np_to_af_array(local_vec_array)
+#    af_array_after_comm    = af.moddims(af_array_after_comm_1d,
+#                                        dof,
+#                                        N_q1_local + 2*N_ghost,
+#                                        N_q2_local + 2*N_ghost
+#                                       )
+#
+#    af.eval(af_array_after_comm)
 
-    return(af_array_after_comm)
+#    return(af_array_after_comm)
 
-communicate_old(af_array_old, da, glob_vec, local_vec)
+#communicate_old(af_array_old, da, glob_vec, local_vec)
 communicate_new(af_array_new, da, glob_vec, local_vec)
 af.sync()
 
+af.print_mem_info()
+
 iters = 1
 
 tic_old = af.time()