Skip to content

Perf boost [WIP] #36

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
267 changes: 181 additions & 86 deletions bolt/lib/nonlinear_solver/EM_fields_solver/electrostatic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,38 @@
import arrayfire as af
import numpy as np
from numpy.fft import fftfreq


class Poisson2D(object):
import pylab as pl

pl.rcParams['figure.figsize'] = 17, 7.5
pl.rcParams['figure.dpi'] = 150
pl.rcParams['image.cmap'] = 'jet'
pl.rcParams['lines.linewidth'] = 1.5
pl.rcParams['font.family'] = 'serif'
pl.rcParams['font.weight'] = 'bold'
pl.rcParams['font.size'] = 20
pl.rcParams['font.sans-serif'] = 'serif'
pl.rcParams['text.usetex'] = True
pl.rcParams['axes.linewidth'] = 1.5
pl.rcParams['axes.titlesize'] = 'medium'
pl.rcParams['axes.labelsize'] = 'medium'

pl.rcParams['xtick.major.size'] = 8
pl.rcParams['xtick.minor.size'] = 4
pl.rcParams['xtick.major.pad'] = 8
pl.rcParams['xtick.minor.pad'] = 8
pl.rcParams['xtick.color'] = 'k'
pl.rcParams['xtick.labelsize'] = 'medium'
pl.rcParams['xtick.direction'] = 'in'

pl.rcParams['ytick.major.size'] = 8
pl.rcParams['ytick.minor.size'] = 4
pl.rcParams['ytick.major.pad'] = 8
pl.rcParams['ytick.minor.pad'] = 8
pl.rcParams['ytick.color'] = 'k'
pl.rcParams['ytick.labelsize'] = 'medium'
pl.rcParams['ytick.direction'] = 'in'

class poisson_eqn(object):
"""
This user class is an application context for the problem at hand;
It contains some parametes and frames the matrix system depending on
Expand All @@ -16,38 +45,89 @@ class Poisson2D(object):
using the PETSc's KSP solver methods
"""

def __init__(self, obj):
self.da = obj._da_ksp
self.obj = obj
self.localX = self.da.createLocalVec()

def RHS(self, rho, rho_array):
rho_val = self.da.getVecArray(rho)
rho_val[:] = rho_array

def mult(self, mat, X, Y):

self.da.globalToLocal(X, self.localX)

x = self.da.getVecArray(self.localX)
y = self.da.getVecArray(Y)

(q1_start, q1_end), (q2_start, q2_end) = self.da.getRanges()

for j in range(q1_start, q1_end):
for i in range(q2_start, q2_end):
u = x[j, i] # center
def __init__(self, nonlinear_solver_obj):
self.da = nonlinear_solver_obj._da_snes
self.obj = nonlinear_solver_obj
self.local_phi = self.da.createLocalVec() # phi with ghost zones
self.N_ghost = self.obj.N_ghost
self.dq1 = self.obj.dq1
self.dq2 = self.obj.dq2
self.density = 0.

((i_q1_start, i_q2_start), (N_q1_local, N_q2_local)) = \
self.da.getCorners()

self.N_q1_local = N_q1_local
self.N_q2_local = N_q2_local

u_w = x[j, i - 1] # west
u_e = x[j, i + 1] # east
u_s = x[j - 1, i] # south
u_n = x[j + 1, i] # north
def compute_residual(self, snes, phi, residual):

u_xx = (-u_e + 2 * u - u_w) / self.obj.dq2**2
u_yy = (-u_n + 2 * u - u_s) / self.obj.dq1**2
self.da.globalToLocal(phi, self.local_phi)

y[j, i] = u_xx + u_yy
N_g = self.N_ghost

# Residual assembly using numpy
phi_array = self.local_phi.getArray(readonly=0)
phi_array = phi_array.reshape([self.N_q2_local + 2*N_g, \
self.N_q1_local + 2*N_g, 1], \
order='A'
)

residual_array = residual.getArray(readonly=0)
residual_array = residual_array.reshape([self.N_q2_local, \
self.N_q1_local, 1], \
order='A'
)

#phi_array[:N_g, :] = 0.
#phi_array[self.N_q2_local+N_g:, :] = 0.
#phi_array[:, :N_g] = 0.
#phi_array[:, self.N_q1_local+N_g:] = 0.

phi_plus_x = np.roll(phi_array, shift=-1, axis=1)
phi_minus_x = np.roll(phi_array, shift=1, axis=1)
phi_plus_y = np.roll(phi_array, shift=-1, axis=0)
phi_minus_y = np.roll(phi_array, shift=1, axis=0)

d2phi_dx2 = (phi_minus_x - 2.*phi_array + phi_plus_x)/self.dq1**2.
d2phi_dy2 = (phi_minus_y - 2.*phi_array + phi_plus_y)/self.dq2**2.

laplacian_phi = d2phi_dx2 + d2phi_dy2

density_af = af.moddims(self.density,
(self.N_q1_local+2*N_g)
* (self.N_q2_local+2*N_g)
)
density_np = density_af.to_ndarray()
density_np = density_np.reshape([self.N_q2_local + 2*N_g, \
self.N_q1_local + 2*N_g, 1], \
order='A'
)

residual_array[:, :] = \
(laplacian_phi + density_np)[N_g:-N_g, N_g:-N_g]

# residual_array[:, :] = \
# (laplacian_phi + self.density)[N_g:-N_g, N_g:-N_g]


# Residual assembly using arrayfire
# phi_array = self.local_phi.getArray(readonly=1)
# phi_af_array = af.to_array(phi_array)
# phi_af_array = af.moddims(phi_af_array,
# self.N_q1_local + 2*N_g,
# self.N_q2_local + 2*N_g
# )
#
# residual_af_array = phi_af_array[N_g:-N_g, N_g:-N_g]**2. - 2.
# residual_af_array = af.moddims(residual_af_array,
# self.N_q1_local
# * self.N_q2_local
# )
# residual_array = residual.getArray(readonly=0)
# residual_array[:] = residual_af_array.to_ndarray()

return

def compute_electrostatic_fields(self, performance_test_flag = False):

Expand All @@ -57,64 +137,51 @@ def compute_electrostatic_fields(self, performance_test_flag = False):
# Obtaining the left-bottom corner coordinates
# (lowest values of the canonical coordinates in the local zone)
# Additionally, we also obtain the size of the local zone
((i_q1_lowest, i_q2_lowest), (N_q1_local,N_q2_local)) = self._da_ksp.getCorners()

pde = Poisson2D(self)
phi = self._da_ksp.createGlobalVec()
rho = self._da_ksp.createGlobalVec()

phi_local = self._da_ksp.createLocalVec()

A = PETSc.Mat().createPython([phi.getSizes(), rho.getSizes()],
comm=self._da_ksp.comm
)
A.setPythonContext(pde)
A.setUp()

ksp = PETSc.KSP().create()

ksp.setOperators(A)
ksp.setType('cg')

pc = ksp.getPC()
pc.setType('none')

((i_q1_start, i_q2_start), (N_q1_local, N_q2_local)) = self._da_snes.getCorners()

N_g = self.N_ghost
ksp.setTolerances(atol=1e-7)
pde.RHS(rho,
self.physical_system.params.charge_electron
* np.array(self.compute_moments('density')[N_g:-N_g,
N_g:-N_g
]
- 1
)
)

ksp.solve(rho, phi)

num_tries = 0
while(ksp.converged is not True):

ksp.setTolerances(atol = 10**(-6+num_tries), rtol = 10**(-6+num_tries))
ksp.solve(rho, phi)
num_tries += 1

if(num_tries == 5):
raise Exception('KSP solver diverging!')

self._da_ksp.globalToLocal(phi, phi_local)

snes = PETSc.SNES().create()
pde = poisson_eqn(self)
snes.setFunction(pde.compute_residual, self.glob_residual)

snes.setDM(self._da_snes)
snes.setFromOptions()
pde.density = self.compute_moments('density')
snes.solve(None, self.glob_phi)

#phi_array = self.glob_phi.getArray()
#print("phi = ", phi_array)
#phi_array = phi_array.reshape([N_q2_local, \
# N_q1_local, 1], \
# order='A'
# )

self._da_snes.globalToLocal(self.glob_phi, pde.local_phi)
phi_local_array = pde.local_phi.getArray()
electric_potential = af.to_array(phi_local_array)
phi_local_array = phi_local_array.reshape([N_q2_local + 2*N_g, \
N_q1_local + 2*N_g], \
)
density_af = af.moddims(pde.density,
(N_q1_local+2*N_g)
* (N_q2_local+2*N_g)
)
density_np = density_af.to_ndarray()
density_np = density_np.reshape([N_q2_local + 2*N_g, \
N_q1_local + 2*N_g], \
)
# Since rho was defined at (i + 0.5, j + 0.5)
# Electric Potential returned will also be at (i + 0.5, j + 0.5)
electric_potential = af.to_array(np.swapaxes(phi_local[:].
reshape( N_q2_local
+ 2 * self.N_ghost,
N_q1_local +
+ 2 * self.N_ghost
),
0, 1
)
)
# electric_potential = af.to_array(np.swapaxes(phi_local_array,
# 0, 1
# )
# )

electric_potential = af.moddims(electric_potential,
N_q1_local + 2*N_g,
N_q2_local + 2*N_g
)

# Obtaining the values at (i+0.5, j+0.5):
self.E1 = -( af.shift(electric_potential, -1)
Expand All @@ -127,6 +194,34 @@ def compute_electrostatic_fields(self, performance_test_flag = False):

af.eval(self.E1, self.E2)

q2_minus = 0.25
q2_plus = 0.75

E2_expected = -0.5/20 * ( af.log(af.cosh(( self.q2 - q2_minus)*20))
- af.log(af.cosh(( self.q2 - q2_plus )*20))
)

pl.subplot(121)
pl.contourf(
#np.array(self.E2)[N_g:-N_g, N_g:-N_g], 100
density_np[N_g:-N_g, N_g:-N_g], 100
)
pl.colorbar()
#pl.axis('equal')
pl.title(r'Density')
#pl.title(r'$E^2_{numerical}$')
pl.subplot(122)
pl.contourf(
#np.array(E2_expected)[N_g:-N_g, N_g:-N_g], 100
phi_local_array[N_g:-N_g, N_g:-N_g], 100
)
pl.colorbar()
pl.title(r'$\phi$')
#pl.title(r'$E^2_{analytic}$')
#pl.axis('equal')
pl.show()


if(performance_test_flag == True):
af.sync()
toc = af.time()
Expand All @@ -152,7 +247,7 @@ def fft_poisson(self, performance_test_flag = False):
else:
N_g = self.N_ghost
rho = self.physical_system.params.charge_electron \
* self.compute_moments('density')[N_g:-N_g, N_g:-N_g]
* (self.compute_moments('density')[N_g:-N_g, N_g:-N_g])

k_q1 = fftfreq(rho.shape[0], self.dq1)
k_q2 = fftfreq(rho.shape[1], self.dq2)
Expand Down
27 changes: 17 additions & 10 deletions bolt/lib/nonlinear_solver/nonlinear_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,16 +143,17 @@ def __init__(self, physical_system):

# Additionally, a DA object also needs to be created for the KSP solver
# with a DOF of 1:
self._da_ksp = PETSc.DMDA().create([self.N_q1, self.N_q2],
stencil_width = self.N_ghost,
boundary_type = (self.bc_in_q1,
self.bc_in_q2
),
proc_sizes = (PETSc.DECIDE,
PETSc.DECIDE
),
stencil_type = 1,
comm = self._comm)
self._da_snes = PETSc.DMDA().create([self.N_q1, self.N_q2],
stencil_width = self.N_ghost,
boundary_type = (self.bc_in_q1,
self.bc_in_q2
),
proc_sizes = (PETSc.DECIDE,
PETSc.DECIDE
),
stencil_type = 1,
dof = 1,
comm = self._comm)

self._da_dump_moments = PETSc.DMDA().create([self.N_q1, self.N_q2],
dof = len(self.
Expand All @@ -174,6 +175,12 @@ def __init__(self, physical_system):
# the communication routines for EM fields
self._glob_fields = self._da_fields.createGlobalVec()
self._local_fields = self._da_fields.createLocalVec()

self.glob_phi = self._da_snes.createGlobalVec()
self.glob_residual = self._da_snes.createGlobalVec()
self.glob_phi.set(0.)
self.glob_residual.set(0.)


# The following vector is used to dump the data to file:
self._glob_moments = self._da_dump_moments.createGlobalVec()
Expand Down
Loading