Description
I would like to use pressio-tools for a parallel SVD in C++.
Eventually I would like to run on hundreds of cores.
The demos/svd_parallel_call_from_python/main.py program works, and I have translated it to C++.
For some reason the call to computeThin is causing errors.
Please help me understand what is wrong?
mpirun -n 2 ./svdc
*** Error in `./svdc': free(): invalid next size (normal): 0x0000000002806d10 ***
after computeThin
after computeThin
======= Backtrace: =========
/lib64/libc.so.6(+0x81329)[0x7f23a7821329]
./svdc[0x448207]
./svdc[0x441851]
./svdc[0x43aeea]
./svdc[0x42309d]
./svdc[0x423181]
/lib64/libc.so.6(__libc_start_main+0xf5)[0x7f23a77c2555]
./svdc[0x422da9]
======= Memory map: ========
00400000-00421000 r--p 00000000 fd:03 42477065 /home/dh/trilinos/pressio-tools/demos/svd_parallel_call_from_python/svdc
00421000-004a0000 r-xp 00021000 fd:03 42477065 /home/dh/trilinos/pressio-tools/demos/svd_parallel_call_from_python/svdc
...
I am using:
A single 4-core machine.
pressiotools.__version__
'0.6.1rc1'
pressio-tools cloned 4/5/22
g++ 9.3.1-2
openmpi (OpenRTE) 1.10.7
trilinos 13-2-0
cat svdc.cpp
#define PRESSIOTOOLS_ENABLE_TPL_MPI 1
#include <cstdlib>
#include <iostream>
#include <types.hpp>
#include <mpi.h>
#include "./data_structures/vector.hpp"
#include "./data_structures/multivector.hpp"
#include <tsqr.hpp>
#include <Teuchos_SerialDenseMatrix.hpp>
#include <svd.hpp>
#include <xlnt/xlnt.hpp>
namespace py = pybind11;
double randf(){
  return (double)rand()/RAND_MAX;
}
void run(MPI_Comm comm)
{
  /*
    this demo shows how to compute the parallel SVD
    over N ranks of a block-row distributed random matrix A.

    Suppose A is a 100x5 matrix such that it is
    block-row distributed over multiple MPI ranks.
    For example, suppose we have 4 ranks:

          0       j      4
    i=0   ----------------
               rank=0
    24    ----------------
               rank=1
    49    ----------------
               rank=2
    74    ----------------
               rank=3
    i=99  ----------------

    Each rank owns 25 rows and all columns of A.
  */

  int rank;
  MPI_Comm_rank(comm, &rank);

  // fix seed for reproducibility
  std::srand(1);
  // np.random.seed(312367)

  // create the local piece for each rank:
  // here for simplicity we use random numbers, but one can
  // fill each piece by reading a file or some other way.
  // Note: the layout MUST be fortran such that
  // pressiotools can view it without copying data.
  // This is important to reduce the memory footprint,
  // especially for large matrices.
#define HEIGHT 25
#define WIDTH 5

  // allocation must be in fortran order
  pressiotools::py_f_arr local_data({ HEIGHT, WIDTH });
  auto r = local_data.mutable_unchecked<2>();
  for (py::ssize_t k = 0; k < r.shape(2); k++)
    for (py::ssize_t j = 0; j < r.shape(1); j++)
      r(j, k) = randf();

  // use the local piece to create a distributed multivector;
  // the multivector is an abstraction that allows us
  // to easily perform all the computations behind the scenes
  // in native C++ without a performance hit
  auto A = pressiotools::MultiVector(local_data);

  pressiotools::Svd svdO;
  // If a return is placed here there are no errors.
  // return

  // The following line causes errors.
  svdO.computeThin(A);
  std::cout << "after computeThin" << std::endl;
  return;

#if 0
  // U is a numpy array viewing the local piece. U is the largest returned array
  // so it is not replicated.
  auto U = svdO.viewLocalLeftSingVectors();

  // S contains the singular values, replicated on each rank
  auto S = svdO.viewSingularValues();

  // VT contains the transpose of the right singular vectors, replicated on each rank
  auto VT = svdO.viewRightSingVectorsTransposed();

  // print("Rank = {}, U.shape = {}, S.shape = {}, VT.shape = {}".format(
  //   rank, U.shape, S.shape, VT.shape))
  // if rank==0: print("singular values = {}".format(S))
#endif
}
// run with: mpirun -np 2 svdc
int main(int argc, char *argv[])
{
  Py_Initialize();
  MPI_Init(&argc, &argv);

  MPI_Comm comm = MPI_COMM_WORLD;
  run(comm);

  MPI_Finalize();
  Py_Finalize();
  return 0;
}
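
For context, here is a minimal, self-contained sketch of the block-row split described in the comment inside run(), i.e. how the 100 rows could be divided over the available ranks so that with 4 ranks each rank owns 25 contiguous rows. This only illustrates one common convention (remainder rows go to the lowest ranks); the helper name myRowRange is mine and is not part of pressio-tools or the demo.

#include <mpi.h>
#include <cstdio>

// Illustrative only: compute which contiguous rows of a matrix with
// globalRows rows the calling rank owns under a block-row distribution.
void myRowRange(int globalRows, int rank, int nranks,
                int& firstRow, int& numRows)
{
  const int base  = globalRows / nranks;   // rows every rank gets at minimum
  const int extra = globalRows % nranks;   // leftover rows go to the first ranks
  numRows  = base + (rank < extra ? 1 : 0);
  firstRow = rank * base + (rank < extra ? rank : extra);
}

int main(int argc, char* argv[])
{
  MPI_Init(&argc, &argv);
  int rank = 0, nranks = 1;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nranks);

  int firstRow = 0, numRows = 0;
  myRowRange(100, rank, nranks, firstRow, numRows);
  // with 4 ranks this prints rows [0,24], [25,49], [50,74], [75,99]
  std::printf("rank %d owns rows [%d, %d]\n", rank, firstRow, firstRow + numRows - 1);

  MPI_Finalize();
  return 0;
}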
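
The comments in run() also stress that the local block must be allocated in Fortran (column-major) order so that pressiotools can view it without copying. Independent of pressio-tools, this small sketch shows what column-major layout means for an m x n block such as the 25x5 one above; all names in it are illustrative only.

#include <cstddef>
#include <vector>
#include <cassert>

// Column-major (Fortran) offset of element (i, j) in an m x n matrix:
// consecutive elements of a column are adjacent in memory.
std::size_t colMajorOffset(std::size_t i, std::size_t j, std::size_t m)
{
  return i + j * m;
}

int main()
{
  const std::size_t m = 25, n = 5;      // same local block size as in the demo
  std::vector<double> buf(m * n, 0.0);

  // fill column by column, the natural traversal order for a column-major buffer
  for (std::size_t j = 0; j < n; ++j)
    for (std::size_t i = 0; i < m; ++i)
      buf[colMajorOffset(i, j, m)] = static_cast<double>(i + j);

  // element (1,0) sits directly after (0,0) in memory
  assert(buf[colMajorOffset(1, 0, m)] == 1.0);
  return 0;
}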