@@ -87,54 +87,92 @@ template <typename _DataType>
class dpnp_partition_c_kernel;

template <typename _DataType>
-void dpnp_partition_c(const void* sort_array1_in, void* result1, const size_t kth, const size_t* shape, const size_t ndim)
+void dpnp_partition_c(void* array1_in, void* array2_in, void* result1, const size_t kth, const size_t* shape_, const size_t ndim)
{
+    _DataType* arr = reinterpret_cast<_DataType*>(array1_in);
+    _DataType* arr2 = reinterpret_cast<_DataType*>(array2_in);
+    _DataType* result = reinterpret_cast<_DataType*>(result1);

-    cl::sycl::event event;
+    if ((arr == nullptr) || (result == nullptr))
+    {
+        return;
+    }

-    const _DataType* sort_arr = reinterpret_cast<const _DataType*>(sort_array1_in);
-    _DataType* result = reinterpret_cast<_DataType*>(result1);
+    if (ndim < 1)
+    {
+        return;
+    }

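+    // size: total number of elements in the input array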
-    size_t size_ = 1;
-    for (size_t i = 0; i < ndim - 1; ++i)
+    size_t size = 1;
+    for (size_t i = 0; i < ndim; ++i)
    {
-        size_ *= shape[i];
+        size *= shape_[i];
    }

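+    // size_: number of 1D slices along the last (partitioned) axis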
+    size_t size_ = size / shape_[ndim - 1];
+
    if (size_ == 0)
    {
        return;
    }

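+    // copy the input data into the result buffer; the kernel below reorders it in place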
+    auto arr_to_result_event = DPNP_QUEUE.memcpy(result, arr, size * sizeof(_DataType));
+    arr_to_result_event.wait();
+
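+    // sort each last-axis slice of arr2 in place on the host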
+    for (size_t i = 0; i < size_; ++i)
+    {
+        size_t ind_begin = i * shape_[ndim - 1];
+        size_t ind_end = (i + 1) * shape_[ndim - 1] - 1;
+
+        _DataType matrix[shape_[ndim - 1]];
+        for (size_t j = ind_begin; j < ind_end + 1; ++j)
+        {
+            size_t ind = j - ind_begin;
+            matrix[ind] = arr2[j];
+        }
+        std::partial_sort(matrix, matrix + shape_[ndim - 1], matrix + shape_[ndim - 1]);
+        for (size_t j = ind_begin; j < ind_end + 1; ++j)
+        {
+            size_t ind = j - ind_begin;
+            arr2[j] = matrix[ind];
+        }
+    }
+
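+    // copy the shape into memory allocated through dpnp_memory_alloc_c for use inside the kernel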
+    size_t* shape = reinterpret_cast<size_t*>(dpnp_memory_alloc_c(ndim * sizeof(size_t)));
+    auto memcpy_event = DPNP_QUEUE.memcpy(shape, shape_, ndim * sizeof(size_t));
+
+    memcpy_event.wait();
+
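+    // one work-item per (slice, position <= kth) pair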
    cl::sycl::range<2> gws(size_, kth + 1);
    auto kernel_parallel_for_func = [=](cl::sycl::id<2> global_id) {
        size_t j = global_id[0];
        size_t k = global_id[1];

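+        // arr2 is fully sorted along the last axis, so val is the k-th smallest
+        // element of slice j; find it in result and swap it into position k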
-        _DataType val = sort_arr[j * shape[ndim - 1] + k];
+        _DataType val = arr2[j * shape[ndim - 1] + k];

-        size_t ind = j * shape[ndim - 1] + k;
        for (size_t i = 0; i < shape[ndim - 1]; ++i)
        {
            if (result[j * shape[ndim - 1] + i] == val)
            {
-                ind = j * shape[ndim - 1] + i;
-                break;
+                _DataType change_val1 = result[j * shape[ndim - 1] + i];
+                _DataType change_val2 = result[j * shape[ndim - 1] + k];
+                result[j * shape[ndim - 1] + k] = change_val1;
+                result[j * shape[ndim - 1] + i] = change_val2;
            }
        }

-        _DataType change_val = result[j * shape[ndim - 1] + k];
-        result[j * shape[ndim - 1] + k] = val;
-        result[ind] = change_val;
    };

    auto kernel_func = [&](cl::sycl::handler& cgh) {
+        cgh.depends_on({memcpy_event});
        cgh.parallel_for<class dpnp_partition_c_kernel<_DataType>>(gws, kernel_parallel_for_func);
    };

-    event = DPNP_QUEUE.submit(kernel_func);
+    auto event = DPNP_QUEUE.submit(kernel_func);

    event.wait();
+
+    dpnp_memory_free_c(shape);
}

template <typename _DataType>