[XPU][HOST] add unstack op and kernels (#4876) (#4892)

zhupengyang · web-flow · commit 9b9a31820259 · 2020-12-04T16:27:12.000+08:00
diff --git a/lite/kernels/host/CMakeLists.txt b/lite/kernels/host/CMakeLists.txt
@@ -1,5 +1,6 @@
 message(STATUS "compile with lite host kernels")
 
+# basic kernels
 add_kernel(feed_compute_host Host basic SRCS feed_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(fetch_compute_host Host basic SRCS fetch_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(range_compute_host Host basic SRCS range_compute.cc DEPS ${lite_kernel_deps})
@@ -12,6 +13,9 @@ add_kernel(expand_as_compute_host Host basic SRCS expand_as_compute.cc DEPS ${li
 add_kernel(fill_constant_compute_host Host basic SRCS fill_constant_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(fill_constant_batch_size_like_compute_host Host basic SRCS fill_constant_batch_size_like_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(stack_compute_host Host basic SRCS stack_compute.cc DEPS ${lite_kernel_deps})
+
+# extra kernels
+add_kernel(unstack_compute_host Host extra SRCS unstack_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(shape_compute_host Host extra SRCS shape_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(is_empty_compute_host Host extra SRCS is_empty_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(crf_decoding_compute_host Host extra SRCS crf_decoding_compute.cc DEPS ${lite_kernel_deps})
diff --git a/lite/kernels/host/unstack_compute.cc b/lite/kernels/host/unstack_compute.cc
@@ -0,0 +1,68 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/host/unstack_compute.h"
+#include <cstring>
+#include <vector>
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace host {
+
+// Host implementation of the unstack op: splits X along `axis` and writes
+// slice i into param.Out[i].
+//
+// X is walked as a row-major [outer, x_dims[axis], inner] array. For every
+// output i, the `inner`-sized run at index i of each outer row is copied
+// back-to-back into that output's buffer.
+template <typename T, PrecisionType PType>
+void UnstackCompute<T, PType>::Run() {
+  auto& param = this->template Param<operators::UnstackParam>();
+  const auto* x = param.X;
+  // Read-only view of the output list; copying the vector on every run is
+  // unnecessary.
+  const auto& outs = param.Out;
+  const auto x_dims = x->dims();
+  int axis = param.axis;
+  if (axis < 0) {
+    axis += x_dims.size();
+  }
+
+  // Number of contiguous elements that make up one slice of `axis`.
+  size_t stride_copy = 1;
+  for (size_t i = axis + 1; i < x_dims.size(); i++) {
+    stride_copy *= static_cast<size_t>(x_dims[i]);
+  }
+  // Distance in X between the same slice of two consecutive outer rows.
+  const size_t stride_move = stride_copy * static_cast<size_t>(x_dims[axis]);
+  // An empty dimension at or after `axis` makes stride_move zero. Dividing by
+  // it is undefined behaviour, and there is nothing to copy in that case.
+  const size_t copy_times =
+      stride_move == 0
+          ? 0
+          : static_cast<size_t>(x_dims.production()) / stride_move;
+
+  const T* x_data = x->template data<T>();
+  for (size_t i = 0; i < outs.size(); i++) {
+    // Slice i starts i * stride_copy elements into the first outer row.
+    const T* x_ptr = x_data + i * stride_copy;
+    T* out_ptr = outs[i]->template mutable_data<T>();
+    for (size_t j = 0; j < copy_times; j++) {
+      std::memcpy(out_ptr, x_ptr, sizeof(T) * stride_copy);
+      x_ptr += stride_move;
+      out_ptr += stride_copy;
+    }
+  }
+}
+
+}  // namespace host
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Register the float instantiation as the host `unstack` kernel (alias
+// `def`). Input "X" and output "Y" are both bound as host float tensors of
+// any data layout.
+using unstack_float =
+    paddle::lite::kernels::host::UnstackCompute<float, PRECISION(kFloat)>;
+REGISTER_LITE_KERNEL(unstack, kHost, kFloat, kAny, unstack_float, def)
+    .BindInput("X",
+               {LiteType::GetTensorTy(
+                   TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny), -1)})
+    .BindOutput("Y",
+                {LiteType::GetTensorTy(
+                    TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kAny), -1)})
+    .Finalize();
diff --git a/lite/kernels/host/unstack_compute.h b/lite/kernels/host/unstack_compute.h
@@ -0,0 +1,36 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "lite/core/kernel.h"
+#include "lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace host {
+
+// Host kernel for the unstack op, templated on the element type T and on the
+// precision tag PType it is instantiated with. Declared for any data layout.
+template <typename T, PrecisionType PType>
+class UnstackCompute
+    : public KernelLite<TARGET(kHost), PType, DATALAYOUT(kAny)> {
+ public:
+  virtual ~UnstackCompute() = default;
+
+  // Executes the kernel; the definition lives in unstack_compute.cc.
+  void Run() override;
+};
+
+}  // namespace host
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/kernels/xpu/CMakeLists.txt b/lite/kernels/xpu/CMakeLists.txt
@@ -39,6 +39,7 @@ else()
   add_kernel(var_conv_2d_compute_xpu XPU extra SRCS var_conv_2d_compute.cc DEPS ${lite_kernel_deps})
   add_kernel(search_grnn_compute_xpu XPU extra SRCS search_grnn_compute.cc DEPS ${lite_kernel_deps})
   add_kernel(sequence_unpad_compute_xpu XPU extra SRCS sequence_unpad_compute.cc DEPS ${lite_kernel_deps})
+  add_kernel(unstack_compute_xpu XPU extra SRCS unstack_compute.cc DEPS ${lite_kernel_deps})
 
   # extra(fused kernel)
   add_kernel(__xpu__resnet50_compute_xpu XPU extra SRCS __xpu__resnet50_compute.cc DEPS ${lite_kernel_deps})
diff --git a/lite/kernels/xpu/unstack_compute.cc b/lite/kernels/xpu/unstack_compute.cc
@@ -0,0 +1,72 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/kernels/xpu/unstack_compute.h"
+#include <vector>
+#include "lite/backends/xpu/xpu_header_sitter.h"
+#include "lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace xpu {
+
+// XPU implementation of the unstack op.
+//
+// `height` is the product of X's dimensions before `axis`, and each output
+// contributes a width of numel / height. Both are handed to
+// xdnn::concat_grad, which presumably scatters X (viewed as a
+// [height, sum(width)] matrix) into the output buffers.
+void UnstackCompute::Run() {
+  auto& param = this->Param<param_t>();
+  auto& ctx = this->ctx_->As<XPUContext>();
+  auto& dout = param.Out;
+  auto in_dim = param.X->dims();
+  int axis = param.axis;
+  if (axis < 0) {
+    axis += in_dim.size();
+  }
+  int num = param.num;
+  // `num` is passed to concat_grad as the output count, while the pointer and
+  // width arrays below hold one entry per tensor in Out; the two must agree.
+  CHECK_EQ(static_cast<int>(dout.size()), num)
+      << "num(attr) should be equal to the number of outputs.";
+
+  int height = 1;
+  for (int i = 0; i < axis; i++) {
+    height = height * in_dim[i];
+  }
+
+  std::vector<float*> out_ptrs;
+  std::vector<int> width_out;
+  out_ptrs.reserve(dout.size());
+  width_out.reserve(dout.size());
+
+  for (auto* out : dout) {
+    out->set_lod(param.X->lod());
+    out_ptrs.push_back(out->mutable_data<float>(TARGET(kXPU)));
+    // height is 0 when a dimension before `axis` is empty; never divide by it.
+    width_out.push_back(
+        height == 0 ? 0 : static_cast<int>(out->numel() / height));
+  }
+
+  // An empty input has nothing to split; the outputs were set up above.
+  if (param.X->numel() == 0) {
+    return;
+  }
+
+  int r = xdnn::concat_grad(ctx.GetRawContext(),
+                            height,
+                            width_out.data(),
+                            num,
+                            out_ptrs.data(),
+                            param.X->data<float>());
+  CHECK_EQ(r, 0);
+}
+
+}  // namespace xpu
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Register the XPU float/NCHW `unstack` kernel (alias `def`). Input "X" and
+// output "Y" are both bound as XPU tensors.
+REGISTER_LITE_KERNEL(unstack,
+                     kXPU,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::xpu::UnstackCompute,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))})
+    .BindOutput("Y", {LiteType::GetTensorTy(TARGET(kXPU))})
+    .Finalize();
diff --git a/lite/kernels/xpu/unstack_compute.h b/lite/kernels/xpu/unstack_compute.h
@@ -0,0 +1,35 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "lite/core/kernel.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace xpu {
+
+// XPU float kernel for the unstack op. Run() is defined in
+// unstack_compute.cc.
+class UnstackCompute : public KernelLite<TARGET(kXPU), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::UnstackParam;
+
+  // `override` lets the compiler verify that this really replaces the base
+  // kernel's Run(), matching how the host unstack kernel declares it.
+  void Run() override;
+
+  virtual ~UnstackCompute() = default;
+};
+
+}  // namespace xpu
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/operators/CMakeLists.txt b/lite/operators/CMakeLists.txt
@@ -123,6 +123,7 @@ add_operator(pixel_shuffle_op extra SRCS pixel_shuffle_op.cc DEPS ${op_DEPS})
 add_operator(clip_op extra SRCS clip_op.cc DEPS ${op_DEPS})
 add_operator(print_op extra SRCS print_op.cc DEPS ${op_DEPS})
 add_operator(scatter extra SRCS scatter_op.cc DEPS ${op_DEPS})
+add_operator(unstack_op extra SRCS unstack_op.cc DEPS ${op_DEPS})
 
 # for OCR specific
 add_operator(while_op extra SRCS while_op.cc DEPS ${op_DEPS})
diff --git a/lite/operators/op_params.h b/lite/operators/op_params.h
@@ -222,6 +222,15 @@ struct StackParam : ParamBase {
   int axis{0};
 };
 
+// For Unstack Op
+struct UnstackParam : ParamBase {
+  // Input tensor to be split (op input "X").
+  const lite::Tensor* X{nullptr};
+  // Output tensors, one per slice (op output "Y").
+  std::vector<lite::Tensor*> Out{};
+
+  // Axis to split along; a negative value is offset by the rank of X.
+  int axis{0};
+  // Expected size of X along `axis`; shape inference requires them to match.
+  int num{1};
+};
+
 // For Power Op
 struct PowerParam : ParamBase {
   const lite::Tensor* X{};
diff --git a/lite/operators/unstack_op.cc b/lite/operators/unstack_op.cc
@@ -0,0 +1,68 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/operators/unstack_op.h"
+#include "lite/core/op_lite.h"
+#include "lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace operators {
+
+// Validates that the input and every output tensor were found, and that the
+// `num` attribute agrees with the number of outputs.
+bool UnstackOp::CheckShape() const {
+  CHECK(param_.X);
+  // Kernels iterate over Out, while InferShapeImpl only ties `num` to
+  // x_dims[axis]. Without this check, surplus outputs would read past X.
+  CHECK(param_.Out.size() == static_cast<size_t>(param_.num))
+      << "num(attr) should be equal to the number of outputs. But received "
+      << "num: " << param_.num << ", outputs: " << param_.Out.size();
+  for (const auto *out : param_.Out) {
+    CHECK(out);
+  }
+  return true;
+}
+
+// Infers the output shapes: every output gets X's shape with `axis` removed.
+// Fails if `axis` is out of range or if `num` differs from x_dims[axis].
+bool UnstackOp::InferShapeImpl() const {
+  auto x = param_.X;
+  // Read-only use, so bind by reference instead of copying the vector.
+  const auto &outs = param_.Out;
+  auto x_shape = x->dims().Vectorize();
+  const int rank = static_cast<int>(x_shape.size());
+  int axis = param_.axis;
+  if (axis < 0) {
+    axis += rank;
+  }
+  // Both x_shape[axis] and the erase() below are undefined for an axis
+  // outside [0, rank), so reject it up front.
+  CHECK(axis >= 0 && axis < rank)
+      << "axis(attr) should be in range [-rank, rank). But received x_dims: "
+      << x->dims() << ", axis: " << param_.axis;
+  int num = param_.num;
+  CHECK_EQ(x_shape[axis], static_cast<int64_t>(num))
+      << "num(attr) should be equal to x_dims[axis]. But received x_dims: "
+      << x->dims() << ", axis: " << param_.axis << ", num: " << num;
+
+  auto out_shape = x_shape;
+  out_shape.erase(out_shape.begin() + axis);
+  for (auto out : outs) {
+    out->Resize(out_shape);
+  }
+  return true;
+}
+
+// Binds input "X", outputs "Y" and the "axis"/"num" attributes of `op_desc`
+// to param_, resolving tensor names through `scope`.
+bool UnstackOp::AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) {
+  param_.X = scope->FindTensor(op_desc.Input("X").front());
+  // Start from an empty list so that attaching the op again does not append
+  // the same outputs a second time.
+  param_.Out.clear();
+  const auto out_names = op_desc.Output("Y");
+  param_.Out.reserve(out_names.size());
+  for (const auto &out_name : out_names) {
+    param_.Out.emplace_back(scope->FindMutableTensor(out_name));
+  }
+
+  param_.axis = op_desc.GetAttr<int>("axis");
+  param_.num = op_desc.GetAttr<int>("num");
+  return true;
+}
+
+}  // namespace operators
+}  // namespace lite
+}  // namespace paddle
+
+// Register UnstackOp under the op type name `unstack`.
+REGISTER_LITE_OP(unstack, paddle::lite::operators::UnstackOp);
diff --git a/lite/operators/unstack_op.h b/lite/operators/unstack_op.h
@@ -0,0 +1,45 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <string>
+#include "lite/core/op_lite.h"
+#include "lite/core/scope.h"
+
+namespace paddle {
+namespace lite {
+namespace operators {
+
+// Operator definition for `unstack`. It owns the UnstackParam that is filled
+// in by AttachImpl and handed to the kernel by AttachKernel.
+class UnstackOp : public OpLite {
+ public:
+  UnstackOp() {}
+  explicit UnstackOp(const std::string &op_type) : OpLite(op_type) {}
+
+  // Inline overrides.
+  std::string DebugString() const override { return "unstack"; }
+  void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
+
+  // Overrides defined in unstack_op.cc.
+  bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
+  bool CheckShape() const override;
+  bool InferShapeImpl() const override;
+
+ private:
+  // Mutable so the const overrides declared above can still access it.
+  mutable UnstackParam param_;
+};
+
+}  // namespace operators
+}  // namespace lite
+}  // namespace paddle
diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt
@@ -66,6 +66,7 @@ if(LITE_BUILD_EXTRA)
     lite_cc_test(test_kernel_clip_compute SRCS clip_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     lite_cc_test(test_kernel_pixel_shuffle_compute SRCS pixel_shuffle_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     lite_cc_test(test_kernel_scatter_compute SRCS scatter_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_unstack_compute SRCS unstack_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${huawei_ascend_npu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     lite_cc_test(test_kernel_sequence_expand_as_compute SRCS sequence_expand_as_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${rknpu_kernels} ${apu_kernels} ${x86_kernels} ${bm_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
 
     # for training kernel
diff --git a/lite/tests/kernels/unstack_compute_test.cc b/lite/tests/kernels/unstack_compute_test.cc