
Commit 30c925e

Merge branch 'multnormal_api' of https://github.com/dasenCoding/Paddle into multnormal_api
2 parents: 29b0578 + 006d08a

1,202 files changed: +32739 −30167 lines


.flake8

Lines changed: 5 additions & 1 deletion
@@ -23,7 +23,7 @@ ignore =
 
     # F, see https://flake8.pycqa.org/en/latest/user/error-codes.html
     F405,
-    F811,F821,F841,
+    F811,F841,
 
     # W, see https://pycodestyle.pycqa.org/en/latest/intro.html#error-codes
     W503
@@ -33,3 +33,7 @@ per-file-ignores =
     python/paddle/fluid/tests/unittests/collective/fleet/test_hdfs1.py:E101,W191
     # Ignore unused imports in __init__.py
     __init__.py: F401
+    # Ignore undefined variables in CMake config and some dygraph_to_static tests
+    .cmake-format.py: F821
+    python/paddle/fluid/tests/unittests/dygraph_to_static/test_loop.py: F821
+    python/paddle/fluid/tests/unittests/dygraph_to_static/test_closure_analysis.py: F821

AUTHORS.md

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@ This is an incomplete list of authors of [Paddle](https://github.com/PaddlePaddl
 | dragonwarrior | Long Wang |
 | dyning | Yuning Du |
 | emailweixu | Wei Xu |
+| engineer1109 | Jia-Liang Wang |
 | gangliao | Gang Liao |
 | gongweibao | Wei-Bao Gong |
 | guru4elephant | Daxiang Dong |

cmake/external/xpu.cmake

Lines changed: 12 additions & 36 deletions
@@ -10,80 +10,55 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
 if(NOT DEFINED XPU_BASE_URL)
   set(XPU_BASE_URL_WITHOUT_DATE
       "https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
-  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221110")
+  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221124")
 else()
   set(XPU_BASE_URL "${XPU_BASE_URL}")
 endif()
 
-# ubuntu and centos: use output by XDNN API team
-if(NOT DEFINED XPU_XDNN_BASE_URL)
-  set(XPU_XDNN_BASE_URL_WITHOUT_DATE
-      "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
-  set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20221109")
-else()
-  set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}")
-endif()
-
 set(XPU_XCCL_BASE_URL
     "https://klx-sdk-release-public.su.bcebos.com/xccl/release/1.0.0")
 
 if(WITH_AARCH64)
   set(XPU_XRE_DIR_NAME "xre-kylin_aarch64")
   set(XPU_XDNN_DIR_NAME "xdnn-kylin_aarch64")
   set(XPU_XCCL_DIR_NAME "xccl-kylin_aarch64")
-  set(XPU_XDNN_URL
-      "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
-      CACHE STRING "" FORCE)
 elseif(WITH_SUNWAY)
   set(XPU_XRE_DIR_NAME "xre-deepin_sw6_64")
   set(XPU_XDNN_DIR_NAME "xdnn-deepin_sw6_64")
   set(XPU_XCCL_DIR_NAME "xccl-deepin_sw6_64")
-  set(XPU_XDNN_URL
-      "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
-      CACHE STRING "" FORCE)
 elseif(WITH_BDCENTOS)
   set(XPU_XRE_DIR_NAME "xre-bdcentos_x86_64")
   set(XPU_XDNN_DIR_NAME "xdnn-bdcentos_x86_64")
   set(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
-  # ubuntu and centos: use output by XDNN API team
-  set(XPU_XDNN_URL
-      "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
-      CACHE STRING "" FORCE)
 elseif(WITH_UBUNTU)
   set(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
   set(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
   set(XPU_XCCL_DIR_NAME "xccl-ubuntu_x86_64")
-  # ubuntu and centos: use output by XDNN API team
-  set(XPU_XDNN_URL
-      "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
-      CACHE STRING "" FORCE)
 elseif(WITH_CENTOS)
   set(XPU_XRE_DIR_NAME "xre-centos7_x86_64")
-  set(XPU_XDNN_DIR_NAME "xdnn-bdcentos_x86_64")
+  set(XPU_XDNN_DIR_NAME "xdnn-centos7_x86_64")
   set(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
-  # ubuntu and centos: use output by XDNN API team
-  set(XPU_XDNN_URL
-      "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
-      CACHE STRING "" FORCE)
 else()
   set(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
   set(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
   set(XPU_XCCL_DIR_NAME "xccl-ubuntu_x86_64")
-  # default: use output by XDNN API team
-  set(XPU_XDNN_URL
-      "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
-      CACHE STRING "" FORCE)
 endif()
 
 set(XPU_XRE_URL
     "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz"
     CACHE STRING "" FORCE)
+set(XPU_XDNN_URL
+    "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
+    CACHE STRING "" FORCE)
 set(XPU_XCCL_URL
     "${XPU_XCCL_BASE_URL}/${XPU_XCCL_DIR_NAME}.tar.gz"
     CACHE STRING "" FORCE)
 set(XPU_PACK_DEPENCE_URL
     "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/pack_paddle_depence.sh"
     CACHE STRING "" FORCE)
+set(XPU_CHECK_DEPENCE_URL
+    "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/check_xpu_dependence.sh"
+    CACHE STRING "" FORCE)
 
 set(SNAPPY_PREFIX_DIR "${THIRD_PARTY_PATH}/xpu")
 set(XPU_DOWNLOAD_DIR "${SNAPPY_PREFIX_DIR}/src/${XPU_PROJECT}")
@@ -108,9 +83,10 @@ ExternalProject_Add(
   PREFIX ${SNAPPY_PREFIX_DIR}
   DOWNLOAD_DIR ${XPU_DOWNLOAD_DIR}
   DOWNLOAD_COMMAND
-    wget ${XPU_PACK_DEPENCE_URL} && bash pack_paddle_depence.sh ${XPU_XRE_URL}
-    ${XPU_XRE_DIR_NAME} ${XPU_XDNN_URL} ${XPU_XDNN_DIR_NAME} ${XPU_XCCL_URL}
-    ${XPU_XCCL_DIR_NAME}
+    wget ${XPU_CHECK_DEPENCE_URL} && bash check_xpu_dependence.sh
+    ${XPU_BASE_URL} ${XPU_XCCL_BASE_URL} && wget ${XPU_PACK_DEPENCE_URL} && bash
+    pack_paddle_depence.sh ${XPU_XRE_URL} ${XPU_XRE_DIR_NAME} ${XPU_XDNN_URL}
+    ${XPU_XDNN_DIR_NAME} ${XPU_XCCL_URL} ${XPU_XCCL_DIR_NAME}
   DOWNLOAD_NO_PROGRESS 1
   UPDATE_COMMAND ""
   CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT}

paddle/fluid/distributed/collective/BKCLTools.h

Lines changed: 3 additions & 15 deletions
@@ -77,23 +77,11 @@ class XPUEventManager {
                           device_index_));
 
     platform::XPUDeviceGuard guard(device_index_);
-    PADDLE_ENFORCE_XPU_SUCCESS(xpu_event_record(event_, ctx.stream()));
+    // TODO(zhangxiaoci) temporary solution: xpu::event seems buggy
+    PADDLE_ENFORCE_XPU_SUCCESS(xpu_wait(ctx.stream()));
   }
 
-  void Block(const XPUContext& ctx) const {
-    if (is_created_) {
-      auto device_index = ctx.GetPlace().device;
-      PADDLE_ENFORCE_EQ(device_index,
-                        device_index_,
-                        platform::errors::PreconditionNotMet(
-                            "XPUContext's device %d does not match"
-                            "Event's device %d",
-                            device_index,
-                            device_index_));
-      platform::XPUDeviceGuard guard(device_index_);
-      PADDLE_ENFORCE_XPU_SUCCESS(xpu_stream_wait_event(ctx.stream(), event_));
-    }
-  }
+  void Block(const XPUContext& ctx) const {}
 
  private:
  bool is_created_{false};
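
The change above trades event-based synchronization for an eager host-side wait: Record() now drains the stream via xpu_wait, so nothing can still be pending by the time Block() runs, which is why Block() can safely become an empty function. A stand-alone sketch of that reasoning with stub types (assumptions throughout; none of this is Paddle or XPU runtime code):

#include <iostream>

struct Stream {};  // stand-in for an XPU stream handle

// After this commit: Record() blocks the host until all work queued on the
// stream has finished -- the moral equivalent of xpu_wait(ctx.stream()).
void Record(Stream&) { std::cout << "stream drained inside Record()\n"; }

// Block() used to issue xpu_stream_wait_event; with the eager wait above it
// has nothing left to wait for, so it is intentionally empty.
void Block(Stream&) {}

int main() {
  Stream s;
  Record(s);  // synchronizes eagerly
  Block(s);   // no-op by construction
  return 0;
}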

paddle/fluid/distributed/collective/NCCLTools.cc

Lines changed: 104 additions & 0 deletions
@@ -44,5 +44,109 @@ std::string SerializeNCCLUniqueId(const ncclUniqueId& ncclID) {
   return oss.str();
 }
 
+void StaticCheckTensor(const phi::DenseTensor& tensor,
+                       int rank,
+                       int world_size) {
+  // place check
+  PADDLE_ENFORCE_EQ(
+      platform::is_gpu_place(tensor.place()),
+      true,
+      platform::errors::InvalidArgument("Tensor should be in GPU place."));
+  // rank check
+  PADDLE_ENFORCE_GE(rank,
+                    0,
+                    platform::errors::InvalidArgument(
+                        "Rank should be greater than or equal to 0."));
+  PADDLE_ENFORCE_LT(
+      rank,
+      world_size,
+      platform::errors::InvalidArgument("Rank is out of the process group."));
+}
+
+// static check for collective
+void StaticCheckTensors(const phi::DenseTensor& out_tensor,
+                        const phi::DenseTensor& in_tensor,
+                        int rank,
+                        int world_size,
+                        int out_size_factor,
+                        int in_size_factor) {
+  // place check
+  PADDLE_ENFORCE_EQ(platform::is_gpu_place(out_tensor.place()),
+                    true,
+                    platform::errors::InvalidArgument(
+                        "Output tensor should be in GPU place."));
+  PADDLE_ENFORCE_EQ(platform::is_gpu_place(in_tensor.place()),
+                    true,
+                    platform::errors::InvalidArgument(
+                        "Input tensor should be in GPU place."));
+  // rank check
+  PADDLE_ENFORCE_GE(rank,
+                    0,
+                    platform::errors::InvalidArgument(
+                        "Rank should be greater than or equal to 0."));
+  PADDLE_ENFORCE_LT(
+      rank,
+      world_size,
+      platform::errors::InvalidArgument("Rank is out of the process group."));
+  // shape check
+  int64_t out_size = out_tensor.numel();
+  PADDLE_ENFORCE_GT(out_size,
+                    0,
+                    platform::errors::InvalidArgument(
+                        "Size of output tensor should be greater than 0."));
+  int64_t in_size = in_tensor.numel();
+  PADDLE_ENFORCE_GT(in_size,
+                    0,
+                    platform::errors::InvalidArgument(
+                        "Size of input tensor should be greater than 0."));
+  PADDLE_ENFORCE_EQ(
+      out_size * out_size_factor,
+      in_size * in_size_factor,
+      platform::errors::InvalidArgument(
+          "Input and output tensors should have matching sizes."));
+  // dtype check
+  PADDLE_ENFORCE_EQ(
+      out_tensor.dtype(),
+      in_tensor.dtype(),
+      platform::errors::InvalidArgument(
+          "Input and output tensors should have the same data type."));
+}
+
+void StaticCheckTensorsSameShape(const phi::DenseTensor& out_tensor,
+                                 const phi::DenseTensor& in_tensor,
+                                 int rank,
+                                 int world_size) {
+  StaticCheckTensors(out_tensor,
+                     in_tensor,
+                     rank,
+                     world_size,
+                     /*out_size_factor*/ 1,
+                     /*in_size_factor*/ 1);
+}
+
+void StaticCheckTensorsScatterLikeShape(const phi::DenseTensor& out_tensor,
+                                        const phi::DenseTensor& in_tensor,
+                                        int rank,
+                                        int world_size) {
+  StaticCheckTensors(out_tensor,
+                     in_tensor,
+                     rank,
+                     world_size,
+                     /*out_size_factor*/ world_size,
+                     /*in_size_factor*/ 1);
+}
+
+void StaticCheckTensorsGatherLikeShape(const phi::DenseTensor& out_tensor,
+                                       const phi::DenseTensor& in_tensor,
+                                       int rank,
+                                       int world_size) {
+  StaticCheckTensors(out_tensor,
+                     in_tensor,
+                     rank,
+                     world_size,
+                     /*out_size_factor*/ 1,
+                     /*in_size_factor*/ world_size);
+}
+
 }  // namespace distributed
 }  // namespace paddle
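
The three wrappers differ only in their size factors; the invariant StaticCheckTensors enforces is out_size * out_size_factor == in_size * in_size_factor. A stand-alone sketch of the arithmetic with made-up sizes (illustration only, not part of the commit):

#include <cassert>
#include <cstdint>

int main() {
  const int world_size = 4;
  const std::int64_t in_size = 8;  // numel() of one rank's input tensor

  // Same-shape collectives (SameShape wrapper): factors 1 / 1.
  const std::int64_t same_out = in_size;
  assert(same_out * 1 == in_size * 1);

  // Scatter-like (ScatterLikeShape): out_size_factor = world_size,
  // in_size_factor = 1, i.e. the output is 1/world_size of the input.
  const std::int64_t scatter_out = in_size / world_size;
  assert(scatter_out * world_size == in_size * 1);

  // Gather-like (GatherLikeShape): out_size_factor = 1,
  // in_size_factor = world_size, i.e. the output is world_size x the input.
  const std::int64_t gather_out = in_size * world_size;
  assert(gather_out * 1 == in_size * world_size);
  return 0;
}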

paddle/fluid/distributed/collective/NCCLTools.h

Lines changed: 29 additions & 1 deletion
@@ -47,7 +47,7 @@
 namespace paddle {
 namespace distributed {
 
-#define NCCLCHECK(cmd) \
+#define NCCL_CHECK(cmd) \
   do { \
     ncclResult_t r = cmd; \
     if (r != ncclSuccess) { \
@@ -60,7 +60,35 @@ namespace distributed {
   } while (0)
 
 ncclRedOp_t ToNCCLRedType(ReduceOp reduction);
+
 std::string SerializeNCCLUniqueId(const ncclUniqueId& ncclID);
 
+// static check for p2p
+void StaticCheckTensor(const phi::DenseTensor& tensor,
+                       int rank,
+                       int world_size);
+
+// static check for collective
+void StaticCheckTensors(const phi::DenseTensor& out_tensor,
+                        const phi::DenseTensor& in_tensor,
+                        int rank,
+                        int world_size,
+                        int out_size_factor,
+                        int in_size_factor);
+
+void StaticCheckTensorsSameShape(const phi::DenseTensor& out_tensor,
+                                 const phi::DenseTensor& in_tensor,
+                                 int rank,
+                                 int world_size);
+
+void StaticCheckTensorsScatterLikeShape(const phi::DenseTensor& out_tensor,
+                                        const phi::DenseTensor& in_tensor,
+                                        int rank,
+                                        int world_size);
+
+void StaticCheckTensorsGatherLikeShape(const phi::DenseTensor& out_tensor,
+                                       const phi::DenseTensor& in_tensor,
+                                       int rank,
+                                       int world_size);
 }  // namespace distributed
 }  // namespace paddle
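
Renaming NCCLCHECK to NCCL_CHECK only changes the spelling at call sites; the macro body is untouched. A hypothetical call site, for illustration (ncclGroupStart/ncclGroupEnd are standard NCCL entry points, but these lines are not taken from the commit):

// Illustrative only -- not a call site from this commit. Each call's
// ncclResult_t is checked by the do/while body of NCCL_CHECK above.
NCCL_CHECK(ncclGroupStart());
NCCL_CHECK(ncclGroupEnd());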

paddle/fluid/distributed/collective/ProcessGroup.cc

Lines changed: 5 additions & 11 deletions
@@ -35,17 +35,6 @@ void ProcessGroup::Task::Synchronize() {}
 
 void ProcessGroup::Task::UpdateWaitChain(const phi::DeviceContext& ctx) {}
 
-ProcessGroup::ProcessGroup(int rank,
-                           int size,
-                           const platform::Place& place,
-                           int gid)
-    : rank_(rank), size_(size), place_(place), gid_(gid) {
-  if (gid != IGNORE_ID) {
-    auto map = ProcessGroupMapFromGid::getInstance();
-    map->insert(gid_, this);
-  }
-}
-
 ProcessGroup::ProcessGroup(int rank, int size, int gid)
     : rank_(rank), size_(size), gid_(gid) {
   if (gid != IGNORE_ID) {
@@ -66,5 +55,10 @@ ProcessGroup::Task::Task(int rank,
                          bool sync_op)
     : rank_(rank), comm_type_(comm_type), sync_op_(sync_op) {}
 
+ProcessGroupIdMap& ProcessGroupIdMap::GetInstance() {
+  static ProcessGroupIdMap instance;
+  return instance;
+}
+
 }  // namespace distributed
 }  // namespace paddle
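
The added GetInstance() is the function-local-static (Meyers) singleton idiom: the static is constructed exactly once, thread-safely, on first call (guaranteed since C++11). A self-contained sketch of the pattern; the Registry name is a stand-in, not a Paddle type:

#include <iostream>

class Registry {
 public:
  static Registry& GetInstance() {
    static Registry instance;  // constructed once, on first call; thread-safe
    return instance;
  }
  Registry(const Registry&) = delete;
  Registry& operator=(const Registry&) = delete;

 private:
  Registry() = default;
};

int main() {
  Registry& a = Registry::GetInstance();
  Registry& b = Registry::GetInstance();
  std::cout << std::boolalpha << (&a == &b) << "\n";  // true: same object
  return 0;
}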
