Skip to content

Commit ece208a

Browse files
committed
Rename CeedScalarCPU -> CeedScalarBase
1 parent 78ee22c commit ece208a

File tree

7 files changed

+65
-65
lines changed

7 files changed

+65
-65
lines changed

backends/cuda-gen/ceed-cuda-gen-operator-build.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1285,7 +1285,7 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op, bool *is_good_b
12851285
code << tab << "// s_G_[in,out]_i: Gradient matrix, shared memory\n";
12861286
code << tab << "// -----------------------------------------------------------------------------\n";
12871287
code << tab << "extern \"C\" __global__ void " << operator_name
1288-
<< "(CeedInt num_elem, void* ctx, FieldsInt_Cuda indices, Fields_Cuda fields, Fields_Cuda B, Fields_Cuda G, CeedScalarCPU *W, Points_Cuda "
1288+
<< "(CeedInt num_elem, void* ctx, FieldsInt_Cuda indices, Fields_Cuda fields, Fields_Cuda B, Fields_Cuda G, CeedScalarBase *W, Points_Cuda "
12891289
"points) {\n";
12901290
tab.push();
12911291

@@ -1295,11 +1295,11 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op, bool *is_good_b
12951295

12961296
CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
12971297
if (eval_mode != CEED_EVAL_WEIGHT) { // Skip CEED_EVAL_WEIGHT
1298-
code << tab << "const CeedScalarCPU *__restrict__ d_in_" << i << " = fields.inputs[" << i << "];\n";
1298+
code << tab << "const CeedScalarBase *__restrict__ d_in_" << i << " = fields.inputs[" << i << "];\n";
12991299
}
13001300
}
13011301
for (CeedInt i = 0; i < num_output_fields; i++) {
1302-
code << tab << "CeedScalarCPU *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n";
1302+
code << tab << "CeedScalarBase *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n";
13031303
}
13041304

13051305
code << tab << "const CeedInt max_dim = " << max_dim << ";\n";
@@ -1698,8 +1698,8 @@ static int CeedOperatorBuildKernelAssemblyAtPoints_Cuda_gen(CeedOperator op, boo
16981698
code << tab << "// s_G_[in,out]_i: Gradient matrix, shared memory\n";
16991699
code << tab << "// -----------------------------------------------------------------------------\n";
17001700
code << tab << "extern \"C\" __global__ void " << operator_name
1701-
<< "(CeedInt num_elem, void* ctx, FieldsInt_Cuda indices, Fields_Cuda fields, Fields_Cuda B, Fields_Cuda G, CeedScalarCPU *W, Points_Cuda "
1702-
"points, CeedScalarCPU *__restrict__ values_array) {\n";
1701+
<< "(CeedInt num_elem, void* ctx, FieldsInt_Cuda indices, Fields_Cuda fields, Fields_Cuda B, Fields_Cuda G, CeedScalarBase *W, Points_Cuda "
1702+
"points, CeedScalarBase *__restrict__ values_array) {\n";
17031703
tab.push();
17041704

17051705
// Scratch buffers
@@ -1708,11 +1708,11 @@ static int CeedOperatorBuildKernelAssemblyAtPoints_Cuda_gen(CeedOperator op, boo
17081708

17091709
CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
17101710
if (eval_mode != CEED_EVAL_WEIGHT) { // Skip CEED_EVAL_WEIGHT
1711-
code << tab << "const CeedScalarCPU *__restrict__ d_in_" << i << " = fields.inputs[" << i << "];\n";
1711+
code << tab << "const CeedScalarBase *__restrict__ d_in_" << i << " = fields.inputs[" << i << "];\n";
17121712
}
17131713
}
17141714
for (CeedInt i = 0; i < num_output_fields; i++) {
1715-
code << tab << "CeedScalarCPU *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n";
1715+
code << tab << "CeedScalarBase *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n";
17161716
}
17171717

17181718
code << tab << "const CeedInt max_dim = " << max_dim << ";\n";
@@ -2240,8 +2240,8 @@ extern "C" int CeedOperatorBuildKernelLinearAssembleQFunction_Cuda_gen(CeedOpera
22402240
code << tab << "// s_G_[in,out]_i: Gradient matrix, shared memory\n";
22412241
code << tab << "// -----------------------------------------------------------------------------\n";
22422242
code << tab << "extern \"C\" __global__ void " << operator_name
2243-
<< "(CeedInt num_elem, void* ctx, FieldsInt_Cuda indices, Fields_Cuda fields, Fields_Cuda B, Fields_Cuda G, CeedScalarCPU *W, Points_Cuda "
2244-
"points, CeedScalarCPU *__restrict__ values_array) {\n";
2243+
<< "(CeedInt num_elem, void* ctx, FieldsInt_Cuda indices, Fields_Cuda fields, Fields_Cuda B, Fields_Cuda G, CeedScalarBase *W, Points_Cuda "
2244+
"points, CeedScalarBase *__restrict__ values_array) {\n";
22452245
tab.push();
22462246

22472247
// Scratch buffers
@@ -2250,11 +2250,11 @@ extern "C" int CeedOperatorBuildKernelLinearAssembleQFunction_Cuda_gen(CeedOpera
22502250

22512251
CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
22522252
if (eval_mode != CEED_EVAL_WEIGHT) { // Skip CEED_EVAL_WEIGHT
2253-
code << tab << "const CeedScalarCPU *__restrict__ d_in_" << i << " = fields.inputs[" << i << "];\n";
2253+
code << tab << "const CeedScalarBase *__restrict__ d_in_" << i << " = fields.inputs[" << i << "];\n";
22542254
}
22552255
}
22562256
for (CeedInt i = 0; i < num_output_fields; i++) {
2257-
code << tab << "CeedScalarCPU *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n";
2257+
code << tab << "CeedScalarBase *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n";
22582258
}
22592259

22602260
code << tab << "const CeedInt max_dim = " << max_dim << ";\n";

include/ceed/ceed-f32.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,14 @@
2121
#if defined(CEED_RUNNING_JIT_PASS) && defined(CEED_JIT_PRECISION) && (CEED_JIT_PRECISION != CEED_SCALAR_TYPE)
2222
#ifdef CEED_JIT_PRECISION == CEED_SCALAR_FP64
2323
typedef double CeedScalar;
24-
typedef float CeedScalarCPU;
24+
typedef float CeedScalarBase;
2525

2626
/// Machine epsilon
2727
static const CeedScalar CEED_EPSILON = DBL_EPSILON;
2828
#endif // CEED_JIT_PRECISION
2929
#else
3030
typedef float CeedScalar;
31-
typedef CeedScalar CeedScalarCPU;
31+
typedef CeedScalar CeedScalarBase;
3232

3333
/// Machine epsilon
3434
static const CeedScalar CEED_EPSILON = FLT_EPSILON;

include/ceed/ceed-f64.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,14 @@
2121
#if defined(CEED_RUNNING_JIT_PASS) && defined(CEED_JIT_PRECISION) && (CEED_JIT_PRECISION != CEED_SCALAR_TYPE)
2222
#if CEED_JIT_PRECISION == CEED_SCALAR_FP32
2323
typedef float CeedScalar;
24-
typedef double CeedScalarCPU;
24+
typedef double CeedScalarBase;
2525

2626
/// Machine epsilon
2727
static const CeedScalar CEED_EPSILON = FLT_EPSILON;
2828
#endif // CEED_JIT_PRECISION
2929
#else
3030
typedef double CeedScalar;
31-
typedef CeedScalar CeedScalarCPU;
31+
typedef CeedScalar CeedScalarBase;
3232

3333
/// Machine epsilon
3434
static const CeedScalar CEED_EPSILON = DBL_EPSILON;

include/ceed/jit-source/cuda/cuda-shared-basis-nontensor.h

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
//------------------------------------------------------------------------------
1616
// Interp kernels
1717
//------------------------------------------------------------------------------
18-
extern "C" __global__ void Interp(const CeedInt num_elem, const CeedScalarCPU *c_B, const CeedScalarCPU *__restrict__ d_U,
19-
CeedScalarCPU *__restrict__ d_V) {
18+
extern "C" __global__ void Interp(const CeedInt num_elem, const CeedScalarBase *c_B, const CeedScalarBase *__restrict__ d_U,
19+
CeedScalarBase *__restrict__ d_V) {
2020
extern __shared__ CeedScalar slice[];
2121

2222
SharedData_Cuda data;
@@ -42,8 +42,8 @@ extern "C" __global__ void Interp(const CeedInt num_elem, const CeedScalarCPU *c
4242
}
4343
}
4444

45-
extern "C" __global__ void InterpTranspose(const CeedInt num_elem, const CeedScalarCPU *c_B, const CeedScalarCPU *__restrict__ d_U,
46-
CeedScalarCPU *__restrict__ d_V) {
45+
extern "C" __global__ void InterpTranspose(const CeedInt num_elem, const CeedScalarBase *c_B, const CeedScalarBase *__restrict__ d_U,
46+
CeedScalarBase *__restrict__ d_V) {
4747
extern __shared__ CeedScalar slice[];
4848

4949
SharedData_Cuda data;
@@ -69,8 +69,8 @@ extern "C" __global__ void InterpTranspose(const CeedInt num_elem, const CeedSca
6969
}
7070
}
7171

72-
extern "C" __global__ void InterpTransposeAdd(const CeedInt num_elem, const CeedScalarCPU *c_B, const CeedScalarCPU *__restrict__ d_U,
73-
CeedScalarCPU *__restrict__ d_V) {
72+
extern "C" __global__ void InterpTransposeAdd(const CeedInt num_elem, const CeedScalarBase *c_B, const CeedScalarBase *__restrict__ d_U,
73+
CeedScalarBase *__restrict__ d_V) {
7474
extern __shared__ CeedScalar slice[];
7575

7676
SharedData_Cuda data;
@@ -99,8 +99,8 @@ extern "C" __global__ void InterpTransposeAdd(const CeedInt num_elem, const Ceed
9999
//------------------------------------------------------------------------------
100100
// Grad kernels
101101
//------------------------------------------------------------------------------
102-
extern "C" __global__ void Grad(const CeedInt num_elem, const CeedScalarCPU *c_G, const CeedScalarCPU *__restrict__ d_U,
103-
CeedScalarCPU *__restrict__ d_V) {
102+
extern "C" __global__ void Grad(const CeedInt num_elem, const CeedScalarBase *c_G, const CeedScalarBase *__restrict__ d_U,
103+
CeedScalarBase *__restrict__ d_V) {
104104
extern __shared__ CeedScalar slice[];
105105

106106
SharedData_Cuda data;
@@ -126,8 +126,8 @@ extern "C" __global__ void Grad(const CeedInt num_elem, const CeedScalarCPU *c_G
126126
}
127127
}
128128

129-
extern "C" __global__ void GradTranspose(const CeedInt num_elem, const CeedScalarCPU *c_G, const CeedScalarCPU *__restrict__ d_U,
130-
CeedScalarCPU *__restrict__ d_V) {
129+
extern "C" __global__ void GradTranspose(const CeedInt num_elem, const CeedScalarBase *c_G, const CeedScalarBase *__restrict__ d_U,
130+
CeedScalarBase *__restrict__ d_V) {
131131
extern __shared__ CeedScalar slice[];
132132

133133
SharedData_Cuda data;
@@ -153,8 +153,8 @@ extern "C" __global__ void GradTranspose(const CeedInt num_elem, const CeedScala
153153
}
154154
}
155155

156-
extern "C" __global__ void GradTransposeAdd(const CeedInt num_elem, const CeedScalarCPU *c_G, const CeedScalarCPU *__restrict__ d_U,
157-
CeedScalarCPU *__restrict__ d_V) {
156+
extern "C" __global__ void GradTransposeAdd(const CeedInt num_elem, const CeedScalarBase *c_G, const CeedScalarBase *__restrict__ d_U,
157+
CeedScalarBase *__restrict__ d_V) {
158158
extern __shared__ CeedScalar slice[];
159159

160160
SharedData_Cuda data;
@@ -183,7 +183,7 @@ extern "C" __global__ void GradTransposeAdd(const CeedInt num_elem, const CeedSc
183183
//------------------------------------------------------------------------------
184184
// Weight kernel
185185
//------------------------------------------------------------------------------
186-
extern "C" __global__ void Weight(const CeedInt num_elem, const CeedScalarCPU *__restrict__ q_weight, CeedScalarCPU *__restrict__ d_W) {
186+
extern "C" __global__ void Weight(const CeedInt num_elem, const CeedScalarBase *__restrict__ q_weight, CeedScalarBase *__restrict__ d_W) {
187187
extern __shared__ CeedScalar slice[];
188188

189189
SharedData_Cuda data;

include/ceed/jit-source/cuda/cuda-shared-basis-tensor-at-points.h

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@
2020
//------------------------------------------------------------------------------
2121
// Interp
2222
//------------------------------------------------------------------------------
23-
extern "C" __global__ void InterpAtPoints(const CeedInt num_elem, const CeedScalarCPU *__restrict__ c_B, const CeedInt *__restrict__ points_per_elem,
24-
const CeedScalarCPU *__restrict__ d_X, const CeedScalarCPU *__restrict__ d_U,
25-
CeedScalarCPU *__restrict__ d_V) {
23+
extern "C" __global__ void InterpAtPoints(const CeedInt num_elem, const CeedScalarBase *__restrict__ c_B, const CeedInt *__restrict__ points_per_elem,
24+
const CeedScalarBase *__restrict__ d_X, const CeedScalarBase *__restrict__ d_U,
25+
CeedScalarBase *__restrict__ d_V) {
2626
extern __shared__ CeedScalar slice[];
2727

2828
SharedData_Cuda data;
@@ -76,9 +76,9 @@ extern "C" __global__ void InterpAtPoints(const CeedInt num_elem, const CeedScal
7676
}
7777
}
7878

79-
extern "C" __global__ void InterpTransposeAtPoints(const CeedInt num_elem, const CeedScalarCPU *__restrict__ c_B,
80-
const CeedInt *__restrict__ points_per_elem, const CeedScalarCPU *__restrict__ d_X,
81-
const CeedScalarCPU *__restrict__ d_U, CeedScalarCPU *__restrict__ d_V) {
79+
extern "C" __global__ void InterpTransposeAtPoints(const CeedInt num_elem, const CeedScalarBase *__restrict__ c_B,
80+
const CeedInt *__restrict__ points_per_elem, const CeedScalarBase *__restrict__ d_X,
81+
const CeedScalarBase *__restrict__ d_U, CeedScalarBase *__restrict__ d_V) {
8282
extern __shared__ CeedScalar slice[];
8383

8484
SharedData_Cuda data;
@@ -146,9 +146,9 @@ extern "C" __global__ void InterpTransposeAtPoints(const CeedInt num_elem, const
146146
}
147147
}
148148

149-
extern "C" __global__ void InterpTransposeAddAtPoints(const CeedInt num_elem, const CeedScalarCPU *__restrict__ c_B,
150-
const CeedInt *__restrict__ points_per_elem, const CeedScalarCPU *__restrict__ d_X,
151-
const CeedScalarCPU *__restrict__ d_U, CeedScalarCPU *__restrict__ d_V) {
149+
extern "C" __global__ void InterpTransposeAddAtPoints(const CeedInt num_elem, const CeedScalarBase *__restrict__ c_B,
150+
const CeedInt *__restrict__ points_per_elem, const CeedScalarBase *__restrict__ d_X,
151+
const CeedScalarBase *__restrict__ d_U, CeedScalarBase *__restrict__ d_V) {
152152
extern __shared__ CeedScalar slice[];
153153

154154
SharedData_Cuda data;
@@ -208,9 +208,9 @@ extern "C" __global__ void InterpTransposeAddAtPoints(const CeedInt num_elem, co
208208
//------------------------------------------------------------------------------
209209
// Grad
210210
//------------------------------------------------------------------------------
211-
extern "C" __global__ void GradAtPoints(const CeedInt num_elem, const CeedScalarCPU *__restrict__ c_B, const CeedInt *__restrict__ points_per_elem,
212-
const CeedScalarCPU *__restrict__ d_X, const CeedScalarCPU *__restrict__ d_U,
213-
CeedScalarCPU *__restrict__ d_V) {
211+
extern "C" __global__ void GradAtPoints(const CeedInt num_elem, const CeedScalarBase *__restrict__ c_B, const CeedInt *__restrict__ points_per_elem,
212+
const CeedScalarBase *__restrict__ d_X, const CeedScalarBase *__restrict__ d_U,
213+
CeedScalarBase *__restrict__ d_V) {
214214
extern __shared__ CeedScalar slice[];
215215

216216
SharedData_Cuda data;
@@ -264,9 +264,9 @@ extern "C" __global__ void GradAtPoints(const CeedInt num_elem, const CeedScalar
264264
}
265265
}
266266

267-
extern "C" __global__ void GradTransposeAtPoints(const CeedInt num_elem, const CeedScalarCPU *__restrict__ c_B,
268-
const CeedInt *__restrict__ points_per_elem, const CeedScalarCPU *__restrict__ d_X,
269-
const CeedScalarCPU *__restrict__ d_U, CeedScalarCPU *__restrict__ d_V) {
267+
extern "C" __global__ void GradTransposeAtPoints(const CeedInt num_elem, const CeedScalarBase *__restrict__ c_B,
268+
const CeedInt *__restrict__ points_per_elem, const CeedScalarBase *__restrict__ d_X,
269+
const CeedScalarBase *__restrict__ d_U, CeedScalarBase *__restrict__ d_V) {
270270
extern __shared__ CeedScalar slice[];
271271

272272
SharedData_Cuda data;
@@ -335,9 +335,9 @@ extern "C" __global__ void GradTransposeAtPoints(const CeedInt num_elem, const C
335335
}
336336
}
337337

338-
extern "C" __global__ void GradTransposeAddAtPoints(const CeedInt num_elem, const CeedScalarCPU *__restrict__ c_B,
339-
const CeedInt *__restrict__ points_per_elem, const CeedScalarCPU *__restrict__ d_X,
340-
const CeedScalarCPU *__restrict__ d_U, CeedScalarCPU *__restrict__ d_V) {
338+
extern "C" __global__ void GradTransposeAddAtPoints(const CeedInt num_elem, const CeedScalarBase *__restrict__ c_B,
339+
const CeedInt *__restrict__ points_per_elem, const CeedScalarBase *__restrict__ d_X,
340+
const CeedScalarBase *__restrict__ d_U, CeedScalarBase *__restrict__ d_V) {
341341
extern __shared__ CeedScalar slice[];
342342

343343
SharedData_Cuda data;

0 commit comments

Comments
 (0)