Rename CeedScalarCPU -> CeedScalarBase

zatkins-dev · zatkins-dev · commit ece208abd3a6 · 2025-07-24T14:07:57.000-06:00
diff --git a/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp b/backends/cuda-gen/ceed-cuda-gen-operator-build.cpp
@@ -1285,7 +1285,7 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op, bool *is_good_b
   code << tab << "// s_G_[in,out]_i: Gradient matrix, shared memory\n";
   code << tab << "// -----------------------------------------------------------------------------\n";
   code << tab << "extern \"C\" __global__ void " << operator_name
-       << "(CeedInt num_elem, void* ctx, FieldsInt_Cuda indices, Fields_Cuda fields, Fields_Cuda B, Fields_Cuda G, CeedScalarCPU *W, Points_Cuda "
+       << "(CeedInt num_elem, void* ctx, FieldsInt_Cuda indices, Fields_Cuda fields, Fields_Cuda B, Fields_Cuda G, CeedScalarBase *W, Points_Cuda "
           "points) {\n";
   tab.push();
 
@@ -1295,11 +1295,11 @@ extern "C" int CeedOperatorBuildKernel_Cuda_gen(CeedOperator op, bool *is_good_b
 
     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
     if (eval_mode != CEED_EVAL_WEIGHT) {  // Skip CEED_EVAL_WEIGHT
-      code << tab << "const CeedScalarCPU *__restrict__ d_in_" << i << " = fields.inputs[" << i << "];\n";
+      code << tab << "const CeedScalarBase *__restrict__ d_in_" << i << " = fields.inputs[" << i << "];\n";
     }
   }
   for (CeedInt i = 0; i < num_output_fields; i++) {
-    code << tab << "CeedScalarCPU *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n";
+    code << tab << "CeedScalarBase *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n";
   }
 
   code << tab << "const CeedInt max_dim = " << max_dim << ";\n";
@@ -1698,8 +1698,8 @@ static int CeedOperatorBuildKernelAssemblyAtPoints_Cuda_gen(CeedOperator op, boo
   code << tab << "// s_G_[in,out]_i: Gradient matrix, shared memory\n";
   code << tab << "// -----------------------------------------------------------------------------\n";
   code << tab << "extern \"C\" __global__ void " << operator_name
-       << "(CeedInt num_elem, void* ctx, FieldsInt_Cuda indices, Fields_Cuda fields, Fields_Cuda B, Fields_Cuda G, CeedScalarCPU *W, Points_Cuda "
-          "points, CeedScalarCPU *__restrict__ values_array) {\n";
+       << "(CeedInt num_elem, void* ctx, FieldsInt_Cuda indices, Fields_Cuda fields, Fields_Cuda B, Fields_Cuda G, CeedScalarBase *W, Points_Cuda "
+          "points, CeedScalarBase *__restrict__ values_array) {\n";
   tab.push();
 
   // Scratch buffers
@@ -1708,11 +1708,11 @@ static int CeedOperatorBuildKernelAssemblyAtPoints_Cuda_gen(CeedOperator op, boo
 
     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
     if (eval_mode != CEED_EVAL_WEIGHT) {  // Skip CEED_EVAL_WEIGHT
-      code << tab << "const CeedScalarCPU *__restrict__ d_in_" << i << " = fields.inputs[" << i << "];\n";
+      code << tab << "const CeedScalarBase *__restrict__ d_in_" << i << " = fields.inputs[" << i << "];\n";
     }
   }
   for (CeedInt i = 0; i < num_output_fields; i++) {
-    code << tab << "CeedScalarCPU *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n";
+    code << tab << "CeedScalarBase *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n";
   }
 
   code << tab << "const CeedInt max_dim = " << max_dim << ";\n";
@@ -2240,8 +2240,8 @@ extern "C" int CeedOperatorBuildKernelLinearAssembleQFunction_Cuda_gen(CeedOpera
   code << tab << "// s_G_[in,out]_i: Gradient matrix, shared memory\n";
   code << tab << "// -----------------------------------------------------------------------------\n";
   code << tab << "extern \"C\" __global__ void " << operator_name
-       << "(CeedInt num_elem, void* ctx, FieldsInt_Cuda indices, Fields_Cuda fields, Fields_Cuda B, Fields_Cuda G, CeedScalarCPU *W, Points_Cuda "
-          "points, CeedScalarCPU *__restrict__ values_array) {\n";
+       << "(CeedInt num_elem, void* ctx, FieldsInt_Cuda indices, Fields_Cuda fields, Fields_Cuda B, Fields_Cuda G, CeedScalarBase *W, Points_Cuda "
+          "points, CeedScalarBase *__restrict__ values_array) {\n";
   tab.push();
 
   // Scratch buffers
@@ -2250,11 +2250,11 @@ extern "C" int CeedOperatorBuildKernelLinearAssembleQFunction_Cuda_gen(CeedOpera
 
     CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode));
     if (eval_mode != CEED_EVAL_WEIGHT) {  // Skip CEED_EVAL_WEIGHT
-      code << tab << "const CeedScalarCPU *__restrict__ d_in_" << i << " = fields.inputs[" << i << "];\n";
+      code << tab << "const CeedScalarBase *__restrict__ d_in_" << i << " = fields.inputs[" << i << "];\n";
     }
   }
   for (CeedInt i = 0; i < num_output_fields; i++) {
-    code << tab << "CeedScalarCPU *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n";
+    code << tab << "CeedScalarBase *__restrict__ d_out_" << i << " = fields.outputs[" << i << "];\n";
   }
 
   code << tab << "const CeedInt max_dim = " << max_dim << ";\n";
diff --git a/include/ceed/ceed-f32.h b/include/ceed/ceed-f32.h
@@ -21,14 +21,14 @@
 #if defined(CEED_RUNNING_JIT_PASS) && defined(CEED_JIT_PRECISION) && (CEED_JIT_PRECISION != CEED_SCALAR_TYPE)
 #ifdef CEED_JIT_PRECISION == CEED_SCALAR_FP64
 typedef double CeedScalar;
-typedef float  CeedScalarCPU;
+typedef float  CeedScalarBase;
 
 /// Machine epsilon
 static const CeedScalar CEED_EPSILON = DBL_EPSILON;
 #endif  // CEED_JIT_PRECISION
 #else
 typedef float      CeedScalar;
-typedef CeedScalar CeedScalarCPU;
+typedef CeedScalar CeedScalarBase;
 
 /// Machine epsilon
 static const CeedScalar CEED_EPSILON = FLT_EPSILON;
diff --git a/include/ceed/ceed-f64.h b/include/ceed/ceed-f64.h
@@ -21,14 +21,14 @@
 #if defined(CEED_RUNNING_JIT_PASS) && defined(CEED_JIT_PRECISION) && (CEED_JIT_PRECISION != CEED_SCALAR_TYPE)
 #if CEED_JIT_PRECISION == CEED_SCALAR_FP32
 typedef float  CeedScalar;
-typedef double CeedScalarCPU;
+typedef double CeedScalarBase;
 
 /// Machine epsilon
 static const CeedScalar CEED_EPSILON = FLT_EPSILON;
 #endif  // CEED_JIT_PRECISION
 #else
 typedef double     CeedScalar;
-typedef CeedScalar CeedScalarCPU;
+typedef CeedScalar CeedScalarBase;
 
 /// Machine epsilon
 static const CeedScalar CEED_EPSILON = DBL_EPSILON;
diff --git a/include/ceed/jit-source/cuda/cuda-shared-basis-nontensor.h b/include/ceed/jit-source/cuda/cuda-shared-basis-nontensor.h
@@ -15,8 +15,8 @@
 //------------------------------------------------------------------------------
 // Interp kernels
 //------------------------------------------------------------------------------
-extern "C" __global__ void Interp(const CeedInt num_elem, const CeedScalarCPU *c_B, const CeedScalarCPU *__restrict__ d_U,
-                                  CeedScalarCPU *__restrict__ d_V) {
+extern "C" __global__ void Interp(const CeedInt num_elem, const CeedScalarBase *c_B, const CeedScalarBase *__restrict__ d_U,
+                                  CeedScalarBase *__restrict__ d_V) {
   extern __shared__ CeedScalar slice[];
 
   SharedData_Cuda data;
@@ -42,8 +42,8 @@ extern "C" __global__ void Interp(const CeedInt num_elem, const CeedScalarCPU *c
   }
 }
 
-extern "C" __global__ void InterpTranspose(const CeedInt num_elem, const CeedScalarCPU *c_B, const CeedScalarCPU *__restrict__ d_U,
-                                           CeedScalarCPU *__restrict__ d_V) {
+extern "C" __global__ void InterpTranspose(const CeedInt num_elem, const CeedScalarBase *c_B, const CeedScalarBase *__restrict__ d_U,
+                                           CeedScalarBase *__restrict__ d_V) {
   extern __shared__ CeedScalar slice[];
 
   SharedData_Cuda data;
@@ -69,8 +69,8 @@ extern "C" __global__ void InterpTranspose(const CeedInt num_elem, const CeedSca
   }
 }
 
-extern "C" __global__ void InterpTransposeAdd(const CeedInt num_elem, const CeedScalarCPU *c_B, const CeedScalarCPU *__restrict__ d_U,
-                                              CeedScalarCPU *__restrict__ d_V) {
+extern "C" __global__ void InterpTransposeAdd(const CeedInt num_elem, const CeedScalarBase *c_B, const CeedScalarBase *__restrict__ d_U,
+                                              CeedScalarBase *__restrict__ d_V) {
   extern __shared__ CeedScalar slice[];
 
   SharedData_Cuda data;
@@ -99,8 +99,8 @@ extern "C" __global__ void InterpTransposeAdd(const CeedInt num_elem, const Ceed
 //------------------------------------------------------------------------------
 // Grad kernels
 //------------------------------------------------------------------------------
-extern "C" __global__ void Grad(const CeedInt num_elem, const CeedScalarCPU *c_G, const CeedScalarCPU *__restrict__ d_U,
-                                CeedScalarCPU *__restrict__ d_V) {
+extern "C" __global__ void Grad(const CeedInt num_elem, const CeedScalarBase *c_G, const CeedScalarBase *__restrict__ d_U,
+                                CeedScalarBase *__restrict__ d_V) {
   extern __shared__ CeedScalar slice[];
 
   SharedData_Cuda data;
@@ -126,8 +126,8 @@ extern "C" __global__ void Grad(const CeedInt num_elem, const CeedScalarCPU *c_G
   }
 }
 
-extern "C" __global__ void GradTranspose(const CeedInt num_elem, const CeedScalarCPU *c_G, const CeedScalarCPU *__restrict__ d_U,
-                                         CeedScalarCPU *__restrict__ d_V) {
+extern "C" __global__ void GradTranspose(const CeedInt num_elem, const CeedScalarBase *c_G, const CeedScalarBase *__restrict__ d_U,
+                                         CeedScalarBase *__restrict__ d_V) {
   extern __shared__ CeedScalar slice[];
 
   SharedData_Cuda data;
@@ -153,8 +153,8 @@ extern "C" __global__ void GradTranspose(const CeedInt num_elem, const CeedScala
   }
 }
 
-extern "C" __global__ void GradTransposeAdd(const CeedInt num_elem, const CeedScalarCPU *c_G, const CeedScalarCPU *__restrict__ d_U,
-                                            CeedScalarCPU *__restrict__ d_V) {
+extern "C" __global__ void GradTransposeAdd(const CeedInt num_elem, const CeedScalarBase *c_G, const CeedScalarBase *__restrict__ d_U,
+                                            CeedScalarBase *__restrict__ d_V) {
   extern __shared__ CeedScalar slice[];
 
   SharedData_Cuda data;
@@ -183,7 +183,7 @@ extern "C" __global__ void GradTransposeAdd(const CeedInt num_elem, const CeedSc
 //------------------------------------------------------------------------------
 // Weight kernel
 //------------------------------------------------------------------------------
-extern "C" __global__ void Weight(const CeedInt num_elem, const CeedScalarCPU *__restrict__ q_weight, CeedScalarCPU *__restrict__ d_W) {
+extern "C" __global__ void Weight(const CeedInt num_elem, const CeedScalarBase *__restrict__ q_weight, CeedScalarBase *__restrict__ d_W) {
   extern __shared__ CeedScalar slice[];
 
   SharedData_Cuda data;
diff --git a/include/ceed/jit-source/cuda/cuda-shared-basis-tensor-at-points.h b/include/ceed/jit-source/cuda/cuda-shared-basis-tensor-at-points.h
@@ -20,9 +20,9 @@
 //------------------------------------------------------------------------------
 // Interp
 //------------------------------------------------------------------------------
-extern "C" __global__ void InterpAtPoints(const CeedInt num_elem, const CeedScalarCPU *__restrict__ c_B, const CeedInt *__restrict__ points_per_elem,
-                                          const CeedScalarCPU *__restrict__ d_X, const CeedScalarCPU *__restrict__ d_U,
-                                          CeedScalarCPU *__restrict__ d_V) {
+extern "C" __global__ void InterpAtPoints(const CeedInt num_elem, const CeedScalarBase *__restrict__ c_B, const CeedInt *__restrict__ points_per_elem,
+                                          const CeedScalarBase *__restrict__ d_X, const CeedScalarBase *__restrict__ d_U,
+                                          CeedScalarBase *__restrict__ d_V) {
   extern __shared__ CeedScalar slice[];
 
   SharedData_Cuda data;
@@ -76,9 +76,9 @@ extern "C" __global__ void InterpAtPoints(const CeedInt num_elem, const CeedScal
   }
 }
 
-extern "C" __global__ void InterpTransposeAtPoints(const CeedInt num_elem, const CeedScalarCPU *__restrict__ c_B,
-                                                   const CeedInt *__restrict__ points_per_elem, const CeedScalarCPU *__restrict__ d_X,
-                                                   const CeedScalarCPU *__restrict__ d_U, CeedScalarCPU *__restrict__ d_V) {
+extern "C" __global__ void InterpTransposeAtPoints(const CeedInt num_elem, const CeedScalarBase *__restrict__ c_B,
+                                                   const CeedInt *__restrict__ points_per_elem, const CeedScalarBase *__restrict__ d_X,
+                                                   const CeedScalarBase *__restrict__ d_U, CeedScalarBase *__restrict__ d_V) {
   extern __shared__ CeedScalar slice[];
 
   SharedData_Cuda data;
@@ -146,9 +146,9 @@ extern "C" __global__ void InterpTransposeAtPoints(const CeedInt num_elem, const
   }
 }
 
-extern "C" __global__ void InterpTransposeAddAtPoints(const CeedInt num_elem, const CeedScalarCPU *__restrict__ c_B,
-                                                      const CeedInt *__restrict__ points_per_elem, const CeedScalarCPU *__restrict__ d_X,
-                                                      const CeedScalarCPU *__restrict__ d_U, CeedScalarCPU *__restrict__ d_V) {
+extern "C" __global__ void InterpTransposeAddAtPoints(const CeedInt num_elem, const CeedScalarBase *__restrict__ c_B,
+                                                      const CeedInt *__restrict__ points_per_elem, const CeedScalarBase *__restrict__ d_X,
+                                                      const CeedScalarBase *__restrict__ d_U, CeedScalarBase *__restrict__ d_V) {
   extern __shared__ CeedScalar slice[];
 
   SharedData_Cuda data;
@@ -208,9 +208,9 @@ extern "C" __global__ void InterpTransposeAddAtPoints(const CeedInt num_elem, co
 //------------------------------------------------------------------------------
 // Grad
 //------------------------------------------------------------------------------
-extern "C" __global__ void GradAtPoints(const CeedInt num_elem, const CeedScalarCPU *__restrict__ c_B, const CeedInt *__restrict__ points_per_elem,
-                                        const CeedScalarCPU *__restrict__ d_X, const CeedScalarCPU *__restrict__ d_U,
-                                        CeedScalarCPU *__restrict__ d_V) {
+extern "C" __global__ void GradAtPoints(const CeedInt num_elem, const CeedScalarBase *__restrict__ c_B, const CeedInt *__restrict__ points_per_elem,
+                                        const CeedScalarBase *__restrict__ d_X, const CeedScalarBase *__restrict__ d_U,
+                                        CeedScalarBase *__restrict__ d_V) {
   extern __shared__ CeedScalar slice[];
 
   SharedData_Cuda data;
@@ -264,9 +264,9 @@ extern "C" __global__ void GradAtPoints(const CeedInt num_elem, const CeedScalar
   }
 }
 
-extern "C" __global__ void GradTransposeAtPoints(const CeedInt num_elem, const CeedScalarCPU *__restrict__ c_B,
-                                                 const CeedInt *__restrict__ points_per_elem, const CeedScalarCPU *__restrict__ d_X,
-                                                 const CeedScalarCPU *__restrict__ d_U, CeedScalarCPU *__restrict__ d_V) {
+extern "C" __global__ void GradTransposeAtPoints(const CeedInt num_elem, const CeedScalarBase *__restrict__ c_B,
+                                                 const CeedInt *__restrict__ points_per_elem, const CeedScalarBase *__restrict__ d_X,
+                                                 const CeedScalarBase *__restrict__ d_U, CeedScalarBase *__restrict__ d_V) {
   extern __shared__ CeedScalar slice[];
 
   SharedData_Cuda data;
@@ -335,9 +335,9 @@ extern "C" __global__ void GradTransposeAtPoints(const CeedInt num_elem, const C
   }
 }
 
-extern "C" __global__ void GradTransposeAddAtPoints(const CeedInt num_elem, const CeedScalarCPU *__restrict__ c_B,
-                                                    const CeedInt *__restrict__ points_per_elem, const CeedScalarCPU *__restrict__ d_X,
-                                                    const CeedScalarCPU *__restrict__ d_U, CeedScalarCPU *__restrict__ d_V) {
+extern "C" __global__ void GradTransposeAddAtPoints(const CeedInt num_elem, const CeedScalarBase *__restrict__ c_B,
+                                                    const CeedInt *__restrict__ points_per_elem, const CeedScalarBase *__restrict__ d_X,
+                                                    const CeedScalarBase *__restrict__ d_U, CeedScalarBase *__restrict__ d_V) {
   extern __shared__ CeedScalar slice[];
 
   SharedData_Cuda data;
diff --git a/include/ceed/jit-source/cuda/cuda-shared-basis-tensor.h b/include/ceed/jit-source/cuda/cuda-shared-basis-tensor.h
diff --git a/include/ceed/jit-source/cuda/cuda-types.h b/include/ceed/jit-source/cuda/cuda-types.h