Skip to content

[PowerPC][NFC] Define new alias for mma accumulate builtins #147382

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jul 9, 2025
Merged
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 54 additions & 105 deletions clang/include/clang/Basic/BuiltinsPPC.def
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,48 @@
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
#endif

// Built-ins requiring custom code generation.
// Because these built-ins rely on target-dependent types and to avoid pervasive
// change, they are type checked manually in Sema using custom type descriptors.
// The first argument of the CUSTOM_BUILTIN macro is the name of the built-in
// with its prefix, the second argument is the name of the intrinsic this
// built-in generates, the third argument specifies the type of the function
// (result value, then each argument) as follows:
// i -> Unsigned integer followed by the greatest possible value for that
// argument or 0 if no constraint on the value.
// (e.g. i15 for a 4-bit value)
// V -> Vector type used with MMA built-ins (vector unsigned char)
// W -> PPC Vector type followed by the size of the vector type.
// (e.g. W512 for __vector_quad)
// any other descriptor -> Fall back to generic type descriptor decoding.
// The 'C' suffix can be used as a suffix to specify the const type.
// The '*' suffix can be used as a suffix to specify a pointer to a type.
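// The fourth argument is set to true if the built-in accumulates its result
// into its given accumulator.
// The fifth argument is the target feature string (e.g.
// "mma,paired-vector-memops"); the default definition below passes it on as
// the FEATURE argument of TARGET_BUILTIN.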

// Default definition, used only when the includer has not already defined
// CUSTOM_BUILTIN. It declares __builtin_<ID> through TARGET_BUILTIN and ignores
// INTR, TYPES and ACCUMULATE: the type string is the fixed placeholder "i."
// and the attribute string is "t".
// NOTE(review): "t" is presumably Clang's "custom type checking" builtin
// attribute, matching the manual Sema checking described above -- confirm.
#ifndef CUSTOM_BUILTIN
#define CUSTOM_BUILTIN(ID, INTR, TYPES, ACCUMULATE, FEATURE) \
TARGET_BUILTIN(__builtin_##ID, "i.", "t", FEATURE)
#endif

// UNALIASED_CUSTOM_BUILTIN is shorthand for built-ins that have the same name
// as the intrinsic they generate. ID is forwarded to CUSTOM_BUILTIN as both
// its ID and its INTR argument, so the name is written only once per use;
// TYPES, ACCUMULATE and FEATURE are forwarded unchanged.
#define UNALIASED_CUSTOM_BUILTIN(ID, TYPES, ACCUMULATE, FEATURE) \
CUSTOM_BUILTIN(ID, ID, TYPES, ACCUMULATE, FEATURE)

// UNALIASED_CUSTOM_MMA_BUILTIN declares an MMA built-in together with its four
// positive/negative multiply and positive/negative accumulate variants, whose
// names are ID concatenated with the suffix nn, np, pn or pp.
// The base built-in is declared with ACCUMULATE set to false and the four
// suffixed variants with ACCUMULATE set to true. All five entries share the
// same TYPES and FEATURE, which avoids repeating them at every use.
// For example,
//   UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf16ger2, "vW512*VV",
//                                "mma,paired-vector-memops")
// expands to UNALIASED_CUSTOM_BUILTIN entries for mma_xvf16ger2,
// mma_xvf16ger2nn, mma_xvf16ger2np, mma_xvf16ger2pn and mma_xvf16ger2pp.
#define UNALIASED_CUSTOM_MMA_BUILTIN(ID, TYPES, FEATURE) \
UNALIASED_CUSTOM_BUILTIN(ID, TYPES, false, FEATURE) \
UNALIASED_CUSTOM_BUILTIN(ID##nn, TYPES, true, FEATURE) \
UNALIASED_CUSTOM_BUILTIN(ID##np, TYPES, true, FEATURE) \
UNALIASED_CUSTOM_BUILTIN(ID##pn, TYPES, true, FEATURE) \
UNALIASED_CUSTOM_BUILTIN(ID##pp, TYPES, true, FEATURE)

// GCC predefined macros to rename builtins, undef them to keep original names.
#if defined(__GNUC__) && !defined(__clang__)
#undef __builtin_vsx_xvnmaddadp
Expand Down Expand Up @@ -967,25 +1001,6 @@ BUILTIN(__builtin_setflm, "dd", "")
// Cache built-ins
BUILTIN(__builtin_dcbf, "vvC*", "")

// Built-ins requiring custom code generation.
// Because these built-ins rely on target-dependent types and to avoid pervasive
// change, they are type checked manually in Sema using custom type descriptors.
// The first argument of the CUSTOM_BUILTIN macro is the name of the built-in
// with its prefix, the second argument is the name of the intrinsic this
// built-in generates, the third argument specifies the type of the function
// (result value, then each argument) as follows:
// i -> Unsigned integer followed by the greatest possible value for that
// argument or 0 if no constraint on the value.
// (e.g. i15 for a 4-bits value)
// V -> Vector type used with MMA built-ins (vector unsigned char)
// W -> PPC Vector type followed by the size of the vector type.
// (e.g. W512 for __vector_quad)
// any other descriptor -> Fall back to generic type descriptor decoding.
// The 'C' suffix can be used as a suffix to specify the const type.
// The '*' suffix can be used as a suffix to specify a pointer to a type.
// The fourth argument is set to true if the built-in accumulates its result into
// its given accumulator.

// Provided builtins with _mma_ prefix for compatibility.
CUSTOM_BUILTIN(mma_lxvp, vsx_lxvp, "W256SLiW256C*", false,
"paired-vector-memops")
Expand All @@ -999,11 +1014,6 @@ CUSTOM_BUILTIN(vsx_build_pair, vsx_assemble_pair, "vW256*VV", false,
"paired-vector-memops")
CUSTOM_BUILTIN(mma_build_acc, mma_assemble_acc, "vW512*VVVV", false, "mma")

// UNALIASED_CUSTOM_BUILTIN macro is used for built-ins that have
// the same name as that of the intrinsic they generate, i.e. the
// ID and INTR are the same.
// This avoids repeating the ID and INTR in the macro expression.

UNALIASED_CUSTOM_BUILTIN(vsx_lxvp, "W256SLiW256C*", false,
"paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(vsx_stxvp, "vW256SLiW256*", false,
Expand Down Expand Up @@ -1032,12 +1042,6 @@ UNALIASED_CUSTOM_BUILTIN(mma_xvi16ger2, "vW512*VV", false,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvi16ger2s, "vW512*VV", false,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvf16ger2, "vW512*VV", false,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvf32ger, "vW512*VV", false,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvf64ger, "vW512*W256V", false,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvi4ger8, "vW512*VVi15i15i255", false,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvi8ger4, "vW512*VVi15i15i15", false,
Expand All @@ -1046,12 +1050,6 @@ UNALIASED_CUSTOM_BUILTIN(mma_pmxvi16ger2, "vW512*VVi15i15i3", false,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvi16ger2s, "vW512*VVi15i15i3", false,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvf16ger2, "vW512*VVi15i15i3", false,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvf32ger, "vW512*VVi15i15", false,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvf64ger, "vW512*W256Vi15i3", false,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvi4ger8pp, "vW512*VV", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvi8ger4pp, "vW512*VV", true,
Expand All @@ -1072,74 +1070,6 @@ UNALIASED_CUSTOM_BUILTIN(mma_pmxvi16ger2pp, "vW512*VVi15i15i3", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvi16ger2spp, "vW512*VVi15i15i3", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvf16ger2pp, "vW512*VV", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvf16ger2pn, "vW512*VV", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvf16ger2np, "vW512*VV", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvf16ger2nn, "vW512*VV", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvf16ger2pp, "vW512*VVi15i15i3", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvf16ger2pn, "vW512*VVi15i15i3", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvf16ger2np, "vW512*VVi15i15i3", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvf16ger2nn, "vW512*VVi15i15i3", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvf32gerpp, "vW512*VV", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvf32gerpn, "vW512*VV", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvf32gernp, "vW512*VV", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvf32gernn, "vW512*VV", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvf32gerpp, "vW512*VVi15i15", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvf32gerpn, "vW512*VVi15i15", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvf32gernp, "vW512*VVi15i15", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvf32gernn, "vW512*VVi15i15", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvf64gerpp, "vW512*W256V", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvf64gerpn, "vW512*W256V", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvf64gernp, "vW512*W256V", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvf64gernn, "vW512*W256V", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvf64gerpp, "vW512*W256Vi15i3", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvf64gerpn, "vW512*W256Vi15i3", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvf64gernp, "vW512*W256Vi15i3", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvf64gernn, "vW512*W256Vi15i3", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvbf16ger2, "vW512*VV", false,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2, "vW512*VVi15i15i3", false,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvbf16ger2pp, "vW512*VV", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvbf16ger2pn, "vW512*VV", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvbf16ger2np, "vW512*VV", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_xvbf16ger2nn, "vW512*VV", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2pp, "vW512*VVi15i15i3", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2pn, "vW512*VVi15i15i3", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2np, "vW512*VVi15i15i3", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2nn, "vW512*VVi15i15i3", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_dmxvi8gerx4, "vW1024*W256V", false,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmdmxvi8gerx4, "vW1024*W256Vi255i15i15", false,
Expand All @@ -1148,14 +1078,33 @@ UNALIASED_CUSTOM_BUILTIN(mma_dmxvi8gerx4pp, "vW1024*W256V", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmdmxvi8gerx4pp, "vW1024*W256Vi255i15i15", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_dmxvi8gerx4spp, "vW1024*W256V", true,
UNALIASED_CUSTOM_BUILTIN(mma_dmxvi8gerx4spp, "vW1024*W256V", true,
"mma,paired-vector-memops")
UNALIASED_CUSTOM_BUILTIN(mma_pmdmxvi8gerx4spp, "vW1024*W256Vi255i15i15", true,
"mma,paired-vector-memops")

// MMA builtins with positive/negative multiply/accumulate.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please add a list of builtin's name as a comment here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel that would be a bit of an overkill. I have added an example at the top to further document the new macro usage.

// Each entry declares five built-ins: the named one (non-accumulating) and
// its accumulating variants with the suffixes nn, np, pn and pp, e.g.
// mma_xvf16ger2, mma_xvf16ger2nn, mma_xvf16ger2np, mma_xvf16ger2pn and
// mma_xvf16ger2pp. Families declared here: xvf16ger2, xvf32ger, xvf64ger,
// xvbf16ger2 and their pm-prefixed counterparts (pmxvf16ger2, pmxvf32ger,
// pmxvf64ger, pmxvbf16ger2).
UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf16ger2, "vW512*VV",
"mma,paired-vector-memops")
UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf32ger, "vW512*VV",
"mma,paired-vector-memops")
UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvf64ger, "vW512*W256V",
"mma,paired-vector-memops")
UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmxvf16ger2, "vW512*VVi15i15i3",
"mma,paired-vector-memops")
UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmxvf32ger, "vW512*VVi15i15",
"mma,paired-vector-memops")
UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmxvf64ger, "vW512*W256Vi15i3",
"mma,paired-vector-memops")
UNALIASED_CUSTOM_MMA_BUILTIN(mma_xvbf16ger2, "vW512*VV",
"mma,paired-vector-memops")
UNALIASED_CUSTOM_MMA_BUILTIN(mma_pmxvbf16ger2, "vW512*VVi15i15i3",
"mma,paired-vector-memops")

// FIXME: Obviously incomplete.

// Undefine every macro this file defines or expects from its includer.
// NOTE(review): presumably so the file can be included again with different
// macro definitions -- confirm against the including sources.
#undef BUILTIN
#undef TARGET_BUILTIN
#undef CUSTOM_BUILTIN
#undef UNALIASED_CUSTOM_BUILTIN
#undef UNALIASED_CUSTOM_MMA_BUILTIN