Skip to content

Commit b5871ee

Browse files
authored
[libspirv] Implement SPIR-V vload builtins via CLC (#19174)
No change to any target's generated bytecode.
1 parent 5dc9b12 commit b5871ee

File tree

2 files changed

+111
-179
lines changed

2 files changed

+111
-179
lines changed

libclc/libspirv/lib/generic/shared/vload.cl

Lines changed: 5 additions & 179 deletions
Original file line numberDiff line numberDiff line change
@@ -6,185 +6,11 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include <clc/shared/clc_vload.h>
910
#include <libspirv/spirv.h>
1011

11-
#define VLOAD_VECTORIZE(RTYPE, PRIM_TYPE, ADDR_SPACE) \
12-
typedef PRIM_TYPE less_aligned_##ADDR_SPACE##PRIM_TYPE \
13-
__attribute__((aligned(sizeof(PRIM_TYPE)))); \
14-
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE __spirv_ocl_vload_R##RTYPE( \
15-
size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
16-
return *(( \
17-
const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE *)(&x[offset])); \
18-
} \
19-
\
20-
typedef PRIM_TYPE##2 less_aligned_##ADDR_SPACE##PRIM_TYPE##2 \
21-
__attribute__((aligned(sizeof(PRIM_TYPE)))); \
22-
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##2 __spirv_ocl_vloadn_R##RTYPE##2( \
23-
size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
24-
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##2 \
25-
*)(&x[2 * offset])); \
26-
} \
27-
\
28-
typedef PRIM_TYPE##3 less_aligned_##ADDR_SPACE##PRIM_TYPE##3 \
29-
__attribute__((aligned(sizeof(PRIM_TYPE)))); \
30-
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##3 __spirv_ocl_vloadn_R##RTYPE##3( \
31-
size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
32-
PRIM_TYPE##2 vec = \
33-
*((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##2 \
34-
*)(&x[3 * offset])); \
35-
return (PRIM_TYPE##3)(vec.s0, vec.s1, x[offset * 3 + 2]); \
36-
} \
37-
\
38-
typedef PRIM_TYPE##4 less_aligned_##ADDR_SPACE##PRIM_TYPE##4 \
39-
__attribute__((aligned(sizeof(PRIM_TYPE)))); \
40-
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 __spirv_ocl_vloadn_R##RTYPE##4( \
41-
size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
42-
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##4 \
43-
*)(&x[4 * offset])); \
44-
} \
45-
\
46-
typedef PRIM_TYPE##8 less_aligned_##ADDR_SPACE##PRIM_TYPE##8 \
47-
__attribute__((aligned(sizeof(PRIM_TYPE)))); \
48-
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 __spirv_ocl_vloadn_R##RTYPE##8( \
49-
size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
50-
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##8 \
51-
*)(&x[8 * offset])); \
52-
} \
53-
\
54-
typedef PRIM_TYPE##16 less_aligned_##ADDR_SPACE##PRIM_TYPE##16 \
55-
__attribute__((aligned(sizeof(PRIM_TYPE)))); \
56-
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 __spirv_ocl_vloadn_R##RTYPE##16( \
57-
size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
58-
return *((const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##16 \
59-
*)(&x[16 * offset])); \
60-
}
12+
// First instantiation of the vload.inc body, driven by the integer
// gentype.inc. NOTE(review): presumably gentype.inc includes __CLC_BODY once
// per element type and vector width - confirm against the CLC headers.
#define __CLC_BODY "vload.inc"
13+
#include <clc/integer/gentype.inc>
6114

62-
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
63-
#define VLOAD_VECTORIZE_GENERIC VLOAD_VECTORIZE
64-
#else
65-
// The generic address space isn't available, so make the macro do nothing
66-
#define VLOAD_VECTORIZE_GENERIC(X,Y,Z)
67-
#endif
68-
69-
#define VLOAD_ADDR_SPACES_IMPL(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE) \
70-
VLOAD_VECTORIZE(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __private) \
71-
VLOAD_VECTORIZE(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __local) \
72-
VLOAD_VECTORIZE(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __constant) \
73-
VLOAD_VECTORIZE(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __global) \
74-
VLOAD_VECTORIZE_GENERIC(__CLC_RET_GENTYPE, __CLC_SCALAR_GENTYPE, __generic)
75-
76-
#define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
77-
VLOAD_ADDR_SPACES_IMPL(__CLC_SCALAR_GENTYPE, __CLC_SCALAR_GENTYPE)
78-
79-
VLOAD_ADDR_SPACES_IMPL(char, char)
80-
81-
#define VLOAD_TYPES() \
82-
VLOAD_ADDR_SPACES(uchar) \
83-
VLOAD_ADDR_SPACES(short) \
84-
VLOAD_ADDR_SPACES(ushort) \
85-
VLOAD_ADDR_SPACES(int) \
86-
VLOAD_ADDR_SPACES(uint) \
87-
VLOAD_ADDR_SPACES(long) \
88-
VLOAD_ADDR_SPACES(ulong) \
89-
VLOAD_ADDR_SPACES(float)
90-
91-
VLOAD_TYPES()
92-
93-
#ifdef cl_khr_fp64
94-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
95-
VLOAD_ADDR_SPACES(double)
96-
#endif
97-
#ifdef cl_khr_fp16
98-
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
99-
VLOAD_ADDR_SPACES(half)
100-
#endif
101-
102-
/* vload_half are legal even without cl_khr_fp16 */
103-
/* no vload_half for double */
104-
#if __clang_major__ < 6
105-
float __clc_vload_half_float_helper__constant(const __constant half *);
106-
float __clc_vload_half_float_helper__global(const __global half *);
107-
float __clc_vload_half_float_helper__local(const __local half *);
108-
float __clc_vload_half_float_helper__private(const __private half *);
109-
110-
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
111-
float __clc_vload_half_float_helper__generic(const __generic half *);
112-
#endif
113-
114-
#define VEC_LOAD1(val, AS) \
115-
val = __clc_vload_half_float_helper##AS(&mem[offset++]);
116-
#else
117-
#define VEC_LOAD1(val, AS) val = __builtin_load_halff(&mem[offset++]);
118-
#endif
119-
120-
#define VEC_LOAD2(val, AS) \
121-
VEC_LOAD1(val.lo, AS) \
122-
VEC_LOAD1(val.hi, AS)
123-
#define VEC_LOAD3(val, AS) \
124-
VEC_LOAD1(val.s0, AS) \
125-
VEC_LOAD1(val.s1, AS) \
126-
VEC_LOAD1(val.s2, AS)
127-
#define VEC_LOAD4(val, AS) \
128-
VEC_LOAD2(val.lo, AS) \
129-
VEC_LOAD2(val.hi, AS)
130-
#define VEC_LOAD8(val, AS) \
131-
VEC_LOAD4(val.lo, AS) \
132-
VEC_LOAD4(val.hi, AS)
133-
#define VEC_LOAD16(val, AS) \
134-
VEC_LOAD8(val.lo, AS) \
135-
VEC_LOAD8(val.hi, AS)
136-
137-
#define VLOAD_HALF_VEC_IMPL(VEC_SIZE, OFFSET_SIZE, AS) \
138-
_CLC_OVERLOAD _CLC_DEF float##VEC_SIZE \
139-
__spirv_ocl_vload_halfn_Rfloat##VEC_SIZE(size_t offset, \
140-
const AS half *mem) { \
141-
offset *= VEC_SIZE; \
142-
float##VEC_SIZE __tmp; \
143-
VEC_LOAD##VEC_SIZE(__tmp, AS) return __tmp; \
144-
} \
145-
_CLC_OVERLOAD _CLC_DEF float##VEC_SIZE \
146-
__spirv_ocl_vloada_halfn_Rfloat##VEC_SIZE(size_t offset, \
147-
const AS half *mem) { \
148-
offset *= OFFSET_SIZE; \
149-
float##VEC_SIZE __tmp; \
150-
VEC_LOAD##VEC_SIZE(__tmp, AS) return __tmp; \
151-
}
152-
153-
#define VLOAD_HALF_IMPL(AS) \
154-
_CLC_OVERLOAD _CLC_DEF float __spirv_ocl_vload_half(size_t offset, \
155-
const AS half *mem) { \
156-
float __tmp; \
157-
VEC_LOAD1(__tmp, AS) return __tmp; \
158-
}
159-
160-
#define GEN_VLOAD_HALF(AS) \
161-
VLOAD_HALF_IMPL(AS) \
162-
VLOAD_HALF_VEC_IMPL(2, 2, AS) \
163-
VLOAD_HALF_VEC_IMPL(3, 4, AS) \
164-
VLOAD_HALF_VEC_IMPL(4, 4, AS) \
165-
VLOAD_HALF_VEC_IMPL(8, 8, AS) \
166-
VLOAD_HALF_VEC_IMPL(16, 16, AS)
167-
168-
GEN_VLOAD_HALF(__private)
169-
GEN_VLOAD_HALF(__global)
170-
GEN_VLOAD_HALF(__local)
171-
GEN_VLOAD_HALF(__constant)
172-
173-
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
174-
GEN_VLOAD_HALF(__generic)
175-
#endif
176-
177-
#undef VLOAD_HALF_IMPL
178-
#undef VLOAD_HALF_VEC_IMPL
179-
#undef GEN_VLOAD_HALF
180-
#undef VEC_LOAD16
181-
#undef VEC_LOAD8
182-
#undef VEC_LOAD4
183-
#undef VEC_LOAD3
184-
#undef VEC_LOAD2
185-
#undef VEC_LOAD1
186-
#undef VLOAD_TYPES
187-
#undef VLOAD_ADDR_SPACES
188-
#undef VLOAD_VECTORIZE
189-
#undef VLOAD_VECTORIZE_GENERIC
190-
#undef VLOAD_VECTORIZE
15+
// Second instantiation of the same vload.inc body, this time driven by the
// math gentype.inc. NOTE(review): presumably this is the pass that sets
// __CLC_FPSIZE, which vload.inc tests before emitting the vload_half wrappers
// - confirm against the CLC headers.
#define __CLC_BODY "vload.inc"
16+
#include <clc/math/gentype.inc>
libclc/libspirv/lib/generic/shared/vload.inc

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// N_IF_VEC is the optional "n" in the SPIR-V builtin name: it is empty when
// __CLC_SCALAR is defined and "n" otherwise. It is pasted into the name built
// by SPIRV_VLOAD_NAME().
#ifdef __CLC_SCALAR
10+
#define N_IF_VEC
11+
#else
12+
#define N_IF_VEC n
13+
#endif
14+
15+
// Name of the CLC builtin the vector wrapper forwards to: __clc_vload followed
// by __CLC_VECSIZE (assuming __CLC_XCONCAT pastes its expanded arguments).
#define CLC_VLOAD_NAME() __CLC_XCONCAT(__clc_vload, __CLC_VECSIZE)
16+
// Name of the SPIR-V builtin emitted by VLOAD_DEF:
//   __spirv_ocl_vload <N_IF_VEC> _R <__CLC_GENTYPE>
// i.e. "vload" for scalar gentypes and "vloadn" for vector gentypes, with the
// return type appended after _R.
#define SPIRV_VLOAD_NAME() \
17+
__CLC_XCONCAT(__CLC_XCONCAT(__CLC_XCONCAT(__spirv_ocl_vload, N_IF_VEC), _R), \
18+
__CLC_GENTYPE)
19+
20+
// Name of the CLC half-load builtin that VLOAD_HALF_DEF forwards to:
//   __clc_vload <a> _half <__CLC_VECSIZE>
// The instantiations in this file pass either an empty argument or `a` for
// the `a` parameter.
#define CLC_VLOAD_HALF_NAME(a) \
21+
__CLC_XCONCAT(__CLC_XCONCAT(__CLC_XCONCAT(__clc_vload, a), _half), \
22+
__CLC_VECSIZE)
23+
24+
// Type name less_aligned_<__CLC_GENTYPE>, used by VLOAD_DEF as both return
// type and (in the scalar case) the pointee type of the load.
// NOTE(review): no typedef for less_aligned_* appears in this file; presumably
// it is provided through <clc/shared/clc_vload.h> - confirm.
#define CLC_VLOAD_TY __CLC_XCONCAT(less_aligned_, __CLC_GENTYPE)
25+
26+
#ifdef __CLC_SCALAR
27+
28+
// Scalar variant (selected when __CLC_SCALAR is defined).
// Defines the SPIRV_VLOAD_NAME() overload for one address space. The load is
// done inline: x[offset] is read through a pointer to CLC_VLOAD_TY, with no
// call to a __clc_vload helper.
//   ADDRSPACE - address-space qualifier applied to the source pointer.
// NOTE(review): the return type is CLC_VLOAD_TY rather than __CLC_GENTYPE;
// presumably the two differ only in alignment - confirm.
#define VLOAD_DEF(ADDRSPACE) \
29+
_CLC_OVERLOAD _CLC_DEF CLC_VLOAD_TY SPIRV_VLOAD_NAME()( \
30+
size_t offset, const ADDRSPACE __CLC_SCALAR_GENTYPE *x) { \
31+
return *((const ADDRSPACE CLC_VLOAD_TY *)(&x[offset])); \
32+
}
33+
34+
// Scalar variant of the SPIR-V half-load builtin name:
//   __spirv_ocl_vload <a> _half
// Unlike the vector variant, no "n" and no _R<type> suffix is appended.
#define SPIRV_VLOAD_HALF_NAME(a) \
35+
__CLC_XCONCAT(__CLC_XCONCAT(__spirv_ocl_vload, a), _half)
36+
37+
#else
38+
39+
// Vector variant (selected when __CLC_SCALAR is not defined).
// Defines the SPIRV_VLOAD_NAME() overload for one address space as a thin
// wrapper: offset and x are passed through unchanged to CLC_VLOAD_NAME(), so
// this wrapper does no offset scaling of its own.
//   ADDRSPACE - address-space qualifier applied to the source pointer.
// NOTE(review): the return type is CLC_VLOAD_TY rather than __CLC_GENTYPE;
// presumably the two differ only in alignment - confirm.
#define VLOAD_DEF(ADDRSPACE) \
40+
_CLC_OVERLOAD _CLC_DEF CLC_VLOAD_TY SPIRV_VLOAD_NAME()( \
41+
size_t offset, const ADDRSPACE __CLC_SCALAR_GENTYPE *x) { \
42+
return CLC_VLOAD_NAME()(offset, x); \
43+
}
44+
45+
// Vector variant of the SPIR-V half-load builtin name:
//   __spirv_ocl_vload <a> _halfn _R <__CLC_GENTYPE>
#define SPIRV_VLOAD_HALF_NAME(a) \
46+
__CLC_XCONCAT( \
47+
__CLC_XCONCAT( \
48+
__CLC_XCONCAT(__CLC_XCONCAT(__spirv_ocl_vload, a), _halfn), _R), \
49+
__CLC_GENTYPE)
50+
51+
#endif
52+
53+
// Emit one vload overload per address space for the current gentype.
VLOAD_DEF(__private)
54+
VLOAD_DEF(__local)
55+
VLOAD_DEF(__constant)
56+
VLOAD_DEF(__global)
57+
58+
// The __generic overload is emitted only when
// _CLC_DISTINCT_GENERIC_AS_SUPPORTED evaluates to non-zero.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
59+
VLOAD_DEF(__generic)
60+
#endif
61+
62+
// These two helpers are not used by the vload_half section that follows, so
// they are dropped here.
#undef VLOAD_DEF
63+
#undef CLC_VLOAD_TY
64+
65+
// vload_half and vloada_half are available even if cl_khr_fp16 is unavailable.
66+
// Define these functions when working on float types, which we know are
67+
// always available.
68+
#ifdef __CLC_FPSIZE
69+
#if __CLC_FPSIZE == 32
70+
71+
// Defines one SPIR-V half-load wrapper that returns __CLC_GENTYPE and forwards
// offset and mem unchanged to the matching CLC builtin.
//   ADDRSPACE - address-space qualifier applied to the half pointer.
//   A         - empty or `a`; pasted into both the SPIR-V and the CLC name to
//               select the vload_half or vloada_half flavour.
// Only expanded inside the __CLC_FPSIZE == 32 guard that encloses it.
#define VLOAD_HALF_DEF(ADDRSPACE, A) \
72+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE SPIRV_VLOAD_HALF_NAME(A)( \
73+
size_t offset, const ADDRSPACE half *mem) { \
74+
return CLC_VLOAD_HALF_NAME(A)(offset, mem); \
75+
}
76+
77+
// vload_half flavour (empty second argument): one wrapper per address space,
// emitted for both scalar and vector gentypes.
VLOAD_HALF_DEF(__private, )
78+
VLOAD_HALF_DEF(__local, )
79+
VLOAD_HALF_DEF(__constant, )
80+
VLOAD_HALF_DEF(__global, )
81+
82+
// The __generic wrapper is emitted only when
// _CLC_DISTINCT_GENERIC_AS_SUPPORTED evaluates to non-zero.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
83+
VLOAD_HALF_DEF(__generic, )
84+
#endif
85+
86+
// vloada_half flavour (second argument `a`): emitted for vector gentypes only,
// so no scalar `a` wrapper is generated by this file.
#ifndef __CLC_SCALAR
87+
VLOAD_HALF_DEF(__private, a)
88+
VLOAD_HALF_DEF(__local, a)
89+
VLOAD_HALF_DEF(__constant, a)
90+
VLOAD_HALF_DEF(__global, a)
91+
92+
// The __generic wrapper is emitted only when
// _CLC_DISTINCT_GENERIC_AS_SUPPORTED evaluates to non-zero.
#if _CLC_DISTINCT_GENERIC_AS_SUPPORTED
93+
VLOAD_HALF_DEF(__generic, a)
94+
#endif
95+
#endif
96+
97+
#undef VLOAD_HALF_DEF
98+
#endif
99+
#endif
100+
101+
// Drop the remaining helper macros defined by this body. NOTE(review):
// presumably this lets the body be included again for the next gentype
// without macro redefinition conflicts - confirm against gentype.inc.
#undef CLC_VLOAD_NAME
102+
#undef CLC_VLOAD_HALF_NAME
103+
#undef SPIRV_VLOAD_NAME
104+
#undef SPIRV_VLOAD_HALF_NAME
105+
106+
#undef N_IF_VEC

0 commit comments

Comments
 (0)