Skip to content

Commit 2e7231e

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web'
2 parents ce3e869 + 1a8bb53 commit 2e7231e

18 files changed

+470
-97
lines changed

.github/workflows/sycl_linux_build_and_test.yml

Lines changed: 4 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -36,26 +36,14 @@ on:
3636
build_artifact_suffix:
3737
type: string
3838
required: true
39-
intel_drivers_image:
40-
type: string
41-
required: false
42-
default: "ghcr.io/intel/llvm/ubuntu2004_intel_drivers:latest"
43-
lts_config:
39+
lts_matrix:
4440
type: string
4541
required: false
4642
default: ""
4743
lts_cmake_extra_args:
4844
type: string
4945
required: false
5046
default: ""
51-
amdgpu_image:
52-
type: string
53-
required: false
54-
default: "ghcr.io/intel/llvm/ubuntu2004_build:latest"
55-
cuda_image:
56-
type: string
57-
required: false
58-
default: "ghcr.io/intel/llvm/ubuntu2004_build:latest"
5947
lts_ref:
6048
type: string
6149
required: false
@@ -167,38 +155,14 @@ jobs:
167155
name: sycl_lit_${{ inputs.build_artifact_suffix }}
168156
path: lit.tar.xz
169157

170-
# This job generates matrix of tests for LLVM Test Suite
171-
resolve_matrix:
172-
name: Resolve Test Matrix
173-
runs-on: ubuntu-latest
174-
outputs:
175-
lts: ${{ steps.work.outputs.lts }}
176-
steps:
177-
- name: Download scripts and configs
178-
run: |
179-
wget raw.githubusercontent.com/intel/llvm/${{ github.sha }}/devops/scripts/generate_test_matrix.js
180-
wget raw.githubusercontent.com/intel/llvm/${{ github.sha }}/devops/test_configs.json
181-
wget raw.githubusercontent.com/intel/llvm/sycl/devops/dependencies.json
182-
mv dependencies.json dependencies.sycl.json
183-
wget raw.githubusercontent.com/intel/llvm/${{ github.sha }}/devops/dependencies.json
184-
- id: work
185-
uses: actions/github-script@v6
186-
name: Generate matrix
187-
env:
188-
GHA_INPUTS: ${{ toJSON(inputs) }}
189-
with:
190-
script: |
191-
const script = require('./generate_test_matrix.js');
192-
script({core, process});
193-
194158
llvm_test_suite:
195-
needs: [build, resolve_matrix]
196-
if: ${{ inputs.lts_config != '' }}
159+
needs: build
160+
if: ${{ inputs.lts_matrix != '' }}
197161
strategy:
198162
fail-fast: false
199163
max-parallel: ${{ inputs.max_parallel }}
200164
matrix:
201-
include: ${{ fromJSON(needs.resolve_matrix.outputs.lts) }}
165+
include: ${{ fromJSON(inputs.lts_matrix) }}
202166
name: ${{ matrix.name }}
203167
runs-on: ${{ matrix.runs-on }}
204168
env: ${{ matrix.env }}
@@ -239,4 +203,3 @@ jobs:
239203
check_sycl_all: ${{ matrix.check_sycl_all }}
240204
results_name_suffix: ${{ matrix.config }}_${{ inputs.build_artifact_suffix }}
241205
cmake_args: '${{ matrix.cmake_args }} ${{ inputs.lts_cmake_extra_args }}'
242-

.github/workflows/sycl_post_commit.yml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,20 @@ on:
1111
- .github/workflows/sycl_post_commit.yml
1212

1313
jobs:
14+
# This job generates matrix of tests for LLVM Test Suite
15+
resolve_matrix:
16+
name: Resolve Test Matrix
17+
uses: ./.github/workflows/sycl_resolve_test_matrix.yml
18+
with:
19+
lts_config: "l0_gen9"
1420
linux_default:
1521
name: Linux Default
22+
needs: resolve_matrix
1623
uses: ./.github/workflows/sycl_linux_build_and_test.yml
1724
with:
1825
build_cache_root: "/__w/llvm"
19-
build_artifact_suffix: default
26+
build_artifact_suffix: "post_commit"
27+
lts_matrix: ${{ needs.resolve_matrix.outputs.lts_matrix }}
2028
linux_no_assert:
2129
name: Linux (no assert)
2230
uses: ./.github/workflows/sycl_linux_build_and_test.yml

.github/workflows/sycl_precommit.yml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,23 @@ jobs:
2929
- name: Run clang-format
3030
uses: ./devops/actions/clang-format
3131

32+
# This job generates matrix of tests for LLVM Test Suite
33+
resolve_matrix:
34+
name: Resolve Test Matrix
35+
uses: ./.github/workflows/sycl_resolve_test_matrix.yml
36+
with:
37+
lts_config: "hip_amdgpu;ocl_x64;ocl_gen9;l0_gen9;esimd_emu;cuda"
38+
3239
linux_default:
3340
name: Linux
3441
# Only build and test patches, that have passed all linter checks, because
3542
# the next commit is likely to be a follow-up on that job.
36-
needs: lint
43+
needs: [lint, resolve_matrix]
3744
if: always() && (success() || contains(github.event.pull_request.labels.*.name, 'ignore-lint'))
3845
uses: ./.github/workflows/sycl_linux_build_and_test.yml
3946
with:
4047
build_cache_root: "/__w/"
4148
build_cache_size: "8G"
4249
build_artifact_suffix: "default"
4350
build_cache_suffix: "default"
44-
lts_config: "hip_amdgpu;ocl_x64;ocl_gen9;l0_gen9;esimd_emu;cuda"
51+
lts_matrix: ${{ needs.resolve_matrix.outputs.lts_matrix }}
.github/workflows/sycl_resolve_test_matrix.yml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Reusable workflow that produces the LLVM Test Suite job matrix.
#
# Callers pass a semicolon-separated list of configuration names in
# `lts_config` and read the resulting JSON from the `lts_matrix` output,
# which can be fed to `strategy.matrix.include` through `fromJSON`.
name: Reusable test matrix generation

on:
  workflow_call:
    inputs:
      # All inputs are serialized to JSON and handed to the generator script
      # through the GHA_INPUTS environment variable (see the `work` step).
      intel_drivers_image:
        type: string
        required: false
        default: "ghcr.io/intel/llvm/ubuntu2004_intel_drivers:latest"
      amdgpu_image:
        type: string
        required: false
        default: "ghcr.io/intel/llvm/ubuntu2004_build:latest"
      cuda_image:
        type: string
        required: false
        default: "ghcr.io/intel/llvm/ubuntu2004_build:latest"
      # Mandatory, so no `default` is declared: a default on a required input
      # could never take effect.
      lts_config:
        type: string
        required: true
    outputs:
      lts_matrix:
        description: "Generated Matrix"
        value: ${{ jobs.resolve_matrix.outputs.lts_matrix }}

jobs:
  resolve_matrix:
    name: Resolve Test Matrix
    runs-on: ubuntu-latest
    outputs:
      lts_matrix: ${{ steps.work.outputs.lts_matrix }}
    steps:
      - name: Download scripts and configs
        shell: bash
        # The URLs carry an explicit https:// scheme; a scheme-less URL makes
        # wget fall back to plain HTTP.
        # dependencies.json is fetched twice: first from the `sycl` branch
        # (saved as dependencies.sycl.json), then from the commit under test.
        run: |
          wget https://raw.githubusercontent.com/intel/llvm/${{ github.sha }}/devops/scripts/generate_test_matrix.js
          wget https://raw.githubusercontent.com/intel/llvm/${{ github.sha }}/devops/test_configs.json
          wget https://raw.githubusercontent.com/intel/llvm/sycl/devops/dependencies.json
          mv dependencies.json dependencies.sycl.json
          wget https://raw.githubusercontent.com/intel/llvm/${{ github.sha }}/devops/dependencies.json
      - id: work
        uses: actions/github-script@v6
        name: Generate matrix
        env:
          GHA_INPUTS: ${{ toJSON(inputs) }}
        with:
          # The downloaded script is expected to publish the `lts_matrix`
          # step output that the job-level `outputs` above refers to.
          script: |
            const script = require('./generate_test_matrix.js');
            script({core, process});

.github/workflows/sycl_windows_build_and_test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ on:
1010

1111
jobs:
1212
build:
13-
name: Build
13+
name: Build + LIT
1414
runs-on: [Windows, build]
1515
# TODO use cached checkout
1616
steps:

devops/scripts/generate_test_matrix.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ module.exports = ({core, process}) => {
5959
"ghcr.io/intel/llvm/ubuntu2004_base:latest");
6060
}
6161

62-
core.setOutput('lts', ltsString);
62+
core.setOutput('lts_matrix', ltsString);
6363
}
6464
});
6565
}

libclc/amdgcn-amdhsa/libspirv/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,4 @@ workitem/get_sub_group_local_id.cl
6565
workitem/get_sub_group_size.cl
6666
misc/sub_group_shuffle.cl
6767
async/wait_group_events.cl
68+
assert/__assert_fail.ll
libclc/amdgcn-amdhsa/libspirv/assert/__assert_fail.ll

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
;;===----------------------------------------------------------------------===//
2+
;;
3+
;; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
;; See https://llvm.org/LICENSE.txt for license information.
5+
;; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
;;
7+
;;===----------------------------------------------------------------------===//
8+
9+
#if __clang_major__ >= 7
10+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
11+
#else
12+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
13+
#endif
14+
15+
@.assert_fmt = private unnamed_addr constant [79 x i8] c"%s:%u: %s: global id: [%u,%u,%u], local id: [%u,%u,%u] Assertion `%s` failed.\0A\00", align 1
16+
17+
declare void @llvm.trap() cold noreturn nounwind
18+
19+
declare i64 @__ockl_fprintf_stderr_begin() local_unnamed_addr
20+
declare i64 @__ockl_fprintf_append_string_n(i64, i8* readonly, i64, i32) local_unnamed_addr
21+
declare i64 @__ockl_fprintf_append_args(i64, i32, i64, i64, i64, i64, i64, i64, i64, i32) local_unnamed_addr
22+
23+
declare dso_local i64 @_Z28__spirv_GlobalInvocationId_xv() local_unnamed_addr
24+
declare dso_local i64 @_Z28__spirv_GlobalInvocationId_yv() local_unnamed_addr
25+
declare dso_local i64 @_Z28__spirv_GlobalInvocationId_zv() local_unnamed_addr
26+
27+
declare dso_local i64 @_Z27__spirv_LocalInvocationId_xv() local_unnamed_addr
28+
declare dso_local i64 @_Z27__spirv_LocalInvocationId_yv() local_unnamed_addr
29+
declare dso_local i64 @_Z27__spirv_LocalInvocationId_zv() local_unnamed_addr
30+
31+
;; Length helper for the strings printed by the assert handler: walks the
;; NUL-terminated string at %str and returns the number of bytes scanned,
;; counting the terminating NUL itself (the C strlen value plus one).
define dso_local hidden noundef i64 @__strlen_assert(i8* noundef %str) local_unnamed_addr {
entry:
  br label %scan

scan:
  ;; %cur is the byte under inspection; %next is always one past it.
  %cur = phi i8* [ %str, %entry ], [ %next, %scan ]
  %next = getelementptr inbounds i8, i8* %cur, i64 1
  %ch = load i8, i8* %cur, align 1
  %is.nul = icmp eq i8 %ch, 0
  br i1 %is.nul, label %done, label %scan

done:
  ;; %next points just past the NUL here, so the difference includes it.
  %end.addr = ptrtoint i8* %next to i64
  %begin.addr = ptrtoint i8* %str to i64
  %count = sub i64 %end.addr, %begin.addr
  ret i64 %count
}
48+
49+
;; Handler for a failed device-side assertion. Builds the message described by
;; @.assert_fmt,
;;   <file>:<line>: <function>: global id: [x,y,z], local id: [x,y,z] Assertion `<expr>` failed.
;; through the __ockl_fprintf_* calls declared in this file, then traps.
;;
;; Every __ockl_fprintf_* call returns an i64 that is passed as the first
;; argument of the next call, so the values form one chain %msg -> %msg.6.
;; Arguments are appended in the order their conversions appear in the format
;; string. The trailing i32 is 1 only on the final append; presumably it marks
;; the last packet of the message (confirm against the OCKL headers).
define hidden void @__assert_fail(i8* %assertion, i8* %file, i32 %line, i8* %function) nounwind alwaysinline {
entry:
  %msg = call i64 @__ockl_fprintf_stderr_begin()
  ;; Format string first; 79 is the full size of @.assert_fmt, NUL included.
  %msg.1 = call i64 @__ockl_fprintf_append_string_n(i64 %msg, i8* readonly getelementptr inbounds ([79 x i8], [79 x i8]* @.assert_fmt, i64 0, i64 0), i64 79, i32 0)
  ;; %s  - file name (the length from __strlen_assert includes the NUL).
  %len.file = call i64 @__strlen_assert(i8* %file)
  %msg.2 = call i64 @__ockl_fprintf_append_string_n(i64 %msg.1, i8* readonly %file, i64 %len.file, i32 0)
  ;; %u  - line number, widened to the i64 slot used by append_args.
  %line.i64 = sext i32 %line to i64
  %msg.3 = call i64 @__ockl_fprintf_append_args(i64 %msg.2, i32 1, i64 %line.i64, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 0)
  ;; %s  - enclosing function name.
  %len.func = call i64 @__strlen_assert(i8* %function)
  %msg.4 = call i64 @__ockl_fprintf_append_string_n(i64 %msg.3, i8* readonly %function, i64 %len.func, i32 0)
  ;; 6 x %u - global id x/y/z followed by local id x/y/z.
  %gidx = tail call i64 @_Z28__spirv_GlobalInvocationId_xv()
  %gidy = tail call i64 @_Z28__spirv_GlobalInvocationId_yv()
  %gidz = tail call i64 @_Z28__spirv_GlobalInvocationId_zv()
  %lidx = tail call i64 @_Z27__spirv_LocalInvocationId_xv()
  %lidy = tail call i64 @_Z27__spirv_LocalInvocationId_yv()
  %lidz = tail call i64 @_Z27__spirv_LocalInvocationId_zv()
  %msg.5 = call i64 @__ockl_fprintf_append_args(i64 %msg.4, i32 6, i64 %gidx, i64 %gidy, i64 %gidz, i64 %lidx, i64 %lidy, i64 %lidz, i64 0, i32 0)
  ;; %s  - the asserted expression. This call must continue from %msg.5, the
  ;; result of the id append, so that step stays part of the message chain.
  %len.assertion = call i64 @__strlen_assert(i8* %assertion)
  %msg.6 = call i64 @__ockl_fprintf_append_string_n(i64 %msg.5, i8* readonly %assertion, i64 %len.assertion, i32 1)
  tail call void @llvm.trap()
  unreachable
}

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,11 +555,14 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
555555
}
556556

557557
/// Predicate for Internalize pass.
558+
/// Functions with the 'sycl-module-id' attribute are SYCL_EXTERNAL functions
559+
/// and must be preserved.
558560
static bool mustPreserveGV(const GlobalValue &GV) {
559561
if (const Function *F = dyn_cast<Function>(&GV))
560562
return F->isDeclaration() || F->getName().startswith("__asan_") ||
561563
F->getName().startswith("__sanitizer_") ||
562-
AMDGPU::isEntryFunctionCC(F->getCallingConv());
564+
AMDGPU::isEntryFunctionCC(F->getCallingConv()) ||
565+
F->hasFnAttribute("sycl-module-id");
563566

564567
GV.removeDeadConstantUsers();
565568
return !GV.use_empty();

sycl/doc/EnvironmentVariables.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ variables in production code.</span>
155155
| `SYCL_PI_LEVEL_ZERO_COPY_BATCH_SIZE` | Integer | Sets a preferred number of copy commands to batch into a command list before executing the command list. A value of 0 causes the batch size to be adjusted dynamically. A value greater than 0 specifies fixed size batching, with the batch size set to the specified value. The default is 0. |
156156
| `SYCL_PI_LEVEL_ZERO_FILTER_EVENT_WAIT_LIST` | Integer | When set to 0, disables filtering of signaled events from wait lists when using the Level Zero backend. The default is 1. |
157157
| `SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE` | Any(\*) | This environment variable enables users to control use of copy engines for copy operations. If the value is an integer, it will allow the use of copy engines, if available in the device, in Level Zero plugin to transfer SYCL buffer or image data between the host and/or device(s) and to fill SYCL buffer or image data in device or shared memory. The value of this environment variable can also be a pair of the form "lower_index:upper_index" where the indices point to copy engines in a list of all available copy engines. The default is 1. |
158-
| `SYCL_PI_LEVEL_ZERO_USE_COMPUTE_ENGINE` | Integer | It can be set to an integer (>=0) in which case all compute commands will be submitted to the command-queue with the given index in the compute command group. If it is instead set to a negative value then all available compute engines may be used. The default value is "-1" |
158+
| `SYCL_PI_LEVEL_ZERO_USE_COMPUTE_ENGINE` | Integer | It can be set to an integer (>=0), in which case all compute commands will be submitted to the command-queue with the given index in the compute command group. If it is instead set to a negative value, then all available compute engines may be used. The default is 0. |
159159
| `SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY` (experimental) | Integer | Allows the use of copy engine, if available in the device, in Level Zero plugin for device to device copy operations. The default is 0. This option is experimental and will be removed once heuristics are added to make a decision about use of copy engine for device to device copy operations. |
160160
| `SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS` | Any(\*) | Enable support of device-scope events whose state is not visible to the host. If enabled mode is SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=1 the Level Zero plugin would create all events having device-scope only and create proxy host-visible events for them when their status is needed (wait/query) on the host. If enabled mode is SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 the Level Zero plugin would create all events having device-scope and add proxy host-visible event at the end of each command-list submission. The default is 2, meaning only the last event in a batch is host-visible. |
161161
| `SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS` | Integer | When set to a positive value enables use of Level Zero immediate commandlists, which means there is no batching and all commands are immediately submitted for execution. Default is 0. Note: When immediate commandlist usage is enabled it is necessary to also set SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS to either 0 or 1. |

0 commit comments

Comments
 (0)