Skip to content

Commit a3c2a85

Browse files
authored
Merge branch 'main' into revert-access-by
2 parents 5ba0d57 + 1b8ee0d commit a3c2a85

File tree

85 files changed

+2579
-1470
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+2579
-1470
lines changed

.github/scripts/compute_benchmarks.py

Lines changed: 0 additions & 206 deletions
This file was deleted.

.github/workflows/benchmarks_compute.yml

Lines changed: 7 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ permissions:
4242
jobs:
4343
e2e-build-hw:
4444
# Run only on upstream; forks will not have the HW
45-
if: github.repository == 'oneapi-src/unified-runtime'
45+
# if: github.repository == 'oneapi-src/unified-runtime'
4646
name: Build SYCL, UR, run Compute Benchmarks
4747
strategy:
4848
matrix:
@@ -88,6 +88,9 @@ jobs:
8888
with:
8989
path: ur-repo
9090

91+
- name: Install pip packages
92+
run: pip install -r ${{github.workspace}}/ur-repo/third_party/requirements.txt
93+
9194
# We need to fetch special ref for proper PR's merge commit. Note, this ref may be absent if the PR is already merged.
9295
- name: Fetch PR's merge commit
9396
if: ${{ inputs.pr_no != 0 }}
@@ -131,44 +134,13 @@ jobs:
131134
- name: Build SYCL
132135
run: cmake --build ${{github.workspace}}/sycl_build -j
133136

134-
- name: Set additional env. vars
135-
run: |
136-
echo "${{github.workspace}}/sycl_build/bin" >> $GITHUB_PATH
137-
echo "LD_LIBRARY_PATH=${{github.workspace}}/sycl_build/lib:$LD_LIBRARY_PATH" >> $GITHUB_ENV
138-
139-
# Running (newly built) sycl-ls sets up some extra variables
140-
- name: Setup SYCL variables
141-
run: |
142-
which clang++ sycl-ls
143-
SYCL_PI_TRACE=-1 sycl-ls
144-
145-
- name: Checkout Compute Benchmarks
146-
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
147-
with:
148-
repository: intel/compute-benchmarks
149-
path: compute-benchmarks-repo
150-
submodules: recursive
151-
152-
- name: Configure Compute Benchmarks
153-
run: >
154-
cmake
155-
-B ${{github.workspace}}/compute-benchmarks-build/
156-
-S ${{github.workspace}}/compute-benchmarks-repo/
157-
-DCMAKE_BUILD_TYPE=Release
158-
-DBUILD_SYCL=ON
159-
-DSYCL_COMPILER_ROOT=${{github.workspace}}/sycl_build
160-
-DALLOW_WARNINGS=ON
161-
162-
- name: Build Compute Benchmarks
163-
run: cmake --build ${{github.workspace}}/compute-benchmarks-build/ -j
164-
165137
- name: Set oneAPI Device Selector
166138
run: |
167139
echo "ONEAPI_DEVICE_SELECTOR=${{ matrix.adapter.str_name }}:${{ matrix.adapter.unit }}" >> $GITHUB_ENV
168140
169-
- name: Run SYCL API Overhead benchmark
141+
- name: Run benchmarks
170142
id: benchmarks
171-
run: ${{ github.workspace }}/ur-repo/.github/scripts/compute_benchmarks.py ${{ github.workspace }}/compute-benchmarks-build/bin/ ${{ inputs.bench_script_params }}
143+
run: numactl -N 0 ${{ github.workspace }}/ur-repo/scripts/benchmarks/main.py ~/bench_workdir ${{github.workspace}}/sycl_build ${{ inputs.bench_script_params }}
172144

173145
- name: Add comment to PR
174146
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
@@ -188,7 +160,7 @@ jobs:
188160
const test_status = '${{ steps.benchmarks.outcome }}';
189161
const job_status = '${{ job.status }}';
190162
const params = '${{ inputs.bench_script_params }}';
191-
const body = `Compute Benchmarks ${adapter} run (with params: ${params}):\n${url}\nJob status: ${job_status}. Test status: ${test_status}.\n ${markdown}`;
163+
const body = `Compute Benchmarks ${adapter} run (${params}):\n${url}\nJob status: ${job_status}. Test status: ${test_status}.\n ${markdown}`;
192164
193165
github.rest.issues.createComment({
194166
issue_number: pr_no,

.github/workflows/e2e_level_zero.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
config: ""
2222
unit: "gpu"
2323
# Failing tests
24-
xfail: "ESIMD/preemption.cpp;Matrix/SG32/element_wise_all_ops.cpp;Matrix/SG32/get_coord_int8_matB.cpp;Matrix/element_wise_all_ops.cpp;Matrix/element_wise_all_ops_1d.cpp;Matrix/element_wise_all_ops_1d_cont.cpp;Matrix/element_wise_all_ops_scalar.cpp;Matrix/element_wise_ops.cpp;Matrix/get_coord_int8_matB.cpp;Matrix/joint_matrix_apply_bf16.cpp;Matrix/joint_matrix_apply_two_matrices.cpp;Matrix/joint_matrix_bfloat16.cpp;Matrix/joint_matrix_bfloat16_array.cpp;Matrix/joint_matrix_rowmajorA_rowmajorB.cpp;ProgramManager/uneven_kernel_split.cpp"
24+
xfail: "ESIMD/regression/minmax.cpp;ESIMD/preemption.cpp;Matrix/SG32/element_wise_all_ops.cpp;Matrix/SG32/get_coord_int8_matB.cpp;Matrix/element_wise_all_ops.cpp;Matrix/element_wise_all_ops_1d.cpp;Matrix/element_wise_all_ops_1d_cont.cpp;Matrix/element_wise_all_ops_scalar.cpp;Matrix/element_wise_ops.cpp;Matrix/get_coord_int8_matB.cpp;Matrix/joint_matrix_apply_bf16.cpp;Matrix/joint_matrix_apply_two_matrices.cpp;Matrix/joint_matrix_bfloat16.cpp;Matrix/joint_matrix_bfloat16_array.cpp;Matrix/joint_matrix_rowmajorA_rowmajorB.cpp;ProgramManager/uneven_kernel_split.cpp"
2525
# Flaky tests
2626
filter_out: "UserDefinedReductions/user_defined_reductions.cpp"
2727
# These runners by default spawn upwards of 260 workers.

include/ur_api.h

Lines changed: 35 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@ typedef enum ur_structure_type_t {
283283
UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t
284284
UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2005, ///< ::ur_exp_sampler_addr_modes_t
285285
UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES = 0x2006, ///< ::ur_exp_sampler_cubemap_properties_t
286+
UR_STRUCTURE_TYPE_EXP_IMAGE_COPY_REGION = 0x2007, ///< ::ur_exp_image_copy_region_t
286287
UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES = 0x3000, ///< ::ur_exp_enqueue_native_command_properties_t
287288
/// @cond
288289
UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff
@@ -7501,6 +7502,18 @@ typedef struct ur_exp_interop_semaphore_desc_t {
75017502

75027503
} ur_exp_interop_semaphore_desc_t;
75037504

7505+
///////////////////////////////////////////////////////////////////////////////
7506+
/// @brief Describes the (sub-)regions and the extent to be copied
7507+
typedef struct ur_exp_image_copy_region_t {
7508+
ur_structure_type_t stype; ///< [in] type of this structure, must be
7509+
///< ::UR_STRUCTURE_TYPE_EXP_IMAGE_COPY_REGION
7510+
const void *pNext; ///< [in][optional] pointer to extension-specific structure
7511+
ur_rect_offset_t srcOffset; ///< [in] the offset into the source image
7512+
ur_rect_offset_t dstOffset; ///< [in] the offset into the destination image
7513+
ur_rect_region_t copyExtent; ///< [in] the extent (region) of the image to copy
7514+
7515+
} ur_exp_image_copy_region_t;
7516+
75047517
///////////////////////////////////////////////////////////////////////////////
75057518
/// @brief USM allocate pitched memory
75067519
///
@@ -7740,7 +7753,7 @@ urBindlessImagesSampledImageCreateExp(
77407753
);
77417754

77427755
///////////////////////////////////////////////////////////////////////////////
7743-
/// @brief Copy image data Host to Device or Device to Host
7756+
/// @brief Copy image data Host to Device, Device to Host, or Device to Device
77447757
///
77457758
/// @remarks
77467759
/// _Analogues_
@@ -7757,34 +7770,34 @@ urBindlessImagesSampledImageCreateExp(
77577770
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
77587771
/// + `NULL == hQueue`
77597772
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
7760-
/// + `NULL == pDst`
77617773
/// + `NULL == pSrc`
7762-
/// + `NULL == pImageFormat`
7763-
/// + `NULL == pImageDesc`
7774+
/// + `NULL == pDst`
7775+
/// + `NULL == pSrcImageDesc`
7776+
/// + `NULL == pDstImageDesc`
7777+
/// + `NULL == pSrcImageFormat`
7778+
/// + `NULL == pDstImageFormat`
7779+
/// + `NULL == pCopyRegion`
77647780
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
77657781
/// + `::UR_EXP_IMAGE_COPY_FLAGS_MASK & imageCopyFlags`
77667782
/// - ::UR_RESULT_ERROR_INVALID_QUEUE
77677783
/// - ::UR_RESULT_ERROR_INVALID_VALUE
77687784
/// - ::UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR
7769-
/// + `pImageDesc && UR_MEM_TYPE_IMAGE_CUBEMAP_EXP < pImageDesc->type`
7785+
/// + `pSrcImageDesc && UR_MEM_TYPE_IMAGE_CUBEMAP_EXP < pSrcImageDesc->type`
7786+
/// + `pDstImageDesc && UR_MEM_TYPE_IMAGE_CUBEMAP_EXP < pDstImageDesc->type`
77707787
/// - ::UR_RESULT_ERROR_INVALID_IMAGE_SIZE
77717788
/// - ::UR_RESULT_ERROR_INVALID_OPERATION
77727789
UR_APIEXPORT ur_result_t UR_APICALL
77737790
urBindlessImagesImageCopyExp(
77747791
ur_queue_handle_t hQueue, ///< [in] handle of the queue object
7775-
void *pDst, ///< [in] location the data will be copied to
77767792
const void *pSrc, ///< [in] location the data will be copied from
7777-
const ur_image_format_t *pImageFormat, ///< [in] pointer to image format specification
7778-
const ur_image_desc_t *pImageDesc, ///< [in] pointer to image description
7793+
void *pDst, ///< [in] location the data will be copied to
7794+
const ur_image_desc_t *pSrcImageDesc, ///< [in] pointer to source image description
7795+
const ur_image_desc_t *pDstImageDesc, ///< [in] pointer to destination image description
7796+
const ur_image_format_t *pSrcImageFormat, ///< [in] pointer to source image format specification
7797+
const ur_image_format_t *pDstImageFormat, ///< [in] pointer to destination image format specification
7798+
ur_exp_image_copy_region_t *pCopyRegion, ///< [in] Pointer to structure describing the (sub-)regions of source and
7799+
///< destination images
77797800
ur_exp_image_copy_flags_t imageCopyFlags, ///< [in] flags describing copy direction, e.g. H2D, D2H, or D2D
7780-
ur_rect_offset_t srcOffset, ///< [in] defines the (x,y,z) source offset in pixels in the 1D, 2D, or 3D
7781-
///< image
7782-
ur_rect_offset_t dstOffset, ///< [in] defines the (x,y,z) destination offset in pixels in the 1D, 2D,
7783-
///< or 3D image
7784-
ur_rect_region_t copyExtent, ///< [in] defines the (width, height, depth) in pixels of the 1D, 2D, or 3D
7785-
///< region to copy
7786-
ur_rect_region_t hostExtent, ///< [in] defines the (width, height, depth) in pixels of the 1D, 2D, or 3D
7787-
///< region on the host
77887801
uint32_t numEventsInWaitList, ///< [in] size of the event wait list
77897802
const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of
77907803
///< events that must be complete before this command can be executed.
@@ -11138,15 +11151,14 @@ typedef struct ur_bindless_images_sampled_image_create_exp_params_t {
1113811151
/// allowing the callback the ability to modify the parameter's value
1113911152
typedef struct ur_bindless_images_image_copy_exp_params_t {
1114011153
ur_queue_handle_t *phQueue;
11141-
void **ppDst;
1114211154
const void **ppSrc;
11143-
const ur_image_format_t **ppImageFormat;
11144-
const ur_image_desc_t **ppImageDesc;
11155+
void **ppDst;
11156+
const ur_image_desc_t **ppSrcImageDesc;
11157+
const ur_image_desc_t **ppDstImageDesc;
11158+
const ur_image_format_t **ppSrcImageFormat;
11159+
const ur_image_format_t **ppDstImageFormat;
11160+
ur_exp_image_copy_region_t **ppCopyRegion;
1114511161
ur_exp_image_copy_flags_t *pimageCopyFlags;
11146-
ur_rect_offset_t *psrcOffset;
11147-
ur_rect_offset_t *pdstOffset;
11148-
ur_rect_region_t *pcopyExtent;
11149-
ur_rect_region_t *phostExtent;
1115011162
uint32_t *pnumEventsInWaitList;
1115111163
const ur_event_handle_t **pphEventWaitList;
1115211164
ur_event_handle_t **pphEvent;

0 commit comments

Comments
 (0)