Skip to content

[WIP] rocprofv3 support #801

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions gen/Project.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[deps]
Clang = "40e3b903-d033-50b4-a0cc-940c62c95e31"
JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899"

[sources]
Clang = {path = "/home/simeon/.julia/dev/Clang"}
24 changes: 24 additions & 0 deletions gen/rocprof/generator.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
using Clang.Generators
using JuliaFormatter

include_dir = normpath("/home/simeon/Documents/rocm/TheRock/output-linux-portable/build/profiler/rocprofiler-sdk/dist/include")
rocprof_dir = joinpath(include_dir, "rocprofiler-sdk")
options = load_options("rocprof/rocprof-generator.toml")

args = get_default_args()
push!(args, "-I$include_dir")
push!(args, "-D__HIP_PLATFORM_AMD__")

headers = [
joinpath(rocprof_dir, header)
# for header in readdir(rocprof_dir)
# if endswith(header, ".h")
for header in ("rocprofiler.h",)
]
#push!(headers, joinpath(include_dir, "hip/driver_types.h"))

ctx = create_context(headers, args, options)
build!(ctx)

path = options["general"]["output_file_path"]
format_file(path, YASStyle())
37 changes: 37 additions & 0 deletions gen/rocprof/rocprof-generator.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
[general]
library_name = "librocprofiler_sdk_tool"
output_file_path = "./librocprof.jl"
export_symbol_prefixes = []
output_ignorelist = [
"ROCPROFILER_PUBLIC_API",
"ROCPROFILER_HIDDEN_API",
"ROCPROFILER_EXPORT_DECORATOR",
"ROCPROFILER_EXPORT",
"ROCPROFILER_IMPORT",
"ROCPROFILER_API",
"ROCPROFILER_SDK_DEPRECATED_WARNINGS",
"ROCPROFILER_SDK_EXPERIMENTAL_WARNINGS",
"ROCPROFILER_SDK_EXPERIMENTAL_MESSAGE",
"ROCPROFILER_CONTEXT_NONE",

"__HOST_DEVICE__",

"hipCpuDeviceId",
"hipInvalidDeviceId",

"HSA_API_EXPORT",
"HSA_API_IMPORT",
"HSA_API",

"ROCTX_PUBLIC_API",
"ROCTX_HIDDEN_API",
"ROCTX_EXPORT_DECORATOR",
"ROCTX_EXPORT",
"ROCTX_IMPORT",
"ROCTX_API",

"ompt_data_none",
]

[codegen]
use_ccall_macro = true
155 changes: 155 additions & 0 deletions prof.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
# ENV["HSA_TOOLS_LIB"] = "/opt/rocm/lib/librocprofiler64v2.so"
# ENV["ROCPROFILER_METRICS_PATH"] = "/opt/rocm/libexec/rocprofiler/counters/derived_counters.xml"

using AMDGPU
import AMDGPU: Profiler, @check

@show Profiler.version()

# Application tracing

const ctx = Ref(Profiler.rocprofiler_context_id_t(0))
const buff = Ref(Profiler.rocprofiler_buffer_id_t(0));

@check Profiler.rocprofiler_create_context(ctx)
#@check Profiler.rocprofiler_create_buffer(ctx[], 8192, 2048, Profiler.ROCPROFILER_BUFFER_POLICY_LOSSLESS,

### Initialize tools
#@check Profiler.rocprofiler_initialize()
#
### create session with replay mode
#r_id = Ref{Profiler.rocprofiler_session_id_t}()
#@check Profiler.rocprofiler_create_session(Profiler.ROCPROFILER_NONE_REPLAY_MODE, r_id)
#id = r_id[]
#
### Create output buffer for the data
#function output_callback(record, end_record, session_id, buffer_id)
# @info "Output callback" record, end_record, session_id, buffer_id
# return nothing
#end
#
#r_buffer_id = Ref{Profiler.rocprofiler_buffer_id_t}()
#@check Profiler.rocprofiler_create_buffer(
# id,
# @cfunction(output_callback, Cvoid, (
# Ptr{Profiler.rocprofiler_record_header_t},
# Ptr{Profiler.rocprofiler_record_header_t},
# Profiler.rocprofiler_session_id_t,
# Profiler.rocprofiler_buffer_id_t)),
# 0x9999, r_buffer_id
# )
#buffer_id = r_buffer_id[]
#
## Specifying the APIs to be traced in a vector
#apis_requested = Vector{Profiler.rocprofiler_tracer_activity_domain_t}(undef, 0)
#push!(apis_requested, Profiler.ACTIVITY_DOMAIN_HIP_API)
#push!(apis_requested, Profiler.ACTIVITY_DOMAIN_HIP_OPS)
#push!(apis_requested, Profiler.ACTIVITY_DOMAIN_HSA_API)
#push!(apis_requested, Profiler.ACTIVITY_DOMAIN_HSA_OPS)
#push!(apis_requested, Profiler.ACTIVITY_DOMAIN_ROCTX)
#
#GC.@preserve apis_requested begin
# # Looking at the code we can release the pointer after rocprofiler_create_filter
# filter_data = Ref{Profiler.rocprofiler_filter_data_t}()
# GC.@preserve filter_data begin
# ptr = Base.unsafe_convert(Ptr{Profiler.rocprofiler_filter_data_t}, filter_data)
# Base.memset(ptr, UInt8(0), sizeof(Profiler.rocprofiler_filter_data_t))
#
# ptr.trace_apis = pointer(apis_requested)
# end
#
# r_api_tracing_filter_id = Ref{Profiler.rocprofiler_filter_id_t}()
# filter_property = Ref{Profiler.rocprofiler_filter_property_t}()
# GC.@preserve filter_property begin
# ptr = Base.unsafe_convert(Ptr{Profiler.rocprofiler_filter_property_t}, filter_property)
# Base.memset(ptr, UInt8(0), sizeof(Profiler.rocprofiler_filter_property_t))
# end
#
# # Creating filter for tracing APIs
# @check Profiler.rocprofiler_create_filter(
# id, Profiler.ROCPROFILER_API_TRACE,
# filter_data[], length(apis_requested),
# r_api_tracing_filter_id, filter_property[]
# )
# api_tracing_filter_id = r_api_tracing_filter_id[]
#end
#api_tracing_filter_id
#
#function timestamp()
# r = Ref{Profiler.rocprofiler_timestamp_t}()
# Profiler.rocprofiler_get_timestamp(r)
# return r[].value
#end
#
#function trace_sync_callback(record::Profiler.rocprofiler_record_tracer_t, session_id)
# if record.domain == Profiler.ACTIVITY_DOMAIN_HSA_API ||
# record.domain == Profiler.ACTIVITY_DOMAIN_HIP_API
# r_fn_name = Ref{Ptr{Cchar}}()
#
# @check Profiler.rocprofiler_query_tracer_operation_name(
# record.domain, record.operation_id, r_fn_name
# )
# fn_name = Base.unsafe_string(r_fn_name[])
# else
# fn_name = nothing
# end
#
# if record.phase == Profiler.ROCPROFILER_PHASE_ENTER
# ts_begin = timestamp()
# ts_end = nothing
# elseif record.phase == Profiler.ROCPROFILER_PHASE_EXIT
# ts_begin = nothing
# ts_end = timestamp()
# else
# ts_begin = record.timestamps._begin.value
# ts_end = record.timestamps._end.value
# end
#
# if record.name != C_NULL
# name = Base.unsafe_string(record.name)
# else
# name = nothing
# end
#
# @info "trace_sync" domain=record.domain phase=record.phase correlation=record.correlation_id.value ts_begin ts_end fn_name record.name
# return nothing
#end
#
#@check Profiler.rocprofiler_set_filter_buffer(id, api_tracing_filter_id, buffer_id)
#@check Profiler.rocprofiler_set_api_trace_sync_callback(
# id, api_tracing_filter_id,
# @cfunction(trace_sync_callback, Cvoid, (
# Profiler.rocprofiler_record_tracer_t,
# Profiler.rocprofiler_session_id_t,
# )),
#)
#
## Kernel tracing
#filter_data = Ref{Profiler.rocprofiler_filter_data_t}()
#GC.@preserve filter_data begin
# ptr = Base.unsafe_convert(Ptr{Profiler.rocprofiler_filter_data_t}, filter_data)
# Base.memset(ptr, UInt8(0), sizeof(Profiler.rocprofiler_filter_data_t))
#end
#
#r_kernel_tracing_filter_id = Ref{Profiler.rocprofiler_filter_id_t}()
#@check Profiler.rocprofiler_create_filter(
# id, Profiler.ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION,
# filter_data[], 0, r_kernel_tracing_filter_id, filter_property[]
#)
#kernel_tracing_filter_id = r_kernel_tracing_filter_id[]
#
#function kernel()
# return nothing
#end
#
#@roc kernel()
#
#Profiler.rocprofiler_start_session(id)
#
#@roc kernel()
#AMDGPU.synchronize()
#
#Profiler.rocprofiler_terminate_session(id)
#Profiler.rocprofiler_flush_data(id, buffer_id)
#Profiler.rocprofiler_destroy_session(id)
#Profiler.rocprofiler_finalize()
3 changes: 3 additions & 0 deletions src/AMDGPU.jl
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ include("ROCKernels.jl")
import .ROCKernels: ROCBackend
export ROCBackend

include("profiler/profiler.jl")


function __init__()
# Used to shutdown hostcalls if any is running.
atexit(() -> begin Runtime.RT_EXITING[] = true end)
Expand Down
4 changes: 3 additions & 1 deletion src/discovery/discovery.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ module ROCmDiscovery

export lld_artifact, lld_path, libhsaruntime, libdevice_libs, libhip
export librocblas, librocsparse, librocsolver
export librocrand, librocfft, libMIOpen_path
export librocrand, librocfft, libMIOpen_path, librocprofiler_sdk_tool

using LLD_jll
using ROCmDeviceLibs_jll
Expand Down Expand Up @@ -56,6 +56,7 @@ global librocsolver::String = ""
global librocrand::String = ""
global librocfft::String = ""
global libMIOpen_path::String = ""
global librocprofiler_sdk_tool::String = ""

function __init__()

Expand Down Expand Up @@ -97,6 +98,7 @@ function __init__()
global librocrand = find_rocm_library(lib_prefix * "rocrand"; rocm_path)
global librocfft = find_rocm_library(lib_prefix * "rocfft"; rocm_path)
global libMIOpen_path = find_rocm_library(lib_prefix * "MIOpen"; rocm_path)
global librocprofiler_sdk_tool = "/home/simeon/Documents/rocm/TheRock/output-linux-portable/build/profiler/rocprofiler-sdk/dist/lib/rocprofiler-sdk/librocprofiler-sdk-tool.so"
catch err
@error """ROCm discovery failed!
Discovered ROCm path: $rocm_path.
Expand Down
2 changes: 1 addition & 1 deletion src/dnn/MIOpen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ function Base.showerror(io::IO, exception::MIOpenException)
""")
end

function check(status)
function check(status::miopenStatus_t)
if status != miopenStatusSuccess
throw(MIOpenException(status))
end
Expand Down
Loading