diff --git a/Cargo.toml b/Cargo.toml index 15ad695..b7c8e76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" homepage = "https://github.com/filecoin-project/rust-gpu-tools" license = "MIT/Apache-2.0" repository = "https://github.com/filecoin-project/rust-gpu-tools" -rust-version = "1.70.0" +rust-version = "1.81.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] @@ -18,11 +18,11 @@ cuda = ["rustacuda"] [dependencies] home = "0.5" sha2 = "0.10" -thiserror = "1.0.10" -log = "0.4.11" +thiserror = "2.0.12" +log = "0.4.26" hex = "0.4.3" -opencl3 = { version = "0.9.3", default-features = false, features = ["CL_VERSION_1_2"], optional = true } +opencl3 = { version = "0.11.0", default-features = false, features = ["CL_VERSION_1_2"], optional = true } rustacuda = { package = "fil-rustacuda", version = "0.1.3", optional = true } once_cell = "1.8.0" temp-env = "0.3.3" diff --git a/rust-toolchain b/rust-toolchain index 832e9af..dbd4126 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1 +1 @@ -1.70.0 +1.81.0 diff --git a/src/cuda/mod.rs b/src/cuda/mod.rs index cb4b8ec..bdcf83c 100644 --- a/src/cuda/mod.rs +++ b/src/cuda/mod.rs @@ -6,7 +6,7 @@ //! //! 1. RustaCUDA doesn't expose a higher level function to launch a kernel on the default stream //! 2. There was a bug, when the default stream was used implicitly via RustaCUDA's synchronuous -//! copy methods. To prevent such kind of bugs, be explicit which stream is used. +//! copy methods. To prevent such kind of bugs, be explicit which stream is used. pub(crate) mod utils; @@ -133,13 +133,11 @@ impl Program { pub fn from_binary(device: &Device, filename: &CStr) -> GPUResult { debug!("Creating CUDA program from binary file."); rustacuda::context::CurrentContext::set_current(&device.context)?; - let module = rustacuda::module::Module::load_from_file(filename).map_err(|err| { + let module = rustacuda::module::Module::load_from_file(filename).inspect_err(|_err| { Self::pop_context(); - err })?; - let stream = Stream::new(StreamFlags::NON_BLOCKING, None).map_err(|err| { + let stream = Stream::new(StreamFlags::NON_BLOCKING, None).inspect_err(|_err| { Self::pop_context(); - err })?; let prog = Program { module, @@ -155,13 +153,11 @@ impl Program { pub fn from_bytes(device: &Device, bytes: &[u8]) -> GPUResult { debug!("Creating CUDA program from bytes."); rustacuda::context::CurrentContext::set_current(&device.context)?; - let module = rustacuda::module::Module::load_from_bytes(bytes).map_err(|err| { + let module = rustacuda::module::Module::load_from_bytes(bytes).inspect_err(|_err| { Self::pop_context(); - err })?; - let stream = Stream::new(StreamFlags::NON_BLOCKING, None).map_err(|err| { + let stream = Stream::new(StreamFlags::NON_BLOCKING, None).inspect_err(|_err| { Self::pop_context(); - err })?; let prog = Program { module, @@ -203,9 +199,7 @@ impl Program { let bytes_len = mem::size_of_val(slice); // Transmuting types is safe as long a sizes match. - let bytes = unsafe { - std::slice::from_raw_parts(slice.as_ptr() as *const T as *const u8, bytes_len) - }; + let bytes = unsafe { std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) }; // It is only unsafe as long as the buffer isn't initialized, but that's what we do next. let mut buffer = unsafe { DeviceBuffer::::uninitialized(bytes_len)? }; @@ -245,10 +239,7 @@ impl Program { // Transmuting types is safe as long a sizes match. let bytes = unsafe { - std::slice::from_raw_parts( - data.as_ptr() as *const T as *const u8, - mem::size_of_val(data), - ) + std::slice::from_raw_parts(data.as_ptr() as *const u8, mem::size_of_val(data)) }; // It is safe as we synchronize the stream after the call. @@ -264,10 +255,7 @@ impl Program { // Transmuting types is safe as long a sizes match. let bytes = unsafe { - std::slice::from_raw_parts_mut( - data.as_mut_ptr() as *mut T as *mut u8, - mem::size_of_val(data), - ) + std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, mem::size_of_val(data)) }; // It is safe as we synchronize the stream after the call. diff --git a/src/cuda/utils.rs b/src/cuda/utils.rs index 21da312..207d658 100644 --- a/src/cuda/utils.rs +++ b/src/cuda/utils.rs @@ -11,7 +11,7 @@ use crate::error::{GPUError, GPUResult}; // are never used directly, they are only accessed through [`cuda::Device`] which contains an // `UnownedContext`. A device cannot have an own context itself, as then it couldn't be cloned, // but that is needed for creating the kernels. -pub(crate) struct CudaContexts(Vec); +pub(crate) struct CudaContexts(#[allow(unused)] Vec); unsafe impl Sync for CudaContexts {} unsafe impl Send for CudaContexts {} diff --git a/src/device.rs b/src/device.rs index b5c5abe..5412cc2 100644 --- a/src/device.rs +++ b/src/device.rs @@ -28,6 +28,8 @@ const AMD_DEVICE_ON_APPLE_VENDOR_STRING: &str = "AMD"; const AMD_DEVICE_ON_APPLE_VENDOR_ID: u32 = 0x1021d00; const NVIDIA_DEVICE_VENDOR_STRING: &str = "NVIDIA Corporation"; const NVIDIA_DEVICE_VENDOR_ID: u32 = 0x10de; +const APPLE_DEVICE_VENDOR_ID: u32 = 0x1027F00; +const APPLE_DEVICE_VENDOR_STRING: &str = "Apple"; // The owned CUDA contexts are stored globally. Each devives contains an unowned reference, so // that devices can be cloned. @@ -180,6 +182,8 @@ pub enum Vendor { Intel, /// GPU by NVIDIA. Nvidia, + /// GPU by Apple. + Apple, } impl TryFrom<&str> for Vendor { @@ -191,6 +195,7 @@ impl TryFrom<&str> for Vendor { AMD_DEVICE_ON_APPLE_VENDOR_STRING => Ok(Self::Amd), INTEL_DEVICE_VENDOR_STRING => Ok(Self::Intel), NVIDIA_DEVICE_VENDOR_STRING => Ok(Self::Nvidia), + APPLE_DEVICE_VENDOR_STRING => Ok(Self::Apple), _ => Err(GPUError::UnsupportedVendor(vendor.to_string())), } } @@ -205,6 +210,7 @@ impl TryFrom for Vendor { AMD_DEVICE_ON_APPLE_VENDOR_ID => Ok(Self::Amd), INTEL_DEVICE_VENDOR_ID => Ok(Self::Intel), NVIDIA_DEVICE_VENDOR_ID => Ok(Self::Nvidia), + APPLE_DEVICE_VENDOR_ID => Ok(Self::Apple), _ => Err(GPUError::UnsupportedVendor(format!("0x{:x}", vendor))), } } @@ -216,6 +222,7 @@ impl fmt::Display for Vendor { Self::Amd => AMD_DEVICE_VENDOR_STRING, Self::Intel => INTEL_DEVICE_VENDOR_STRING, Self::Nvidia => NVIDIA_DEVICE_VENDOR_STRING, + Self::Apple => APPLE_DEVICE_VENDOR_STRING, }; write!(f, "{}", vendor) } diff --git a/src/error.rs b/src/error.rs index a260e50..9bf382a 100644 --- a/src/error.rs +++ b/src/error.rs @@ -9,11 +9,16 @@ use rustacuda::error::CudaError; pub enum GPUError { /// Error from the underlying `opencl3` library, e.g. a memory allocation failure. #[cfg(feature = "opencl")] - #[error("Opencl3 Error: {0}{}", match .1 { + #[error("Opencl3 Error: {0}{}", match .message { Some(message) => format!(" {}", message), None => "".to_string(), })] - Opencl3(ClError, Option), + Opencl3 { + /// The error code. + error: ClError, + /// The error message. + message: Option, + }, /// Error for OpenCL `clGetProgramInfo()` call failures. #[cfg(feature = "opencl")] @@ -63,6 +68,9 @@ pub type GPUResult = std::result::Result; #[cfg(feature = "opencl")] impl From for GPUError { fn from(error: ClError) -> Self { - GPUError::Opencl3(error, None) + GPUError::Opencl3 { + error, + message: None, + } } } diff --git a/src/opencl/mod.rs b/src/opencl/mod.rs index c3e6068..c8e32b4 100644 --- a/src/opencl/mod.rs +++ b/src/opencl/mod.rs @@ -152,7 +152,10 @@ impl Program { let mut program = opencl3::program::Program::create_from_source(&context, src)?; if let Err(build_error) = program.build(context.devices(), "") { let log = program.get_build_log(context.devices()[0])?; - return Err(GPUError::Opencl3(build_error, Some(log))); + return Err(GPUError::Opencl3 { + error: build_error, + message: Some(log), + }); } debug!( "Building kernel ({}) from source: done.", @@ -191,7 +194,10 @@ impl Program { }?; if let Err(build_error) = program.build(context.devices(), "") { let log = program.get_build_log(context.devices()[0])?; - return Err(GPUError::Opencl3(build_error, Some(log))); + return Err(GPUError::Opencl3 { + error: build_error, + message: Some(log), + }); } let queue = CommandQueue::create_default(&context, 0)?; let kernels = opencl3::kernel::create_program_kernels(&program)?; @@ -258,9 +264,7 @@ impl Program { )? }; // Transmuting types is safe as long a sizes match. - let bytes = unsafe { - std::slice::from_raw_parts(slice.as_ptr() as *const T as *const u8, bytes_len) - }; + let bytes = unsafe { std::slice::from_raw_parts(slice.as_ptr() as *const u8, bytes_len) }; // Write some data right-away. This makes a significant performance different. unsafe { self.queue @@ -314,10 +318,7 @@ impl Program { // It is safe as long as the sizes match. let bytes = unsafe { - std::slice::from_raw_parts( - data.as_ptr() as *const T as *const u8, - mem::size_of_val(data), - ) + std::slice::from_raw_parts(data.as_ptr() as *const u8, mem::size_of_val(data)) }; unsafe { self.queue @@ -332,10 +333,7 @@ impl Program { // It is safe as long as the sizes match. let bytes = unsafe { - std::slice::from_raw_parts_mut( - data.as_mut_ptr() as *mut T as *mut u8, - mem::size_of_val(data), - ) + std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, mem::size_of_val(data)) }; unsafe { self.queue