Skip to content

SIMD Log functionality #126

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions sleef_interface/sleef_avx.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,29 @@ rlst_float32x8 rlst_avx_exp_f32(float* value) {
return output;
}

rlst_float64x4 rlst_avx_log_f64(double* value) {

__m256d simd_value = _mm256_loadu_pd(value);
__m256d simd_output = Sleef_logd4_u10avx2(simd_value);
rlst_float64x4 output;

_mm256_storeu_pd (output.x, simd_output);


return output;
}

rlst_float32x8 rlst_avx_log_f32(float* value) {

__m256 simd_value = _mm256_loadu_ps(value);
__m256 simd_output = Sleef_logf8_u10avx2(simd_value);
rlst_float32x8 output;

_mm256_storeu_ps (output.x, simd_output);


return output;
}



25 changes: 25 additions & 0 deletions sleef_interface/sleef_neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,31 @@ rlst_float32x4 rlst_neon_exp_f32(float* value) {
rlst_float32x4 output;


vst1q_f32(output.x, simd_output);

return output;

}

rlst_float64x2 rlst_neon_log_f64(double* value) {

float64x2_t simd_value = vld1q_f64(value);
float64x2_t simd_output = Sleef_logd2_u10advsimd(simd_value);
rlst_float64x2 output;


vst1q_f64(output.x, simd_output);

return output;
}

rlst_float32x4 rlst_neon_log_f32(float* value) {

float32x4_t simd_value = vld1q_f32(value);
float32x4_t simd_output = Sleef_logf4_u10advsimd(simd_value);
rlst_float32x4 output;


vst1q_f32(output.x, simd_output);

return output;
Expand Down
159 changes: 159 additions & 0 deletions src/dense/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ impl<T: RlstSimd, S: Simd> SimdFor<T, S> {
T::simd_exp(self.simd, value)
}

/// Compute the natural logarithm of a Simd vector.
#[inline(always)]
pub fn log(self, value: T::Scalars<S>) -> T::Scalars<S> {
T::simd_log(self.simd, value)
}

/// Compute the square root of a Simd vector.
#[inline(always)]
pub fn sqrt(self, value: T::Scalars<S>) -> T::Scalars<S> {
Expand Down Expand Up @@ -319,6 +325,9 @@ pub trait RlstSimd: Pod + Send + Sync + num::Zero + 'static {
/// Compute the base e exponential of a Simd vector.
fn simd_exp<S: Simd>(simd: S, value: Self::Scalars<S>) -> Self::Scalars<S>;

/// Compute the approximate log of each element in the register.
fn simd_log<S: Simd>(simd: S, value: Self::Scalars<S>) -> Self::Scalars<S>;

/// Compute the square root of each element in the register.
fn simd_sqrt<S: Simd>(simd: S, value: Self::Scalars<S>) -> Self::Scalars<S>;

Expand Down Expand Up @@ -658,6 +667,35 @@ impl RlstSimd for f32 {
out
}

#[inline(always)]
fn simd_log<S: Simd>(simd: S, value: Self::Scalars<S>) -> Self::Scalars<S> {
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "sleef"))]
if coe::is_same::<S, pulp::aarch64::Neon>() {
let value: [f32; 4] = bytemuck::cast(value);
let out = unsafe { sleef_neon::rlst_neon_log_f32(value.as_ptr()) };

return bytemuck::cast(out);
}

#[cfg(all(target_arch = "x86_64", feature = "sleef"))]
if coe::is_same::<S, pulp::x86::V3>() {
let value: [f32; 8] = bytemuck::cast(value);
let out = unsafe { sleef_avx::rlst_avx_log_f32(value.as_ptr()) };

return bytemuck::cast(out);
}

let mut out = simd.splat_f32s(0.0);
{
let out: &mut [f32] = bytemuck::cast_slice_mut(std::slice::from_mut(&mut out));
let x: &[f32] = bytemuck::cast_slice(std::slice::from_ref(&value));
for (out, x) in itertools::izip!(out, x) {
*out = x.ln();
}
}
out
}

#[inline(always)]
fn simd_sqrt<S: Simd>(simd: S, value: Self::Scalars<S>) -> Self::Scalars<S> {
#[cfg(target_arch = "x86_64")]
Expand Down Expand Up @@ -1033,6 +1071,36 @@ impl RlstSimd for f64 {
out
}

#[inline(always)]
fn simd_log<S: Simd>(simd: S, value: Self::Scalars<S>) -> Self::Scalars<S> {
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "sleef"))]
if coe::is_same::<S, pulp::aarch64::Neon>() {
let value: [f64; 2] = bytemuck::cast(value);
let out = unsafe { sleef_neon::rlst_neon_log_f64(value.as_ptr()) };

return bytemuck::cast(out);
}

#[cfg(all(target_arch = "x86_64", feature = "sleef"))]
if coe::is_same::<S, pulp::x86::V3>() {
let value: [f64; 4] = bytemuck::cast(value);
let out = unsafe { sleef_avx::rlst_avx_log_f64(value.as_ptr()) };

return bytemuck::cast(out);
}

let mut out = simd.splat_f64s(0.0);
{
let out: &mut [f64] = bytemuck::cast_slice_mut(std::slice::from_mut(&mut out));
let x: &[f64] = bytemuck::cast_slice(std::slice::from_ref(&value));
for (out, x) in itertools::izip!(out, x) {
*out = x.ln();
}
}
out
}


#[inline(always)]
fn simd_sqrt<S: Simd>(simd: S, value: Self::Scalars<S>) -> Self::Scalars<S> {
#[cfg(target_arch = "x86_64")]
Expand Down Expand Up @@ -1143,6 +1211,97 @@ mod tests {
use paste;
use rand::prelude::*;

#[test]
fn test_simd_log() {
let nsamples = 1001;

trait Tolerance: RlstScalar<Real = Self> {
const TOL: Self;
}

impl Tolerance for f32 {
const TOL: f32 = 1E-6;
}

impl Tolerance for f64 {
const TOL: f64 = 1E-13;
}

struct Impl<T: RlstScalar<Real = T> + RlstSimd + Tolerance> {
rng: StdRng,
nsamples: usize,
_marker: PhantomData<T>,
}

impl<T: RlstScalar<Real = T> + RlstSimd + Tolerance> pulp::WithSimd for Impl<T> {
type Output = ();

fn with_simd<S: Simd>(self, simd: S) -> Self::Output {
let Self {
mut rng, nsamples, ..
} = self;

let samples = (0..nsamples)
.map(|_| num::cast::<_, T>(rng.gen::<f64>()).unwrap())
.collect::<Vec<_>>();

let mut actual = vec![T::default(); nsamples];

let (value_head, value_tail) =
<T as RlstSimd>::as_simd_slice::<S>(samples.as_slice());
let (actual_head, actual_tail) =
<T as RlstSimd>::as_simd_slice_mut::<S>(actual.as_mut_slice());

fn impl_slice<T: RlstScalar<Real = T> + RlstSimd + Tolerance, S: Simd>(
simd: S,
values: &[<T as RlstSimd>::Scalars<S>],
actual: &mut [<T as RlstSimd>::Scalars<S>],
) {
let simd = SimdFor::<T, S>::new(simd);

for (&v, a) in itertools::izip!(values, actual) {
*a = simd.log(v);
}
}

impl_slice::<T, _>(simd, value_head, actual_head);
impl_slice::<T, _>(
pulp::Scalar::new(),
value_tail.coerce(),
actual_tail.coerce(),
);

if coe::is_same::<T, f32>() {
for (v, a) in itertools::izip!(samples, actual) {
let v: f32 = coerce_static(v);
let a: f32 = coerce_static(a);
assert_relative_eq!(v.ln(), a, max_relative = f32::TOL);
}
} else if coe::is_same::<T, f64>() {
for (v, a) in itertools::izip!(samples, actual) {
let v: f64 = coerce_static(v);
let a: f64 = coerce_static(a);
assert_relative_eq!(v.ln(), a, max_relative = f64::TOL);
}
} else {
panic!("Unsupported type");
}
}
}

pulp::Arch::new().dispatch(Impl::<f32> {
rng: StdRng::seed_from_u64(0),
nsamples,
_marker: PhantomData,
});

pulp::Arch::new().dispatch(Impl::<f64> {
rng: StdRng::seed_from_u64(0),
nsamples,
_marker: PhantomData,
});
}

#[test]
fn test_simd_approx_recip() {
let nsamples = 1001;
Expand Down