Skip to content

Commit d018b2e

Browse files
committed
Add hadd
1 parent 8c8ca07 commit d018b2e

File tree

2 files changed

+328
-0
lines changed

2 files changed

+328
-0
lines changed

crates/core_arch/neon.spec

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,3 +319,19 @@ link-arm = llvm.arm.neon.vqadds._EXT_
319319
link-aarch64 = llvm.aarch64.neon.sqadd._EXT_
320320
generate int*_t
321321

322+
/// Halving add
323+
name = vhadd
324+
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
325+
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
326+
e = 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29
327+
328+
arm = uhadd
329+
link-arm = llvm.arm.neon.vhaddu._EXT_
330+
link-aarch64 = llvm.aarch64.neon.uhadd._EXT_
331+
generate uint*_t
332+
333+
arm = shadd
334+
link-arm = llvm.arm.neon.vhadds._EXT_
335+
link-aarch64 = llvm.aarch64.neon.shadd._EXT_
336+
generate int*_t
337+

crates/core_arch/src/arm/neon/generated.rs

Lines changed: 312 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1596,6 +1596,210 @@ pub unsafe fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
15961596
vqaddq_s32_(a, b)
15971597
}
15981598

1599+
#[allow(improper_ctypes)]
1600+
extern "C" {
1601+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")]
1602+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v8i8")]
1603+
fn vhadd_u8_(a: uint8x8_t, a: uint8x8_t) -> uint8x8_t;
1604+
}
1605+
1606+
/// Halving add
1607+
#[inline]
1608+
#[target_feature(enable = "neon")]
1609+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1610+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(uhadd))]
1611+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
1612+
pub unsafe fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
1613+
vhadd_u8_(a, b)
1614+
}
1615+
1616+
#[allow(improper_ctypes)]
1617+
extern "C" {
1618+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")]
1619+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v16i8")]
1620+
fn vhaddq_u8_(a: uint8x16_t, a: uint8x16_t) -> uint8x16_t;
1621+
}
1622+
1623+
/// Halving add
1624+
#[inline]
1625+
#[target_feature(enable = "neon")]
1626+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1627+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(uhadd))]
1628+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
1629+
pub unsafe fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
1630+
vhaddq_u8_(a, b)
1631+
}
1632+
1633+
#[allow(improper_ctypes)]
1634+
extern "C" {
1635+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")]
1636+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v4i16")]
1637+
fn vhadd_u16_(a: uint16x4_t, a: uint16x4_t) -> uint16x4_t;
1638+
}
1639+
1640+
/// Halving add
1641+
#[inline]
1642+
#[target_feature(enable = "neon")]
1643+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1644+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(uhadd))]
1645+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
1646+
pub unsafe fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
1647+
vhadd_u16_(a, b)
1648+
}
1649+
1650+
#[allow(improper_ctypes)]
1651+
extern "C" {
1652+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")]
1653+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v8i16")]
1654+
fn vhaddq_u16_(a: uint16x8_t, a: uint16x8_t) -> uint16x8_t;
1655+
}
1656+
1657+
/// Halving add
1658+
#[inline]
1659+
#[target_feature(enable = "neon")]
1660+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1661+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(uhadd))]
1662+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
1663+
pub unsafe fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
1664+
vhaddq_u16_(a, b)
1665+
}
1666+
1667+
#[allow(improper_ctypes)]
1668+
extern "C" {
1669+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")]
1670+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v2i32")]
1671+
fn vhadd_u32_(a: uint32x2_t, a: uint32x2_t) -> uint32x2_t;
1672+
}
1673+
1674+
/// Halving add
1675+
#[inline]
1676+
#[target_feature(enable = "neon")]
1677+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1678+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(uhadd))]
1679+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
1680+
pub unsafe fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
1681+
vhadd_u32_(a, b)
1682+
}
1683+
1684+
#[allow(improper_ctypes)]
1685+
extern "C" {
1686+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i32")]
1687+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v4i32")]
1688+
fn vhaddq_u32_(a: uint32x4_t, a: uint32x4_t) -> uint32x4_t;
1689+
}
1690+
1691+
/// Halving add
1692+
#[inline]
1693+
#[target_feature(enable = "neon")]
1694+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1695+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(uhadd))]
1696+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
1697+
pub unsafe fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
1698+
vhaddq_u32_(a, b)
1699+
}
1700+
1701+
#[allow(improper_ctypes)]
1702+
extern "C" {
1703+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")]
1704+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v8i8")]
1705+
fn vhadd_s8_(a: int8x8_t, a: int8x8_t) -> int8x8_t;
1706+
}
1707+
1708+
/// Halving add
1709+
#[inline]
1710+
#[target_feature(enable = "neon")]
1711+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1712+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(shadd))]
1713+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
1714+
pub unsafe fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
1715+
vhadd_s8_(a, b)
1716+
}
1717+
1718+
#[allow(improper_ctypes)]
1719+
extern "C" {
1720+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")]
1721+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v16i8")]
1722+
fn vhaddq_s8_(a: int8x16_t, a: int8x16_t) -> int8x16_t;
1723+
}
1724+
1725+
/// Halving add
1726+
#[inline]
1727+
#[target_feature(enable = "neon")]
1728+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1729+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(shadd))]
1730+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
1731+
pub unsafe fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
1732+
vhaddq_s8_(a, b)
1733+
}
1734+
1735+
#[allow(improper_ctypes)]
1736+
extern "C" {
1737+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")]
1738+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v4i16")]
1739+
fn vhadd_s16_(a: int16x4_t, a: int16x4_t) -> int16x4_t;
1740+
}
1741+
1742+
/// Halving add
1743+
#[inline]
1744+
#[target_feature(enable = "neon")]
1745+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1746+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(shadd))]
1747+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
1748+
pub unsafe fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
1749+
vhadd_s16_(a, b)
1750+
}
1751+
1752+
#[allow(improper_ctypes)]
1753+
extern "C" {
1754+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i16")]
1755+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v8i16")]
1756+
fn vhaddq_s16_(a: int16x8_t, a: int16x8_t) -> int16x8_t;
1757+
}
1758+
1759+
/// Halving add
1760+
#[inline]
1761+
#[target_feature(enable = "neon")]
1762+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1763+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(shadd))]
1764+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
1765+
pub unsafe fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
1766+
vhaddq_s16_(a, b)
1767+
}
1768+
1769+
#[allow(improper_ctypes)]
1770+
extern "C" {
1771+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")]
1772+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v2i32")]
1773+
fn vhadd_s32_(a: int32x2_t, a: int32x2_t) -> int32x2_t;
1774+
}
1775+
1776+
/// Halving add
1777+
#[inline]
1778+
#[target_feature(enable = "neon")]
1779+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1780+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(shadd))]
1781+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
1782+
pub unsafe fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
1783+
vhadd_s32_(a, b)
1784+
}
1785+
1786+
#[allow(improper_ctypes)]
1787+
extern "C" {
1788+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")]
1789+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v4i32")]
1790+
fn vhaddq_s32_(a: int32x4_t, a: int32x4_t) -> int32x4_t;
1791+
}
1792+
1793+
/// Halving add
1794+
#[inline]
1795+
#[target_feature(enable = "neon")]
1796+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
1797+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(shadd))]
1798+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
1799+
pub unsafe fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
1800+
vhaddq_s32_(a, b)
1801+
}
1802+
15991803
#[cfg(test)]
16001804
mod test {
16011805
use super::*;
@@ -2880,4 +3084,112 @@ mod test {
28803084
let r: i32x4 = transmute(vqaddq_s32(transmute(a), transmute(b)));
28813085
assert_eq!(r, e);
28823086
}
3087+
3088+
#[simd_test(enable = "neon")]
3089+
unsafe fn test_vhadd_u8() {
3090+
let a:u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
3091+
let b:u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
3092+
let e:u8x8 = u8x8::new(21, 22, 22, 23, 23, 24, 24, 25);
3093+
let r: u8x8 = transmute(vhadd_u8(transmute(a), transmute(b)));
3094+
assert_eq!(r, e);
3095+
}
3096+
3097+
#[simd_test(enable = "neon")]
3098+
unsafe fn test_vhaddq_u8() {
3099+
let a:u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42);
3100+
let b:u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
3101+
let e:u8x16 = u8x16::new(21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29);
3102+
let r: u8x16 = transmute(vhaddq_u8(transmute(a), transmute(b)));
3103+
assert_eq!(r, e);
3104+
}
3105+
3106+
#[simd_test(enable = "neon")]
3107+
unsafe fn test_vhadd_u16() {
3108+
let a:u16x4 = u16x4::new(42, 42, 42, 42);
3109+
let b:u16x4 = u16x4::new(1, 2, 3, 4);
3110+
let e:u16x4 = u16x4::new(21, 22, 22, 23);
3111+
let r: u16x4 = transmute(vhadd_u16(transmute(a), transmute(b)));
3112+
assert_eq!(r, e);
3113+
}
3114+
3115+
#[simd_test(enable = "neon")]
3116+
unsafe fn test_vhaddq_u16() {
3117+
let a:u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
3118+
let b:u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
3119+
let e:u16x8 = u16x8::new(21, 22, 22, 23, 23, 24, 24, 25);
3120+
let r: u16x8 = transmute(vhaddq_u16(transmute(a), transmute(b)));
3121+
assert_eq!(r, e);
3122+
}
3123+
3124+
#[simd_test(enable = "neon")]
3125+
unsafe fn test_vhadd_u32() {
3126+
let a:u32x2 = u32x2::new(42, 42);
3127+
let b:u32x2 = u32x2::new(1, 2);
3128+
let e:u32x2 = u32x2::new(21, 22);
3129+
let r: u32x2 = transmute(vhadd_u32(transmute(a), transmute(b)));
3130+
assert_eq!(r, e);
3131+
}
3132+
3133+
#[simd_test(enable = "neon")]
3134+
unsafe fn test_vhaddq_u32() {
3135+
let a:u32x4 = u32x4::new(42, 42, 42, 42);
3136+
let b:u32x4 = u32x4::new(1, 2, 3, 4);
3137+
let e:u32x4 = u32x4::new(21, 22, 22, 23);
3138+
let r: u32x4 = transmute(vhaddq_u32(transmute(a), transmute(b)));
3139+
assert_eq!(r, e);
3140+
}
3141+
3142+
#[simd_test(enable = "neon")]
3143+
unsafe fn test_vhadd_s8() {
3144+
let a:i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
3145+
let b:i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
3146+
let e:i8x8 = i8x8::new(21, 22, 22, 23, 23, 24, 24, 25);
3147+
let r: i8x8 = transmute(vhadd_s8(transmute(a), transmute(b)));
3148+
assert_eq!(r, e);
3149+
}
3150+
3151+
#[simd_test(enable = "neon")]
3152+
unsafe fn test_vhaddq_s8() {
3153+
let a:i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42);
3154+
let b:i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
3155+
let e:i8x16 = i8x16::new(21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29);
3156+
let r: i8x16 = transmute(vhaddq_s8(transmute(a), transmute(b)));
3157+
assert_eq!(r, e);
3158+
}
3159+
3160+
#[simd_test(enable = "neon")]
3161+
unsafe fn test_vhadd_s16() {
3162+
let a:i16x4 = i16x4::new(42, 42, 42, 42);
3163+
let b:i16x4 = i16x4::new(1, 2, 3, 4);
3164+
let e:i16x4 = i16x4::new(21, 22, 22, 23);
3165+
let r: i16x4 = transmute(vhadd_s16(transmute(a), transmute(b)));
3166+
assert_eq!(r, e);
3167+
}
3168+
3169+
#[simd_test(enable = "neon")]
3170+
unsafe fn test_vhaddq_s16() {
3171+
let a:i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
3172+
let b:i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
3173+
let e:i16x8 = i16x8::new(21, 22, 22, 23, 23, 24, 24, 25);
3174+
let r: i16x8 = transmute(vhaddq_s16(transmute(a), transmute(b)));
3175+
assert_eq!(r, e);
3176+
}
3177+
3178+
#[simd_test(enable = "neon")]
3179+
unsafe fn test_vhadd_s32() {
3180+
let a:i32x2 = i32x2::new(42, 42);
3181+
let b:i32x2 = i32x2::new(1, 2);
3182+
let e:i32x2 = i32x2::new(21, 22);
3183+
let r: i32x2 = transmute(vhadd_s32(transmute(a), transmute(b)));
3184+
assert_eq!(r, e);
3185+
}
3186+
3187+
#[simd_test(enable = "neon")]
3188+
unsafe fn test_vhaddq_s32() {
3189+
let a:i32x4 = i32x4::new(42, 42, 42, 42);
3190+
let b:i32x4 = i32x4::new(1, 2, 3, 4);
3191+
let e:i32x4 = i32x4::new(21, 22, 22, 23);
3192+
let r: i32x4 = transmute(vhaddq_s32(transmute(a), transmute(b)));
3193+
assert_eq!(r, e);
3194+
}
28833195
}

0 commit comments

Comments
 (0)