Skip to content

Commit d4b9b7e

Browse files
committed
derivative: refactor derivatives; allow f32, Vec2, Vec3, Vec4 and Vec3A to be differentiated
disallow f64, as the spec expects only 32-bit floats
1 parent 82071cd commit d4b9b7e

File tree

2 files changed

+150
-85
lines changed

2 files changed

+150
-85
lines changed
Lines changed: 135 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,102 +1,152 @@
1-
use crate::float::Float;
1+
use crate::sealed::Sealed;
2+
use glam::{Vec2, Vec3, Vec3A, Vec4};
3+
4+
macro_rules! cap_deriv_control {
5+
() => {
6+
unsafe {
7+
core::arch::asm!("OpCapability DerivativeControl");
8+
}
9+
};
10+
}
211

3-
#[cfg(target_arch = "spirv")]
412
macro_rules! deriv_fn {
5-
($p:ident, $inst:ident) => {
13+
($inst:ident, $param:expr) => {
614
unsafe {
7-
let mut o = Default::default();
8-
::core::arch::asm!(
9-
"%input = OpLoad _ {0}",
10-
concat!("%result = ", stringify!($inst), " _ %input"),
11-
"OpStore {1} %result",
12-
in(reg) &$p,
13-
in(reg) &mut o,
15+
let mut result = Default::default();
16+
core::arch::asm!(
17+
"%input = OpLoad typeof*{1} {1}",
18+
concat!("%result = ", stringify!($inst), " typeof*{1} %input"),
19+
"OpStore {0} %result",
20+
in(reg) &mut result,
21+
in(reg) &$param,
1422
);
15-
o
23+
result
1624
}
1725
};
1826
}
1927

20-
/// Returns the partial derivative of `component` with respect to the window's X
21-
/// coordinate. Returns the same result as either [`ddx_fine`] or
22-
/// [`ddx_coarse`], selection of which one is dependent on external factors.
23-
#[crate::macros::vectorized]
24-
#[crate::macros::gpu_only]
25-
pub fn ddx<F: Float>(component: F) -> F {
26-
deriv_fn!(component, OpDPdx)
27-
}
28+
/// Types for which partial derivatives can be computed.
29+
pub unsafe trait Derivative: Sealed + Default {
30+
/// Result is the partial derivative of `Self` with respect to the window x coordinate. Uses local differencing
31+
/// based on the value of `Self`. Same result as either [`Self::ddx_fine`] or [`Self::ddx_coarse`] on `Self`. Selection of which
32+
/// one is based on external factors.
33+
///
34+
/// An invocation will not execute a dynamic instance of this instruction (X') until all invocations in its
35+
/// derivative group have executed all dynamic instances that are program-ordered before X'.
36+
///
37+
/// This instruction is only valid in the Fragment Execution Model.
38+
#[crate::macros::gpu_only]
39+
fn ddx(self) -> Self {
40+
deriv_fn!(OpDPdx, self)
41+
}
2842

29-
/// Returns the partial derivative of `component` with respect to the window's X
30-
/// coordinate. Uses local differencing based on the value of `component` for
31-
/// the current fragment and its immediate neighbor(s).
32-
#[crate::macros::vectorized]
33-
#[crate::macros::gpu_only]
34-
pub fn ddx_fine<F: Float>(component: F) -> F {
35-
deriv_fn!(component, OpDPdxFine)
36-
}
43+
/// Result is the partial derivative of `Self` with respect to the window x coordinate. Uses local differencing
44+
/// based on the value of `Self` for the current fragment and its immediate neighbor(s).
45+
///
46+
/// An invocation will not execute a dynamic instance of this instruction (X') until all invocations in its
47+
/// derivative group have executed all dynamic instances that are program-ordered before X'.
48+
///
49+
/// This instruction is only valid in the Fragment Execution Model.
50+
#[crate::macros::gpu_only]
51+
fn ddx_fine(self) -> Self {
52+
cap_deriv_control!();
53+
deriv_fn!(OpDPdxFine, self)
54+
}
3755

38-
/// Returns the partial derivative of `component` with respect to the window's X
39-
/// coordinate. Uses local differencing based on the value of `component` for
40-
/// the current fragment’s neighbors, and possibly, but not necessarily,
41-
/// includes the value of `component` for the current fragment. That is, over a
42-
/// given area, the implementation can compute X derivatives in fewer unique
43-
/// locations than would be allowed by [`ddx_fine`].
44-
#[crate::macros::vectorized]
45-
#[crate::macros::gpu_only]
46-
pub fn ddx_coarse<F: Float>(component: F) -> F {
47-
deriv_fn!(component, OpDPdxCoarse)
48-
}
56+
/// Result is the partial derivative of `Self` with respect to the window x coordinate. Uses local differencing
57+
/// based on the value of `Self` for the current fragment’s neighbors, and possibly, but not necessarily, includes
58+
/// the value of `Self` for the current fragment. That is, over a given area, the implementation can compute x
59+
/// derivatives in fewer unique locations than would be allowed for [`Self::ddx_fine`].
60+
///
61+
/// An invocation will not execute a dynamic instance of this instruction (X') until all invocations in its
62+
/// derivative group have executed all dynamic instances that are program-ordered before X'.
63+
///
64+
/// This instruction is only valid in the Fragment Execution Model.
65+
#[crate::macros::gpu_only]
66+
fn ddx_coarse(self) -> Self {
67+
cap_deriv_control!();
68+
deriv_fn!(OpDPdxCoarse, self)
69+
}
4970

50-
/// Returns the partial derivative of `component` with respect to the window's Y
51-
/// coordinate. Returns the same result as either [`ddy_fine`] or
52-
/// [`ddy_coarse`], selection of which one is dependent on external factors.
53-
#[crate::macros::vectorized]
54-
#[crate::macros::gpu_only]
55-
pub fn ddy<F: Float>(component: F) -> F {
56-
deriv_fn!(component, OpDPdy)
57-
}
71+
/// Result is the partial derivative of `Self` with respect to the window y coordinate. Uses local differencing
72+
/// based on the value of `Self`. Same result as either [`Self::ddy_fine`] or [`Self::ddy_coarse`] on `Self`. Selection of which
73+
/// one is based on external factors.
74+
///
75+
/// An invocation will not execute a dynamic instance of this instruction (X') until all invocations in its
76+
/// derivative group have executed all dynamic instances that are program-ordered before X'.
77+
///
78+
/// This instruction is only valid in the Fragment Execution Model.
79+
#[crate::macros::gpu_only]
80+
fn ddy(self) -> Self {
81+
deriv_fn!(OpDPdy, self)
82+
}
5883

59-
/// Returns the partial derivative of `component` with respect to the window's Y
60-
/// coordinate. Uses local differencing based on the value of `component` for
61-
/// the current fragment and its immediate neighbor(s).
62-
#[crate::macros::vectorized]
63-
#[crate::macros::gpu_only]
64-
pub fn ddy_fine<F: Float>(component: F) -> F {
65-
deriv_fn!(component, OpDPdyFine)
66-
}
84+
/// Result is the partial derivative of `Self` with respect to the window y coordinate. Uses local differencing
85+
/// based on the value of `Self` for the current fragment and its immediate neighbor(s).
86+
///
87+
/// An invocation will not execute a dynamic instance of this instruction (X') until all invocations in its
88+
/// derivative group have executed all dynamic instances that are program-ordered before X'.
89+
///
90+
/// This instruction is only valid in the Fragment Execution Model.
91+
#[crate::macros::gpu_only]
92+
fn ddy_fine(self) -> Self {
93+
cap_deriv_control!();
94+
deriv_fn!(OpDPdyFine, self)
95+
}
6796

68-
/// Returns the partial derivative of `component` with respect to the window's Y
69-
/// coordinate. Uses local differencing based on the value of `component` for
70-
/// the current fragment’s neighbors, and possibly, but not necessarily,
71-
/// includes the value of `component` for the current fragment. That is, over a
72-
/// given area, the implementation can compute Y derivatives in fewer unique
73-
/// locations than would be allowed by [`ddy_fine`].
74-
#[crate::macros::vectorized]
75-
#[crate::macros::gpu_only]
76-
pub fn ddy_coarse<F: Float>(component: F) -> F {
77-
deriv_fn!(component, OpDPdyCoarse)
78-
}
97+
/// Result is the partial derivative of `Self` with respect to the window y coordinate. Uses local differencing
98+
/// based on the value of `Self` for the current fragment’s neighbors, and possibly, but not necessarily, includes
99+
/// the value of `Self` for the current fragment. That is, over a given area, the implementation can compute y
100+
/// derivatives in fewer unique locations than would be allowed for [`Self::ddy_fine`].
101+
///
102+
/// An invocation will not execute a dynamic instance of this instruction (X') until all invocations in its
103+
/// derivative group have executed all dynamic instances that are program-ordered before X'.
104+
///
105+
/// This instruction is only valid in the Fragment Execution Model.
106+
#[crate::macros::gpu_only]
107+
fn ddy_coarse(self) -> Self {
108+
cap_deriv_control!();
109+
deriv_fn!(OpDPdyCoarse, self)
110+
}
79111

80-
/// Returns the sum of the absolute values of [`ddx`] and [`ddy`] as a single
81-
/// operation.
82-
#[crate::macros::vectorized]
83-
#[crate::macros::gpu_only]
84-
pub fn fwidth<F: Float>(component: F) -> F {
85-
deriv_fn!(component, OpFwidth)
86-
}
112+
/// Result is the same as computing the sum of the absolute values of [`Self::ddx`] and [`Self::ddy`] on `Self`.
113+
///
114+
/// An invocation will not execute a dynamic instance of this instruction (X') until all invocations in its
115+
/// derivative group have executed all dynamic instances that are program-ordered before X'.
116+
///
117+
/// This instruction is only valid in the Fragment Execution Model.
118+
#[crate::macros::gpu_only]
119+
fn fwidth(self) -> Self {
120+
deriv_fn!(OpFwidth, self)
121+
}
87122

88-
/// Returns the sum of the absolute values of [`ddx_fine`] and [`ddy_fine`] as a
89-
/// single operation.
90-
#[crate::macros::vectorized]
91-
#[crate::macros::gpu_only]
92-
pub fn fwidth_fine<F: Float>(component: F) -> F {
93-
deriv_fn!(component, OpFwidthFine)
94-
}
123+
/// Result is the same as computing the sum of the absolute values of [`Self::ddx_fine`] and [`Self::ddy_fine`] on `Self`.
124+
///
125+
/// An invocation will not execute a dynamic instance of this instruction (X') until all invocations in its
126+
/// derivative group have executed all dynamic instances that are program-ordered before X'.
127+
///
128+
/// This instruction is only valid in the Fragment Execution Model.
129+
#[crate::macros::gpu_only]
130+
fn fwidth_fine(self) -> Self {
131+
cap_deriv_control!();
132+
deriv_fn!(OpFwidthFine, self)
133+
}
95134

96-
/// Returns the sum of the absolute values of [`ddx_coarse`] and [`ddy_coarse`]
97-
/// as a single operation.
98-
#[crate::macros::vectorized]
99-
#[crate::macros::gpu_only]
100-
pub fn fwidth_coarse<F: Float>(component: F) -> F {
101-
deriv_fn!(component, OpFwidthCoarse)
135+
/// Result is the same as computing the sum of the absolute values of [`Self::ddx_coarse`] and [`Self::ddy_coarse`] on `Self`.
136+
///
137+
/// An invocation will not execute a dynamic instance of this instruction (X') until all invocations in its
138+
/// derivative group have executed all dynamic instances that are program-ordered before X'.
139+
///
140+
/// This instruction is only valid in the Fragment Execution Model.
141+
#[crate::macros::gpu_only]
142+
fn fwidth_coarse(self) -> Self {
143+
cap_deriv_control!();
144+
deriv_fn!(OpFwidthCoarse, self)
145+
}
102146
}
147+
148+
unsafe impl Derivative for f32 {}
149+
unsafe impl Derivative for Vec2 {}
150+
unsafe impl Derivative for Vec3 {}
151+
unsafe impl Derivative for Vec4 {}
152+
unsafe impl Derivative for Vec3A {}

crates/spirv-std/src/sealed.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,18 @@ impl Sealed for i8 {}
1313
impl Sealed for i16 {}
1414
impl Sealed for i32 {}
1515
impl Sealed for i64 {}
16+
17+
impl Sealed for glam::Vec2 {}
18+
impl Sealed for glam::Vec3 {}
19+
impl Sealed for glam::Vec4 {}
20+
impl Sealed for glam::DVec2 {}
21+
impl Sealed for glam::DVec3 {}
22+
impl Sealed for glam::DVec4 {}
23+
impl Sealed for glam::UVec2 {}
24+
impl Sealed for glam::UVec3 {}
25+
impl Sealed for glam::UVec4 {}
26+
impl Sealed for glam::IVec2 {}
27+
impl Sealed for glam::IVec3 {}
28+
impl Sealed for glam::IVec4 {}
29+
30+
impl Sealed for glam::Vec3A {}

0 commit comments

Comments
 (0)