Skip to content

Commit 117025e

Browse files
committed
Merge remote-tracking branch 'origin/dev' into dev
2 parents 69e4bdb + 24beec4 commit 117025e

File tree

8 files changed

+496
-125
lines changed

8 files changed

+496
-125
lines changed

README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@ let img = ImageReader::open("./assets/nasa-4928x3279.png")
1212
let dimensions = img.dimensions();
1313
let transient = img.to_rgb8();
1414

15-
let start = Instant::now();
16-
1715
let src_size = ImageSize::new(dimensions.0 as usize, dimensions.1 as usize);
1816
let dst_size = ImageSize::new(dimensions.0 as usize / 4, dimensions.1 as usize / 4);
1917

app/src/main.rs

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,9 @@ use image::{
3535
ImageReader, Rgb, RgbImage,
3636
};
3737
use pic_scale_safe::{
38-
resize_fixed_point, resize_floating_point, resize_rgb16, resize_rgb8, resize_rgb_f32,
39-
resize_rgba16, resize_rgba8, ImageSize, ResamplingFunction,
38+
premultiply_rgba8, resize_fixed_point, resize_floating_point, resize_rgb16, resize_rgb8,
39+
resize_rgb_f32, resize_rgba16, resize_rgba8, unpremultiply_rgba8, ImageSize,
40+
ResamplingFunction,
4041
};
4142
use std::ops::{BitXor, Shr};
4243
use std::time::Instant;
@@ -47,26 +48,31 @@ fn main() {
4748
.decode()
4849
.unwrap();
4950
let dimensions = img.dimensions();
50-
let transient = img.to_rgb8();
51+
let transient = img.to_rgba8();
5152

5253
let mut working_store = transient.to_vec();
5354

5455
let start = Instant::now();
5556

5657
let src_size = ImageSize::new(dimensions.0 as usize, dimensions.1 as usize);
57-
let dst_size = ImageSize::new(
58-
(dimensions.0 as f32 + 1.) as usize,
59-
(dimensions.1 as f32 + 1.) as usize,
60-
);
58+
let dst_size = ImageSize::new(dimensions.0 as usize / 2, dimensions.1 as usize / 2);
6159

62-
let mut resized = resize_rgb8(
60+
let start_mul = Instant::now();
61+
62+
premultiply_rgba8(&mut working_store);
63+
64+
println!("Alpha mul time {:?}", start_mul.elapsed());
65+
66+
let mut resized = resize_rgba8(
6367
&working_store,
6468
src_size,
6569
dst_size,
6670
ResamplingFunction::Lanczos3,
6771
)
6872
.unwrap();
6973

74+
// unpremultiply_rgba8(&mut resized);
75+
7076
println!("Working time {:?}", start.elapsed());
7177

7278
// let rgba_image = DynamicImage::ImageRgb16(ImageBuffer::<Rgb<u16>, Vec<u16>>::from_vec(dimensions.0 * 4, dimensions.1 / 4, resized).unwrap());
@@ -82,7 +88,7 @@ fn main() {
8288
&resized,
8389
dst_size.width as u32,
8490
dst_size.height as u32,
85-
image::ColorType::Rgb8,
91+
image::ColorType::Rgba8,
8692
)
8793
.unwrap();
8894

src/alpha.rs

Lines changed: 83 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@
2727
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2828
*/
2929

30+
/// Divides `v` by 255, rounding to the nearest integer, without a division instruction.
///
/// Computes `(t + (t >> 8)) >> 8` with `t = v + 128`, which equals `round(v / 255)` for
/// every product of two 8-bit values (`0..=255 * 255`). The result is clamped to 255, so
/// any larger `v` saturates instead of wrapping.
#[inline]
fn div_by_255(v: u16) -> u8 {
    // Widen before adding the rounding bias: in `u16`, `v + 0x80` overflows once
    // `v > 65407`, which panics in debug builds and wraps in release builds.
    let t = u32::from(v) + 0x80;
    ((t + (t >> 8)) >> 8).min(255) as u8
}
34+
3035
/// Associate alpha in place
3136
///
3237
/// Note, for scaling alpha must be *associated*
@@ -36,17 +41,14 @@
3641
/// * `in_place`: Slice to premultiply in place
3742
///
3843
pub fn premultiply_rgba8(in_place: &mut [u8]) {
39-
for chunk in in_place.chunks_mut(4) {
44+
// Almost all loops are not auto-vectorised without doing anything dirty.
45+
// So everywhere is just added something beautiful.
46+
for chunk in in_place.chunks_exact_mut(4) {
4047
let a = chunk[3] as u16;
41-
let mut r = chunk[0] as u16;
42-
let mut g = chunk[1] as u16;
43-
let mut b = chunk[2] as u16;
44-
r = (r * a) / 255;
45-
g = (g * a) / 255;
46-
b = (b * a) / 255;
47-
chunk[0] = r as u8;
48-
chunk[1] = g as u8;
49-
chunk[2] = b as u8;
48+
chunk[0] = div_by_255(chunk[0] as u16 * a);
49+
chunk[1] = div_by_255(chunk[1] as u16 * a);
50+
chunk[2] = div_by_255(chunk[2] as u16 * a);
51+
chunk[3] = div_by_255(a * a);
5052
}
5153
}
5254

@@ -60,23 +62,17 @@ pub fn premultiply_rgba8(in_place: &mut [u8]) {
6062
///
6163
///
6264
pub fn unpremultiply_rgba8(in_place: &mut [u8]) {
    // Reverses alpha association: every colour channel becomes `c * 255 / alpha`
    // (truncated, clamped to 255). Alpha is never modified. Pixels whose alpha is zero
    // are left as they are, and trailing bytes that do not form a whole 4-byte pixel
    // are ignored.
    for chunk in in_place.chunks_exact_mut(4) {
        let a = u16::from(chunk[3]);
        if a == 0 {
            continue;
        }
        // Exact integer arithmetic (`c * 255 <= 65025` fits in u16). Multiplying by an
        // f32 reciprocal and truncating can land one below an exact quotient
        // (e.g. c = a = 41 yields 254 instead of 255).
        for c in &mut chunk[..3] {
            *c = (u16::from(*c) * 255 / a).min(255) as u8;
        }
    }
}
8278

@@ -89,11 +85,12 @@ pub fn unpremultiply_rgba8(in_place: &mut [u8]) {
8985
/// * `in_place`: Slice to premultiply in place
9086
///
9187
pub fn premultiply_la8(in_place: &mut [u8]) {
92-
for chunk in in_place.chunks_mut(2) {
88+
// Almost all loops are not auto-vectorised without doing anything dirty.
89+
// So everywhere is just added something beautiful.
90+
for chunk in in_place.chunks_exact_mut(2) {
9391
let a = chunk[1] as u16;
94-
let mut r = chunk[0] as u16;
95-
r = (r * a) / 255;
96-
chunk[0] = r as u8;
92+
chunk[0] = div_by_255(chunk[0] as u16 * a);
93+
chunk[1] = div_by_255(chunk[1] as u16 * a);
9794
}
9895
}
9996

@@ -107,15 +104,15 @@ pub fn premultiply_la8(in_place: &mut [u8]) {
107104
///
108105
///
109106
pub fn unpremultiply_la8(in_place: &mut [u8]) {
    // Reverses alpha association for 2-byte luma/alpha pixels: luma becomes
    // `l * 255 / alpha` (truncated, clamped to 255). Alpha is never modified, pixels
    // with zero alpha are left as they are, and a trailing odd byte is ignored.
    for chunk in in_place.chunks_exact_mut(2) {
        let a = u16::from(chunk[1]);
        if a != 0 {
            // Exact integer arithmetic; `l * 255 <= 65025` fits in u16. An f32
            // reciprocal followed by truncation can be one too low on exact quotients.
            chunk[0] = (u16::from(chunk[0]) * 255 / a).min(255) as u8;
        }
    }
}
121118

@@ -129,19 +126,20 @@ pub fn unpremultiply_la8(in_place: &mut [u8]) {
129126
/// * `bit_depth`: Bit-depth of the image
130127
///
131128
pub fn premultiply_rgba16(in_place: &mut [u16], bit_depth: u32) {
    // Scales R, G and B of every 4-element RGBA pixel by `alpha / max`, where
    // `max = 2^bit_depth - 1`. Results are truncated and clamped to `max`. Alpha is
    // left unchanged, and trailing elements that do not form a whole pixel are ignored.
    //
    // Panics if `bit_depth` is not in `1..=16`.
    assert!(bit_depth > 0 && bit_depth <= 16);
    let max_colors: u32 = (1u32 << bit_depth) - 1;
    for chunk in in_place.chunks_exact_mut(4) {
        let a = u32::from(chunk[3]);
        // The product of two 16-bit values always fits in u32, so the division is
        // exact. Routing it through f32 would round any product above 2^24 before
        // dividing.
        for c in &mut chunk[..3] {
            *c = (u32::from(*c) * a / max_colors).min(max_colors) as u16;
        }
    }
}
147145

@@ -155,13 +153,16 @@ pub fn premultiply_rgba16(in_place: &mut [u16], bit_depth: u32) {
155153
/// * `bit_depth`: Bit-depth of the image
156154
///
157155
pub fn premultiply_la16(in_place: &mut [u16], bit_depth: u32) {
    // Scales the luma element of every 2-element luma/alpha pixel by `alpha / max`,
    // where `max = 2^bit_depth - 1`. Results are truncated and clamped to `max`. Alpha
    // is left unchanged, and a trailing odd element is ignored.
    //
    // Panics if `bit_depth` is not in `1..=16`.
    assert!(bit_depth > 0 && bit_depth <= 16);
    let max_colors: u32 = (1u32 << bit_depth) - 1;
    // Luma/alpha pixels are two elements wide; stepping by four would process only
    // every second pixel.
    for chunk in in_place.chunks_exact_mut(2) {
        let a = u32::from(chunk[1]);
        // Exact in u32; an f32 round trip would round products above 2^24.
        chunk[0] = (u32::from(chunk[0]) * a / max_colors).min(max_colors) as u16;
    }
}
167168

@@ -176,17 +177,17 @@ pub fn premultiply_la16(in_place: &mut [u16], bit_depth: u32) {
176177
///
177178
///
178179
pub fn unpremultiply_la16(in_place: &mut [u16], bit_depth: u32) {
    // Reverses alpha association for 2-element luma/alpha pixels: luma becomes
    // `l * max / alpha` with `max = 2^bit_depth - 1`, truncated and clamped to `max`.
    // Alpha is never modified, pixels with zero alpha are left as they are, and a
    // trailing odd element is ignored.
    //
    // Panics if `bit_depth` is not in `1..=16`.
    assert!(bit_depth > 0 && bit_depth <= 16);
    let max_colors: u32 = (1u32 << bit_depth) - 1;
    for chunk in in_place.chunks_exact_mut(2) {
        let a = u32::from(chunk[1]);
        if a != 0 {
            // `l * max` is at most 65535 * 65535, which fits in u32, so the division
            // is exact. The f32 path would round the product and the reciprocal.
            chunk[0] = (u32::from(chunk[0]) * max_colors / a).min(max_colors) as u16;
        }
    }
}
192193

@@ -201,25 +202,19 @@ pub fn unpremultiply_la16(in_place: &mut [u16], bit_depth: u32) {
201202
///
202203
///
203204
pub fn unpremultiply_rgba16(in_place: &mut [u16], bit_depth: u32) {
    // Reverses alpha association for 4-element RGBA pixels: each colour channel
    // becomes `c * max / alpha` with `max = 2^bit_depth - 1`, truncated and clamped to
    // `max`. Alpha is never modified, pixels with zero alpha are left as they are, and
    // trailing elements that do not form a whole pixel are ignored.
    //
    // Panics if `bit_depth` is not in `1..=16`.
    assert!(bit_depth > 0 && bit_depth <= 16);
    let max_colors: u32 = (1u32 << bit_depth) - 1;
    for chunk in in_place.chunks_exact_mut(4) {
        let a = u32::from(chunk[3]);
        if a == 0 {
            continue;
        }
        // `c * max` is at most 65535 * 65535, which fits in u32, so the division is
        // exact. The f32 path would round the product and the reciprocal.
        for c in &mut chunk[..3] {
            *c = (u32::from(*c) * max_colors / a).min(max_colors) as u16;
        }
    }
}
225220

@@ -232,17 +227,14 @@ pub fn unpremultiply_rgba16(in_place: &mut [u16], bit_depth: u32) {
232227
/// * `in_place`: Slice to premultiply in place
233228
///
234229
pub fn premultiply_rgba_f32(in_place: &mut [f32]) {
    // Multiplies R, G and B of every 4-element RGBA pixel by that pixel's alpha.
    // Alpha is read but never written (the former `chunk[3] = a` store was a no-op).
    // Trailing elements that do not form a whole pixel are ignored.
    for chunk in in_place.chunks_exact_mut(4) {
        let a = chunk[3];
        for c in &mut chunk[..3] {
            *c *= a;
        }
    }
}
248240

@@ -256,22 +248,14 @@ pub fn premultiply_rgba_f32(in_place: &mut [f32]) {
256248
///
257249
///
258250
pub fn unpremultiply_rgba_f32(in_place: &mut [f32]) {
    // Divides R, G and B of every 4-element RGBA pixel by that pixel's alpha, using a
    // single reciprocal per pixel. Alpha is read but never written (the former
    // `chunk[3] = a` store was a no-op). Pixels whose alpha is exactly zero are left
    // as they are, and trailing elements that do not form a whole pixel are ignored.
    for chunk in in_place.chunks_exact_mut(4) {
        let a = chunk[3];
        if a != 0. {
            let a_recip = 1. / a;
            for c in &mut chunk[..3] {
                *c *= a_recip;
            }
        }
    }
}

0 commit comments

Comments
 (0)