Skip to content

Commit 89c312d

Browse files
committed
Speed up rendering
* Move vertex normalization to render thread
* Convert bg affine transformations on gpu
1 parent 40075f6 commit 89c312d

File tree

5 files changed

+54
-54
lines changed

5 files changed

+54
-54
lines changed

src/core/graphics/gpu_2d/renderer_2d.rs

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use crate::utils;
1414
use crate::utils::rgb5_to_float8;
1515
use gl::types::{GLint, GLuint};
1616
use static_assertions::const_assert;
17-
use std::hint::unreachable_unchecked;
17+
use std::hint::{assert_unchecked, unreachable_unchecked};
1818
use std::intrinsics::unlikely;
1919
use std::{mem, ptr, slice};
2020

@@ -73,12 +73,12 @@ const_assert!(size_of::<WinBgUbo>() <= 16 * 1024);
7373
#[repr(C)]
7474
struct BgUbo {
7575
ofs: [u32; DISPLAY_HEIGHT * 4],
76-
x: [f32; DISPLAY_HEIGHT * 2],
77-
y: [f32; DISPLAY_HEIGHT * 2],
78-
pa: [f32; DISPLAY_HEIGHT * 2],
79-
pb: [f32; DISPLAY_HEIGHT * 2],
80-
pc: [f32; DISPLAY_HEIGHT * 2],
81-
pd: [f32; DISPLAY_HEIGHT * 2],
76+
x: [i32; DISPLAY_HEIGHT * 2],
77+
y: [i32; DISPLAY_HEIGHT * 2],
78+
pa: [i32; DISPLAY_HEIGHT * 2],
79+
pb: [i32; DISPLAY_HEIGHT * 2],
80+
pc: [i32; DISPLAY_HEIGHT * 2],
81+
pd: [i32; DISPLAY_HEIGHT * 2],
8282
}
8383

8484
const_assert!(size_of::<BgUbo>() <= 16 * 1024);
@@ -102,6 +102,7 @@ impl Default for Gpu2DRenderRegs {
102102

103103
impl Gpu2DRenderRegs {
104104
fn on_scanline<const ENGINE: Gpu2DEngine>(&mut self, inner: &mut Gpu2DRegisters<ENGINE>, line: u8) {
105+
unsafe { assert_unchecked(self.current_batch_count_index < DISPLAY_HEIGHT) };
105106
let updated = self.disp_cnts[self.current_batch_count_index] != u32::from(inner.disp_cnt);
106107
let updated = updated || {
107108
let mut updated = false;
@@ -135,21 +136,21 @@ impl Gpu2DRenderRegs {
135136
self.bg_ubo.ofs[i * DISPLAY_HEIGHT + line as usize] = (inner.bg_h_ofs[i] as u32) | ((inner.bg_v_ofs[i] as u32) << 16);
136137
}
137138
for i in 0..2 {
138-
self.bg_ubo.x[i * DISPLAY_HEIGHT + line as usize] = inner.bg_x[i] as f32 / 256.0;
139-
self.bg_ubo.y[i * DISPLAY_HEIGHT + line as usize] = inner.bg_y[i] as f32 / 256.0;
140-
self.bg_ubo.pa[i * DISPLAY_HEIGHT + line as usize] = inner.bg_pa[i] as f32 / 256.0;
141-
self.bg_ubo.pc[i * DISPLAY_HEIGHT + line as usize] = inner.bg_pc[i] as f32 / 256.0;
139+
self.bg_ubo.x[i * DISPLAY_HEIGHT + line as usize] = inner.bg_x[i];
140+
self.bg_ubo.y[i * DISPLAY_HEIGHT + line as usize] = inner.bg_y[i];
141+
self.bg_ubo.pa[i * DISPLAY_HEIGHT + line as usize] = inner.bg_pa[i] as i32;
142+
self.bg_ubo.pc[i * DISPLAY_HEIGHT + line as usize] = inner.bg_pc[i] as i32;
142143

143144
if unlikely(inner.bg_x_dirty || line == 0) {
144-
self.bg_ubo.pb[i * DISPLAY_HEIGHT + line as usize] = 0f32;
145+
self.bg_ubo.pb[i * DISPLAY_HEIGHT + line as usize] = 0;
145146
} else {
146-
self.bg_ubo.pb[i * DISPLAY_HEIGHT + line as usize] = inner.bg_pb[i] as f32 / 256.0 + self.bg_ubo.pb[i * DISPLAY_HEIGHT + line as usize - 1];
147+
self.bg_ubo.pb[i * DISPLAY_HEIGHT + line as usize] = inner.bg_pb[i] as i32 + self.bg_ubo.pb[i * DISPLAY_HEIGHT + line as usize - 1];
147148
}
148149

149150
if unlikely(inner.bg_y_dirty || line == 0) {
150-
self.bg_ubo.pd[i * DISPLAY_HEIGHT + line as usize] = 0f32;
151+
self.bg_ubo.pd[i * DISPLAY_HEIGHT + line as usize] = 0;
151152
} else {
152-
self.bg_ubo.pd[i * DISPLAY_HEIGHT + line as usize] = inner.bg_pd[i] as f32 / 256.0 + self.bg_ubo.pd[i * DISPLAY_HEIGHT + line as usize - 1];
153+
self.bg_ubo.pd[i * DISPLAY_HEIGHT + line as usize] = inner.bg_pd[i] as i32 + self.bg_ubo.pd[i * DISPLAY_HEIGHT + line as usize - 1];
153154
}
154155
}
155156
inner.bg_x_dirty = false;

src/core/graphics/gpu_2d/shaders/cg/bg_frag_common.cg

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ uniform sampler2D display3dTex : TEXUNIT4;
1313

1414
uniform BgUbo {
1515
int bgOfs[192 * 4];
16-
float bgX[192 * 2];
17-
float bgY[192 * 2];
18-
float bgPas[192 * 2];
19-
float bgPbs[192 * 2];
20-
float bgPcs[192 * 2];
21-
float bgPds[192 * 2];
16+
int bgX[192 * 2];
17+
int bgY[192 * 2];
18+
int bgPas[192 * 2];
19+
int bgPbs[192 * 2];
20+
int bgPcs[192 * 2];
21+
int bgPds[192 * 2];
2222
} BgUbo : BUFFER[0];
2323

2424
short readBg8(int addr) {
@@ -57,11 +57,11 @@ float3 normRgb5(short color) {
5757

5858
short2 calculateAffineCoords(short x, short y, short bgNum) {
5959
short index = (bgNum - 2) * 192 + y;
60-
float bgX = BgUbo.bgX[index];
61-
float bgY = BgUbo.bgY[index];
62-
float bgPa = BgUbo.bgPas[index];
63-
float bgPb = BgUbo.bgPbs[index];
64-
float bgPc = BgUbo.bgPcs[index];
65-
float bgPd = BgUbo.bgPds[index];
60+
float bgX = float(BgUbo.bgX[index]) / 256.0;
61+
float bgY = float(BgUbo.bgY[index]) / 256.0;
62+
float bgPa = float(BgUbo.bgPas[index]) / 256.0;
63+
float bgPb = float(BgUbo.bgPbs[index]) / 256.0;
64+
float bgPc = float(BgUbo.bgPcs[index]) / 256.0;
65+
float bgPd = float(BgUbo.bgPds[index]) / 256.0;
6666
return short2(short(bgX + bgPb + float(x) * bgPa), short(bgY + bgPd + float(x) * bgPc));
6767
}

src/core/graphics/gpu_2d/shaders/glsl/bg_frag_common.glsl

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ uniform int bgCnt;
1313

1414
uniform BgUbo {
1515
int bgOfs[192 * 4];
16-
float bgX[192 * 2];
17-
float bgY[192 * 2];
18-
float bgPas[192 * 2];
19-
float bgPbs[192 * 2];
20-
float bgPcs[192 * 2];
21-
float bgPds[192 * 2];
16+
int bgX[192 * 2];
17+
int bgY[192 * 2];
18+
int bgPas[192 * 2];
19+
int bgPbs[192 * 2];
20+
int bgPcs[192 * 2];
21+
int bgPds[192 * 2];
2222
};
2323

2424
uniform sampler2D bgTex;
@@ -66,11 +66,11 @@ vec3 normRgb5(int color) {
6666

6767
ivec2 calculateAffineCoords(int x, int y, int bgNum) {
6868
int index = (bgNum - 2) * 192 + y;
69-
float bgX = bgX[index];
70-
float bgY = bgY[index];
71-
float bgPa = bgPas[index];
72-
float bgPb = bgPbs[index];
73-
float bgPc = bgPcs[index];
74-
float bgPd = bgPds[index];
69+
float bgX = float(bgX[index]) / 256.0;
70+
float bgY = float(bgY[index]) / 256.0;
71+
float bgPa = float(bgPas[index]) / 256.0;
72+
float bgPb = float(bgPbs[index]) / 256.0;
73+
float bgPc = float(bgPcs[index]) / 256.0;
74+
float bgPd = float(bgPds[index]) / 256.0;
7575
return ivec2(int(bgX + bgPb + float(x) * bgPa), int(bgY + bgPd + float(x) * bgPc));
7676
}

src/core/graphics/gpu_3d/registers_3d.rs

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ pub struct Vertex {
231231
pub coords: Vectori32<4>,
232232
pub tex_coords: Vectori16<2>,
233233
pub color: u32,
234+
pub viewport: Vectoru16<4>,
234235
}
235236

236237
fn intersect(v1: &Vectorf32<4>, v2: &Vectorf32<4>, val1: f32, val2: f32) -> Vectorf32<4> {
@@ -1142,19 +1143,11 @@ impl Gpu3DRegisters {
11421143
}
11431144

11441145
fn process_vertices(&mut self) {
1145-
let [x, y, w, h] = *self.viewport.as_ref();
1146-
let Self { vertices, .. } = self;
1147-
1148-
for i in vertices.process_count..vertices.count_in {
1149-
let coords = &mut vertices.ins[i].coords;
1150-
if coords[3] != 0 {
1151-
coords[0] = ((coords[0] as i64 + coords[3] as i64) * w as i64 / (coords[3] as i64 * 2) + x as i64) as i32;
1152-
coords[1] = ((-coords[1] as i64 + coords[3] as i64) * h as i64 / (coords[3] as i64 * 2) + y as i64) as i32;
1153-
coords[2] = (((coords[2] as i64) << 12) / coords[3] as i64) as i32;
1154-
}
1146+
for i in self.vertices.process_count..self.vertices.count_in {
1147+
unsafe { self.vertices.ins.get_unchecked_mut(i).viewport = self.viewport };
11551148
}
11561149

1157-
vertices.process_count = vertices.count_in;
1150+
self.vertices.process_count = self.vertices.count_in;
11581151
self.viewport = self.viewport_next;
11591152
}
11601153

@@ -1264,7 +1257,7 @@ impl Gpu3DRegisters {
12641257
let mut clipped = [Vectorf32::<4>::default(); 10];
12651258
let cull = (!self.render_front && dot > 0) || (!self.render_back && dot < 0);
12661259
let mut clipped_size = self.saved_polygon.size;
1267-
let clip = if cull { false } else { clip_polygon(&unclipped, &mut clipped, &mut clipped_size) };
1260+
let clip = !cull && clip_polygon(&unclipped, &mut clipped, &mut clipped_size);
12681261

12691262
if cull || clipped_size == 0 {
12701263
match self.polygon_type {

src/core/graphics/gpu_3d/renderer_3d.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,11 +111,17 @@ impl From<(&Vertex, u16)> for Gpu3DVertex {
111111
fn from(value: (&Vertex, u16)) -> Self {
112112
let (vertex, polygon_index) = value;
113113
let c = rgb6_to_float8(vertex.color);
114+
115+
let [x, y, w, h] = *vertex.viewport.as_ref();
116+
let vertex_x = ((vertex.coords[0] as i64 + vertex.coords[3] as i64) * w as i64 / (vertex.coords[3] as i64 * 2) + x as i64) as i32;
117+
let vertex_y = ((-vertex.coords[1] as i64 + vertex.coords[3] as i64) * h as i64 / (vertex.coords[3] as i64 * 2) + y as i64) as i32;
118+
let vertex_z = (((vertex.coords[2] as i64) << 12) / vertex.coords[3] as i64) as i32;
119+
114120
Gpu3DVertex {
115121
coords: [
116-
vertex.coords[0] as f32 / 255f32 * 2f32 - 1f32,
117-
1f32 - vertex.coords[1] as f32 / 191f32 * 2f32,
118-
(vertex.coords[2] as f32 / 4096f32) * 0.5 - 0.5,
122+
vertex_x as f32 / 255f32 * 2f32 - 1f32,
123+
1f32 - vertex_y as f32 / 191f32 * 2f32,
124+
(vertex_z as f32 / 4096f32) * 0.5 - 0.5,
119125
polygon_index as f32,
120126
],
121127
color: [c.0, c.1, c.2],

0 commit comments

Comments
 (0)