Major speedups with SIMD.

This commit is contained in:
Joshua Yanovski 2020-08-07 23:50:27 +02:00
parent fba64a7d93
commit 3dc973e0be
11 changed files with 67 additions and 62 deletions

29
Cargo.lock generated
View File

@ -1223,7 +1223,7 @@ name = "euc"
version = "0.5.1" version = "0.5.1"
source = "git+https://github.com/zesterer/euc.git#c9a7c17a03d45fce00caeeca09afa1e1558cd183" source = "git+https://github.com/zesterer/euc.git#c9a7c17a03d45fce00caeeca09afa1e1558cd183"
dependencies = [ dependencies = [
"vek", "vek 0.11.2",
] ]
[[package]] [[package]]
@ -4468,7 +4468,7 @@ version = "0.1.0"
source = "git+https://gitlab.com/yusdacra/treeculler.git#efcf5283cf386117a7e654abdaa45ef664a08e42" source = "git+https://gitlab.com/yusdacra/treeculler.git#efcf5283cf386117a7e654abdaa45ef664a08e42"
dependencies = [ dependencies = [
"num-traits", "num-traits",
"vek", "vek 0.11.2",
] ]
[[package]] [[package]]
@ -4636,6 +4636,19 @@ dependencies = [
"static_assertions", "static_assertions",
] ]
[[package]]
name = "vek"
version = "0.12.0"
source = "git+https://gitlab.com/veloren/vek.git?branch=fix_intrinsics#237a78528b505f34f6dde5dc77db3b642388fe4a"
dependencies = [
"approx",
"num-integer",
"num-traits",
"rustc_version",
"serde",
"static_assertions",
]
[[package]] [[package]]
name = "veloren-chat-cli" name = "veloren-chat-cli"
version = "0.6.0" version = "0.6.0"
@ -4663,7 +4676,7 @@ dependencies = [
"specs", "specs",
"tracing", "tracing",
"uvth 3.1.1", "uvth 3.1.1",
"vek", "vek 0.12.0",
"veloren-common", "veloren-common",
"veloren_network", "veloren_network",
] ]
@ -4694,7 +4707,7 @@ dependencies = [
"specs-idvs", "specs-idvs",
"sum_type", "sum_type",
"tracing", "tracing",
"vek", "vek 0.12.0",
] ]
[[package]] [[package]]
@ -4725,7 +4738,7 @@ dependencies = [
"tiny_http", "tiny_http",
"tracing", "tracing",
"uvth 3.1.1", "uvth 3.1.1",
"vek", "vek 0.12.0",
"veloren-common", "veloren-common",
"veloren-world", "veloren-world",
"veloren_network", "veloren_network",
@ -4786,7 +4799,7 @@ dependencies = [
"tracing-subscriber", "tracing-subscriber",
"treeculler", "treeculler",
"uvth 3.1.1", "uvth 3.1.1",
"vek", "vek 0.12.0",
"veloren-client", "veloren-client",
"veloren-common", "veloren-common",
"veloren-server", "veloren-server",
@ -4805,7 +4818,7 @@ dependencies = [
"libloading 0.6.2", "libloading 0.6.2",
"notify", "notify",
"tracing", "tracing",
"vek", "vek 0.12.0",
"veloren-common", "veloren-common",
] ]
@ -4835,7 +4848,7 @@ dependencies = [
"serde", "serde",
"tracing", "tracing",
"tracing-subscriber", "tracing-subscriber",
"vek", "vek 0.12.0",
"veloren-common", "veloren-common",
] ]

View File

@ -80,3 +80,4 @@ debug = 1
[patch.crates-io] [patch.crates-io]
# cpal conflict fix isn't released yet # cpal conflict fix isn't released yet
winit = { git = "https://github.com/Imberflur/winit.git", branch = "macos-test" } winit = { git = "https://github.com/Imberflur/winit.git", branch = "macos-test" }
vek = { git = "https://gitlab.com/veloren/vek.git", branch = "fix_intrinsics" }

View File

@ -19,6 +19,6 @@ num_cpus = "1.10.1"
tracing = { version = "0.1", default-features = false } tracing = { version = "0.1", default-features = false }
rayon = "^1.3.0" rayon = "^1.3.0"
specs = { git = "https://github.com/amethyst/specs.git", rev = "7a2e348ab2223818bad487695c66c43db88050a5" } specs = { git = "https://github.com/amethyst/specs.git", rev = "7a2e348ab2223818bad487695c66c43db88050a5" }
vek = { version = "0.11.2", features = ["repr_simd", "serde"] } vek = { version = "0.12.0", features = ["platform_intrinsics", "serde"] }
hashbrown = { version = "0.7.2", features = ["rayon", "serde", "nightly"] } hashbrown = { version = "0.7.2", features = ["rayon", "serde", "nightly"] }
authc = { git = "https://gitlab.com/veloren/auth.git", rev = "b943c85e4a38f5ec60cd18c34c73097640162bfe" } authc = { git = "https://gitlab.com/veloren/auth.git", rev = "b943c85e4a38f5ec60cd18c34c73097640162bfe" }

View File

@ -13,7 +13,7 @@ specs-idvs = { git = "https://gitlab.com/veloren/specs-idvs.git", branch = "spec
roots = "0.0.5" roots = "0.0.5"
specs = { git = "https://github.com/amethyst/specs.git", features = ["serde", "storage-event-control"], rev = "7a2e348ab2223818bad487695c66c43db88050a5" } specs = { git = "https://github.com/amethyst/specs.git", features = ["serde", "storage-event-control"], rev = "7a2e348ab2223818bad487695c66c43db88050a5" }
vek = { version = "0.11.2", features = ["repr_simd", "serde"] } vek = { version = "0.12.0", features = ["platform_intrinsics", "serde"] }
dot_vox = "4.0" dot_vox = "4.0"
image = { version = "0.23.8", default-features = false, features = ["png"] } image = { version = "0.23.8", default-features = false, features = ["png"] }
serde = { version = "1.0.110", features = ["derive"] } serde = { version = "1.0.110", features = ["derive"] }

View File

@ -17,7 +17,7 @@ specs-idvs = { git = "https://gitlab.com/veloren/specs-idvs.git", branch = "spec
tracing = "0.1" tracing = "0.1"
specs = { git = "https://github.com/amethyst/specs.git", features = ["shred-derive"], rev = "7a2e348ab2223818bad487695c66c43db88050a5" } specs = { git = "https://github.com/amethyst/specs.git", features = ["shred-derive"], rev = "7a2e348ab2223818bad487695c66c43db88050a5" }
vek = { version = "0.11.2", features = ["repr_simd", "serde"] } vek = { version = "0.12.0", features = ["platform_intrinsics", "serde"] }
uvth = "3.1.1" uvth = "3.1.1"
futures-util = "0.3" futures-util = "0.3"
futures-executor = "0.3" futures-executor = "0.3"

View File

@ -38,7 +38,7 @@ specs = { git = "https://github.com/amethyst/specs.git", rev = "7a2e348ab2223818
specs-idvs = { git = "https://gitlab.com/veloren/specs-idvs.git", branch = "specs-git" } specs-idvs = { git = "https://gitlab.com/veloren/specs-idvs.git", branch = "specs-git" }
# Mathematics # Mathematics
vek = { version = "0.11.2", features = ["repr_simd", "serde"] } vek = { version = "0.12.0", features = ["platform_intrinsics", "serde"] }
# Controller # Controller
gilrs = { version = "0.7", features = ["serde"] } gilrs = { version = "0.7", features = ["serde"] }

View File

@ -17,7 +17,7 @@ be-dyn-lib = []
default = ["be-dyn-lib"] default = ["be-dyn-lib"]
[dependencies] [dependencies]
vek = { version = "0.11.2", features = ["repr_simd", "serde"] } vek = { version = "0.12.0", features = ["platform_intrinsics", "serde"] }
common = { package = "veloren-common", path = "../../../common" } common = { package = "veloren-common", path = "../../../common" }
libloading = { version = "0.6.2", optional = true } libloading = { version = "0.6.2", optional = true }
notify = { version = "5.0.0-pre.2", optional = true } notify = { version = "5.0.0-pre.2", optional = true }

View File

@ -331,9 +331,6 @@ impl FigureMgr {
#[allow(clippy::redundant_pattern_matching)] #[allow(clippy::redundant_pattern_matching)]
// TODO: Pending review in #587 // TODO: Pending review in #587
// NOTE: All of the "useless" conversion reported here allow us to abstract over repr_c vs.
// simd vectors, so fixing this warning would make the code worse in this case.
#[allow(clippy::useless_conversion)]
pub fn update_lighting(&mut self, scene_data: &SceneData) { pub fn update_lighting(&mut self, scene_data: &SceneData) {
let ecs = scene_data.state.ecs(); let ecs = scene_data.state.ecs();
for (entity, light_emitter) in (&ecs.entities(), &ecs.read_storage::<LightEmitter>()).join() for (entity, light_emitter) in (&ecs.entities(), &ecs.read_storage::<LightEmitter>()).join()
@ -411,9 +408,6 @@ impl FigureMgr {
#[allow(clippy::or_fun_call)] #[allow(clippy::or_fun_call)]
// TODO: Pending review in #587 // TODO: Pending review in #587
// NOTE: All of the "useless" conversion reported here allow us to abstract over repr_c vs.
// simd vectors, so fixing this warning would make the code worse in this case.
#[allow(clippy::useless_conversion)]
pub fn maintain( pub fn maintain(
&mut self, &mut self,
renderer: &mut Renderer, renderer: &mut Renderer,
@ -422,6 +416,7 @@ impl FigureMgr {
visible_psr_bounds: math::Aabr<f32>, visible_psr_bounds: math::Aabr<f32>,
camera: &Camera, camera: &Camera,
) -> anim::vek::Aabb<f32> { ) -> anim::vek::Aabb<f32> {
let visible_psr_bounds = math::Aabr::from(visible_psr_bounds);
let state = scene_data.state; let state = scene_data.state;
let time = state.get_time(); let time = state.get_time();
let tick = scene_data.tick; let tick = scene_data.tick;
@ -455,9 +450,10 @@ impl FigureMgr {
let ray_mat = ray_mat * math::Mat4::translation_3d(-focus_off); let ray_mat = ray_mat * math::Mat4::translation_3d(-focus_off);
let collides_with_aabr = |a: math::Aabr<f32>, b: math::Aabr<f32>| { let collides_with_aabr = |a: math::Aabr<f32>, b: math::Aabr<f32>| {
a.min.partial_cmple(&b.max).reduce_and() && a.max.partial_cmpge(&b.min).reduce_and() let min = math::Vec4::new(a.min.x, a.min.y, b.min.x, b.min.y);
let max = math::Vec4::new(b.max.x, b.max.y, a.max.x, a.max.y);
min.partial_cmple_simd(max).reduce_and()
}; };
// println!("Aabr: {:?}", visible_bounds);
move |pos: (anim::vek::Vec3<f32>,), radius: f32| { move |pos: (anim::vek::Vec3<f32>,), radius: f32| {
// Short circuit when there are no shadows to cast. // Short circuit when there are no shadows to cast.
if !can_shadow_sun { if !can_shadow_sun {

View File

@ -1,9 +1,9 @@
use core::{iter, mem}; use core::{iter, mem};
use hashbrown::HashMap; use hashbrown::HashMap;
use num::traits::Float; use num::traits::Float;
// pub use vek::{geom::repr_simd::*, mat::repr_simd::column_major::Mat4, ops::*, pub use vek::{geom::repr_simd::*, mat::repr_simd::column_major::Mat4, ops::*, vec::repr_simd::*};
// vec::repr_simd::*}; // pub use vek::{geom::repr_c::*, mat::repr_c::column_major::Mat4, ops::*,
pub use vek::{geom::repr_c::*, mat::repr_c::column_major::Mat4, ops::*, vec::repr_c::*}; // vec::repr_c::*};
pub fn aabb_to_points<T: Float>(bounds: Aabb<T>) -> [Vec3<T>; 8] { pub fn aabb_to_points<T: Float>(bounds: Aabb<T>) -> [Vec3<T>; 8] {
[ [
@ -18,7 +18,7 @@ pub fn aabb_to_points<T: Float>(bounds: Aabb<T>) -> [Vec3<T>; 8] {
] ]
} }
/// Each Vec4 <a, b, c, d> should be interpreted as representing plane /// Each Vec4 <a, b, c, -d> should be interpreted as representing plane
/// equation /// equation
/// ///
/// a(x - x0) + b(y - y0) + c(z - z0) = 0, i.e. /// a(x - x0) + b(y - y0) + c(z - z0) = 0, i.e.
@ -28,23 +28,23 @@ pub fn aabb_to_points<T: Float>(bounds: Aabb<T>) -> [Vec3<T>; 8] {
/// ax + by + cz = d /// ax + by + cz = d
/// ///
/// where d is the distance of the plane from the origin. /// where d is the distance of the plane from the origin.
pub fn aabb_to_planes<T: Float>(bounds: Aabb<T>) -> [(Vec3<T>, T); 6] { pub fn aabb_to_planes<T: Float>(bounds: Aabb<T>) -> [Vec4<T>; 6] {
let zero = T::zero(); let zero = T::zero();
let one = T::one(); let one = T::one();
let bounds = bounds.map(|e| e.abs()); let bounds = bounds.map(|e| e.abs());
[ [
// bottom // bottom
(Vec3::new(zero, -one, zero), bounds.min.y), Vec4::new(zero, -one, zero, -bounds.min.y),
// top // top
(Vec3::new(zero, one, zero), bounds.max.y), Vec4::new(zero, one, zero, -bounds.max.y),
// left // left
(Vec3::new(-one, zero, zero), bounds.min.x), Vec4::new(-one, zero, zero, -bounds.min.x),
// right // right
(Vec3::new(one, zero, zero), bounds.max.x), Vec4::new(one, zero, zero, -bounds.max.x),
// near // near
(Vec3::new(zero, zero, -one), bounds.min.z), Vec4::new(zero, zero, -one, -bounds.min.z),
// far // far
(Vec3::new(zero, zero, one), bounds.max.z), Vec4::new(zero, zero, one, -bounds.max.z),
] ]
} }
@ -87,11 +87,11 @@ pub fn calc_view_frustum_world_coord<T: Float + MulAdd<T, T, Output = T>>(
world_pts world_pts
} }
pub fn point_plane_distance<T: Float>(point: Vec3<T>, (norm, dist): (Vec3<T>, T)) -> T { pub fn point_plane_distance<T: Float>(point: Vec3<T>, norm_dist: Vec4<T>) -> T {
norm.dot(point) - dist norm_dist.dot(Vec4::from_point(point))
} }
pub fn point_before_plane<T: Float>(point: Vec3<T>, plane: (Vec3<T>, T)) -> bool { pub fn point_before_plane<T: Float>(point: Vec3<T>, plane: Vec4<T>) -> bool {
point_plane_distance(point, plane) > T::zero() point_plane_distance(point, plane) > T::zero()
} }
@ -100,7 +100,7 @@ pub fn point_before_plane<T: Float>(point: Vec3<T>, plane: (Vec3<T>, T)) -> bool
/// (this implies that the polygon must be non-degenerate). /// (this implies that the polygon must be non-degenerate).
pub fn clip_points_by_plane<T: Float + MulAdd<T, T, Output = T> + core::fmt::Debug>( pub fn clip_points_by_plane<T: Float + MulAdd<T, T, Output = T> + core::fmt::Debug>(
points: &mut Vec<Vec3<T>>, points: &mut Vec<Vec3<T>>,
plane: (Vec3<T>, T), plane: Vec4<T>,
intersection_points: &mut Vec<Vec3<T>>, intersection_points: &mut Vec<Vec3<T>>,
) -> bool { ) -> bool {
if points.len() < 3 { if points.len() < 3 {
@ -108,18 +108,17 @@ pub fn clip_points_by_plane<T: Float + MulAdd<T, T, Output = T> + core::fmt::Deb
} }
// NOTE: Guaranteed to succeed since points.len() >= 3. // NOTE: Guaranteed to succeed since points.len() >= 3.
let mut current_point = points[points.len() - 1]; let mut current_point = points[points.len() - 1];
let (norm, dist) = plane;
let intersect_plane_edge = |a, b| { let intersect_plane_edge = |a, b| {
let diff = b - a; let diff: Vec3<_> = b - a;
let t = norm.dot(diff); let t = plane.dot(Vec4::from_direction(diff));
if t == T::zero() { if t == T::zero() {
None None
} else { } else {
let t = (dist - norm.dot(a)) / t; let t = -(plane.dot(Vec4::from_point(a)) / t);
if t < T::zero() || T::one() < t { if t < T::zero() || T::one() < t {
None None
} else { } else {
Some(diff.mul_add(Vec3::broadcast(t), a)) Some(diff * t + a)
} }
} }
}; };
@ -247,7 +246,7 @@ fn append_intersection_points<T: Float + core::fmt::Debug>(
pub fn clip_object_by_plane<T: Float + MulAdd<T, T, Output = T> + core::fmt::Debug>( pub fn clip_object_by_plane<T: Float + MulAdd<T, T, Output = T> + core::fmt::Debug>(
polys: &mut Vec<Vec<Vec3<T>>>, polys: &mut Vec<Vec<Vec3<T>>>,
plane: (Vec3<T>, T), plane: Vec4<T>,
tolerance: T, tolerance: T,
) { ) {
let mut intersection_points = Vec::new(); let mut intersection_points = Vec::new();

View File

@ -2365,9 +2365,6 @@ impl<V: RectRasterableVol> Terrain<V> {
} }
/// Maintain terrain data. To be called once per tick. /// Maintain terrain data. To be called once per tick.
// NOTE: All of the "useless" conversion reported here allow us to abstract over repr_c vs.
// simd vectors, so fixing this warning would make the code worse in this case.
#[allow(clippy::useless_conversion)]
#[allow(clippy::for_loops_over_fallibles)] // TODO: Pending review in #587 #[allow(clippy::for_loops_over_fallibles)] // TODO: Pending review in #587
#[allow(clippy::len_zero)] // TODO: Pending review in #587 #[allow(clippy::len_zero)] // TODO: Pending review in #587
pub fn maintain( pub fn maintain(
@ -2689,7 +2686,7 @@ impl<V: RectRasterableVol> Terrain<V> {
// Update chunk visibility // Update chunk visibility
let chunk_sz = V::RECT_SIZE.x as f32; let chunk_sz = V::RECT_SIZE.x as f32;
for (pos, chunk) in &mut self.chunks { for (pos, chunk) in &mut self.chunks {
let chunk_pos = pos.map(|e| e as f32 * chunk_sz); let chunk_pos = pos.as_::<f32>() * chunk_sz;
chunk.can_shadow_sun = false; chunk.can_shadow_sun = false;
@ -2745,33 +2742,32 @@ impl<V: RectRasterableVol> Terrain<V> {
// PSCs: Potential shadow casters // PSCs: Potential shadow casters
let ray_direction = scene_data.get_sun_dir(); let ray_direction = scene_data.get_sun_dir();
let collides_with_aabr = |a: math::Aabr<f32>, b: math::Aabr<f32>| { let collides_with_aabr = |a: math::Aabb<f32>, b: math::Aabr<f32>| {
a.min.partial_cmple(&b.max).reduce_and() && a.max.partial_cmpge(&b.min).reduce_and() let min = math::Vec4::new(a.min.x, a.min.y, b.min.x, b.min.y);
let max = math::Vec4::new(b.max.x, b.max.y, a.max.x, a.max.y);
min.partial_cmple_simd(max).reduce_and()
}; };
let (visible_light_volume, visible_psr_bounds) = if ray_direction.z < 0.0 let (visible_light_volume, visible_psr_bounds) = if ray_direction.z < 0.0
&& renderer.render_mode().shadow.is_map() && renderer.render_mode().shadow.is_map()
{ {
let visible_bounding_box = Aabb { let visible_bounding_box = math::Aabb::<f32> {
min: visible_bounding_box.min - focus_off, min: math::Vec3::from(visible_bounding_box.min - focus_off),
max: visible_bounding_box.max - focus_off, max: math::Vec3::from(visible_bounding_box.max - focus_off),
}; };
let focus_off = math::Vec3::from(focus_off); let focus_off = math::Vec3::from(focus_off);
let visible_bounds_fine = math::Aabb::<f64> { let visible_bounds_fine = visible_bounding_box.as_::<f64>();
min: math::Vec3::from(visible_bounding_box.min.map(f64::from)),
max: math::Vec3::from(visible_bounding_box.max.map(f64::from)),
};
let inv_proj_view = let inv_proj_view =
math::Mat4::from_col_arrays((proj_mat * view_mat).into_col_arrays()) math::Mat4::from_col_arrays((proj_mat * view_mat).into_col_arrays())
.map(f64::from) .as_::<f64>()
.inverted(); .inverted();
let ray_direction = math::Vec3::<f32>::from(ray_direction); let ray_direction = math::Vec3::<f32>::from(ray_direction);
let visible_light_volume = math::calc_focused_light_volume_points( let visible_light_volume = math::calc_focused_light_volume_points(
inv_proj_view, inv_proj_view,
ray_direction.map(f64::from), ray_direction.as_::<f64>(),
visible_bounds_fine, visible_bounds_fine,
1e-6, 1e-6,
) )
.map(|v| v.map(|e| e as f32)) .map(|v| v.as_::<f32>())
.collect::<Vec<_>>(); .collect::<Vec<_>>();
let cam_pos = math::Vec4::from(view_mat.inverted() * Vec4::unit_w()).xyz(); let cam_pos = math::Vec4::from(view_mat.inverted() * Vec4::unit_w()).xyz();
@ -2786,7 +2782,7 @@ impl<V: RectRasterableVol> Terrain<V> {
let ray_mat = ray_mat * math::Mat4::translation_3d(-focus_off); let ray_mat = ray_mat * math::Mat4::translation_3d(-focus_off);
let can_shadow_sun = |pos: Vec2<i32>, chunk: &TerrainChunkData| { let can_shadow_sun = |pos: Vec2<i32>, chunk: &TerrainChunkData| {
let chunk_pos = pos.map(|e| e as f32 * chunk_sz); let chunk_pos = pos.as_::<f32>() * chunk_sz;
// Ensure the chunk is within the PSR set. // Ensure the chunk is within the PSR set.
let chunk_box = math::Aabb { let chunk_box = math::Aabb {
@ -2798,11 +2794,11 @@ impl<V: RectRasterableVol> Terrain<V> {
), ),
}; };
let chunk_from_light = math::Aabr::from(math::fit_psr( let chunk_from_light = math::fit_psr(
ray_mat, ray_mat,
math::aabb_to_points(chunk_box).iter().copied(), math::aabb_to_points(chunk_box).iter().copied(),
|p| p, |p| p,
)); );
collides_with_aabr(chunk_from_light, visible_bounds) collides_with_aabr(chunk_from_light, visible_bounds)
}; };

View File

@ -11,7 +11,7 @@ bitvec = "0.17.4"
fxhash = "0.2.1" fxhash = "0.2.1"
image = { version = "0.23.8", default-features = false, features = ["png"] } image = { version = "0.23.8", default-features = false, features = ["png"] }
itertools = "0.9" itertools = "0.9"
vek = { version = "0.11.2", features = ["repr_simd", "serde"] } vek = { version = "0.12.0", features = ["platform_intrinsics", "serde"] }
noise = { version = "0.6.0", default-features = false } noise = { version = "0.6.0", default-features = false }
num = "0.2" num = "0.2"
ordered-float = "1.0" ordered-float = "1.0"