Major speedups with SIMD.

This commit is contained in:
Joshua Yanovski 2020-08-07 23:50:27 +02:00
parent fba64a7d93
commit 3dc973e0be
11 changed files with 67 additions and 62 deletions

29
Cargo.lock generated
View File

@ -1223,7 +1223,7 @@ name = "euc"
version = "0.5.1"
source = "git+https://github.com/zesterer/euc.git#c9a7c17a03d45fce00caeeca09afa1e1558cd183"
dependencies = [
"vek",
"vek 0.11.2",
]
[[package]]
@ -4468,7 +4468,7 @@ version = "0.1.0"
source = "git+https://gitlab.com/yusdacra/treeculler.git#efcf5283cf386117a7e654abdaa45ef664a08e42"
dependencies = [
"num-traits",
"vek",
"vek 0.11.2",
]
[[package]]
@ -4636,6 +4636,19 @@ dependencies = [
"static_assertions",
]
[[package]]
name = "vek"
version = "0.12.0"
source = "git+https://gitlab.com/veloren/vek.git?branch=fix_intrinsics#237a78528b505f34f6dde5dc77db3b642388fe4a"
dependencies = [
"approx",
"num-integer",
"num-traits",
"rustc_version",
"serde",
"static_assertions",
]
[[package]]
name = "veloren-chat-cli"
version = "0.6.0"
@ -4663,7 +4676,7 @@ dependencies = [
"specs",
"tracing",
"uvth 3.1.1",
"vek",
"vek 0.12.0",
"veloren-common",
"veloren_network",
]
@ -4694,7 +4707,7 @@ dependencies = [
"specs-idvs",
"sum_type",
"tracing",
"vek",
"vek 0.12.0",
]
[[package]]
@ -4725,7 +4738,7 @@ dependencies = [
"tiny_http",
"tracing",
"uvth 3.1.1",
"vek",
"vek 0.12.0",
"veloren-common",
"veloren-world",
"veloren_network",
@ -4786,7 +4799,7 @@ dependencies = [
"tracing-subscriber",
"treeculler",
"uvth 3.1.1",
"vek",
"vek 0.12.0",
"veloren-client",
"veloren-common",
"veloren-server",
@ -4805,7 +4818,7 @@ dependencies = [
"libloading 0.6.2",
"notify",
"tracing",
"vek",
"vek 0.12.0",
"veloren-common",
]
@ -4835,7 +4848,7 @@ dependencies = [
"serde",
"tracing",
"tracing-subscriber",
"vek",
"vek 0.12.0",
"veloren-common",
]

View File

@ -80,3 +80,4 @@ debug = 1
[patch.crates-io]
# cpal conflict fix isn't released yet
winit = { git = "https://github.com/Imberflur/winit.git", branch = "macos-test" }
vek = { git = "https://gitlab.com/veloren/vek.git", branch = "fix_intrinsics" }

View File

@ -19,6 +19,6 @@ num_cpus = "1.10.1"
tracing = { version = "0.1", default-features = false }
rayon = "^1.3.0"
specs = { git = "https://github.com/amethyst/specs.git", rev = "7a2e348ab2223818bad487695c66c43db88050a5" }
vek = { version = "0.11.2", features = ["repr_simd", "serde"] }
vek = { version = "0.12.0", features = ["platform_intrinsics", "serde"] }
hashbrown = { version = "0.7.2", features = ["rayon", "serde", "nightly"] }
authc = { git = "https://gitlab.com/veloren/auth.git", rev = "b943c85e4a38f5ec60cd18c34c73097640162bfe" }

View File

@ -13,7 +13,7 @@ specs-idvs = { git = "https://gitlab.com/veloren/specs-idvs.git", branch = "spec
roots = "0.0.5"
specs = { git = "https://github.com/amethyst/specs.git", features = ["serde", "storage-event-control"], rev = "7a2e348ab2223818bad487695c66c43db88050a5" }
vek = { version = "0.11.2", features = ["repr_simd", "serde"] }
vek = { version = "0.12.0", features = ["platform_intrinsics", "serde"] }
dot_vox = "4.0"
image = { version = "0.23.8", default-features = false, features = ["png"] }
serde = { version = "1.0.110", features = ["derive"] }

View File

@ -17,7 +17,7 @@ specs-idvs = { git = "https://gitlab.com/veloren/specs-idvs.git", branch = "spec
tracing = "0.1"
specs = { git = "https://github.com/amethyst/specs.git", features = ["shred-derive"], rev = "7a2e348ab2223818bad487695c66c43db88050a5" }
vek = { version = "0.11.2", features = ["repr_simd", "serde"] }
vek = { version = "0.12.0", features = ["platform_intrinsics", "serde"] }
uvth = "3.1.1"
futures-util = "0.3"
futures-executor = "0.3"

View File

@ -38,7 +38,7 @@ specs = { git = "https://github.com/amethyst/specs.git", rev = "7a2e348ab2223818
specs-idvs = { git = "https://gitlab.com/veloren/specs-idvs.git", branch = "specs-git" }
# Mathematics
vek = { version = "0.11.2", features = ["repr_simd", "serde"] }
vek = { version = "0.12.0", features = ["platform_intrinsics", "serde"] }
# Controller
gilrs = { version = "0.7", features = ["serde"] }

View File

@ -17,7 +17,7 @@ be-dyn-lib = []
default = ["be-dyn-lib"]
[dependencies]
vek = { version = "0.11.2", features = ["repr_simd", "serde"] }
vek = { version = "0.12.0", features = ["platform_intrinsics", "serde"] }
common = { package = "veloren-common", path = "../../../common" }
libloading = { version = "0.6.2", optional = true }
notify = { version = "5.0.0-pre.2", optional = true }

View File

@ -331,9 +331,6 @@ impl FigureMgr {
#[allow(clippy::redundant_pattern_matching)]
// TODO: Pending review in #587
// NOTE: All of the "useless" conversions reported here allow us to abstract over repr_c vs.
// simd vectors, so fixing this warning would make the code worse in this case.
#[allow(clippy::useless_conversion)]
pub fn update_lighting(&mut self, scene_data: &SceneData) {
let ecs = scene_data.state.ecs();
for (entity, light_emitter) in (&ecs.entities(), &ecs.read_storage::<LightEmitter>()).join()
@ -411,9 +408,6 @@ impl FigureMgr {
#[allow(clippy::or_fun_call)]
// TODO: Pending review in #587
// NOTE: All of the "useless" conversions reported here allow us to abstract over repr_c vs.
// simd vectors, so fixing this warning would make the code worse in this case.
#[allow(clippy::useless_conversion)]
pub fn maintain(
&mut self,
renderer: &mut Renderer,
@ -422,6 +416,7 @@ impl FigureMgr {
visible_psr_bounds: math::Aabr<f32>,
camera: &Camera,
) -> anim::vek::Aabb<f32> {
let visible_psr_bounds = math::Aabr::from(visible_psr_bounds);
let state = scene_data.state;
let time = state.get_time();
let tick = scene_data.tick;
@ -455,9 +450,10 @@ impl FigureMgr {
let ray_mat = ray_mat * math::Mat4::translation_3d(-focus_off);
let collides_with_aabr = |a: math::Aabr<f32>, b: math::Aabr<f32>| {
a.min.partial_cmple(&b.max).reduce_and() && a.max.partial_cmpge(&b.min).reduce_and()
let min = math::Vec4::new(a.min.x, a.min.y, b.min.x, b.min.y);
let max = math::Vec4::new(b.max.x, b.max.y, a.max.x, a.max.y);
min.partial_cmple_simd(max).reduce_and()
};
// println!("Aabr: {:?}", visible_bounds);
move |pos: (anim::vek::Vec3<f32>,), radius: f32| {
// Short circuit when there are no shadows to cast.
if !can_shadow_sun {

View File

@ -1,9 +1,9 @@
use core::{iter, mem};
use hashbrown::HashMap;
use num::traits::Float;
// pub use vek::{geom::repr_simd::*, mat::repr_simd::column_major::Mat4, ops::*,
// vec::repr_simd::*};
pub use vek::{geom::repr_c::*, mat::repr_c::column_major::Mat4, ops::*, vec::repr_c::*};
pub use vek::{geom::repr_simd::*, mat::repr_simd::column_major::Mat4, ops::*, vec::repr_simd::*};
// pub use vek::{geom::repr_c::*, mat::repr_c::column_major::Mat4, ops::*,
// vec::repr_c::*};
pub fn aabb_to_points<T: Float>(bounds: Aabb<T>) -> [Vec3<T>; 8] {
[
@ -18,7 +18,7 @@ pub fn aabb_to_points<T: Float>(bounds: Aabb<T>) -> [Vec3<T>; 8] {
]
}
/// Each Vec4 <a, b, c, d> should be interpreted as representing plane
/// Each Vec4 <a, b, c, -d> should be interpreted as representing plane
/// equation
///
/// a(x - x0) + b(y - y0) + c(z - z0) = 0, i.e.
@ -28,23 +28,23 @@ pub fn aabb_to_points<T: Float>(bounds: Aabb<T>) -> [Vec3<T>; 8] {
/// ax + by + cz = d
///
/// where d is the distance of the plane from the origin.
pub fn aabb_to_planes<T: Float>(bounds: Aabb<T>) -> [(Vec3<T>, T); 6] {
pub fn aabb_to_planes<T: Float>(bounds: Aabb<T>) -> [Vec4<T>; 6] {
let zero = T::zero();
let one = T::one();
let bounds = bounds.map(|e| e.abs());
[
// bottom
(Vec3::new(zero, -one, zero), bounds.min.y),
Vec4::new(zero, -one, zero, -bounds.min.y),
// top
(Vec3::new(zero, one, zero), bounds.max.y),
Vec4::new(zero, one, zero, -bounds.max.y),
// left
(Vec3::new(-one, zero, zero), bounds.min.x),
Vec4::new(-one, zero, zero, -bounds.min.x),
// right
(Vec3::new(one, zero, zero), bounds.max.x),
Vec4::new(one, zero, zero, -bounds.max.x),
// near
(Vec3::new(zero, zero, -one), bounds.min.z),
Vec4::new(zero, zero, -one, -bounds.min.z),
// far
(Vec3::new(zero, zero, one), bounds.max.z),
Vec4::new(zero, zero, one, -bounds.max.z),
]
}
@ -87,11 +87,11 @@ pub fn calc_view_frustum_world_coord<T: Float + MulAdd<T, T, Output = T>>(
world_pts
}
pub fn point_plane_distance<T: Float>(point: Vec3<T>, (norm, dist): (Vec3<T>, T)) -> T {
norm.dot(point) - dist
pub fn point_plane_distance<T: Float>(point: Vec3<T>, norm_dist: Vec4<T>) -> T {
norm_dist.dot(Vec4::from_point(point))
}
pub fn point_before_plane<T: Float>(point: Vec3<T>, plane: (Vec3<T>, T)) -> bool {
pub fn point_before_plane<T: Float>(point: Vec3<T>, plane: Vec4<T>) -> bool {
point_plane_distance(point, plane) > T::zero()
}
@ -100,7 +100,7 @@ pub fn point_before_plane<T: Float>(point: Vec3<T>, plane: (Vec3<T>, T)) -> bool
/// (this implies that the polygon must be non-degenerate).
pub fn clip_points_by_plane<T: Float + MulAdd<T, T, Output = T> + core::fmt::Debug>(
points: &mut Vec<Vec3<T>>,
plane: (Vec3<T>, T),
plane: Vec4<T>,
intersection_points: &mut Vec<Vec3<T>>,
) -> bool {
if points.len() < 3 {
@ -108,18 +108,17 @@ pub fn clip_points_by_plane<T: Float + MulAdd<T, T, Output = T> + core::fmt::Deb
}
// NOTE: Guaranteed to succeed since points.len() >= 3.
let mut current_point = points[points.len() - 1];
let (norm, dist) = plane;
let intersect_plane_edge = |a, b| {
let diff = b - a;
let t = norm.dot(diff);
let diff: Vec3<_> = b - a;
let t = plane.dot(Vec4::from_direction(diff));
if t == T::zero() {
None
} else {
let t = (dist - norm.dot(a)) / t;
let t = -(plane.dot(Vec4::from_point(a)) / t);
if t < T::zero() || T::one() < t {
None
} else {
Some(diff.mul_add(Vec3::broadcast(t), a))
Some(diff * t + a)
}
}
};
@ -247,7 +246,7 @@ fn append_intersection_points<T: Float + core::fmt::Debug>(
pub fn clip_object_by_plane<T: Float + MulAdd<T, T, Output = T> + core::fmt::Debug>(
polys: &mut Vec<Vec<Vec3<T>>>,
plane: (Vec3<T>, T),
plane: Vec4<T>,
tolerance: T,
) {
let mut intersection_points = Vec::new();

View File

@ -2365,9 +2365,6 @@ impl<V: RectRasterableVol> Terrain<V> {
}
/// Maintain terrain data. To be called once per tick.
// NOTE: All of the "useless" conversions reported here allow us to abstract over repr_c vs.
// simd vectors, so fixing this warning would make the code worse in this case.
#[allow(clippy::useless_conversion)]
#[allow(clippy::for_loops_over_fallibles)] // TODO: Pending review in #587
#[allow(clippy::len_zero)] // TODO: Pending review in #587
pub fn maintain(
@ -2689,7 +2686,7 @@ impl<V: RectRasterableVol> Terrain<V> {
// Update chunk visibility
let chunk_sz = V::RECT_SIZE.x as f32;
for (pos, chunk) in &mut self.chunks {
let chunk_pos = pos.map(|e| e as f32 * chunk_sz);
let chunk_pos = pos.as_::<f32>() * chunk_sz;
chunk.can_shadow_sun = false;
@ -2745,33 +2742,32 @@ impl<V: RectRasterableVol> Terrain<V> {
// PSCs: Potential shadow casters
let ray_direction = scene_data.get_sun_dir();
let collides_with_aabr = |a: math::Aabr<f32>, b: math::Aabr<f32>| {
a.min.partial_cmple(&b.max).reduce_and() && a.max.partial_cmpge(&b.min).reduce_and()
let collides_with_aabr = |a: math::Aabb<f32>, b: math::Aabr<f32>| {
let min = math::Vec4::new(a.min.x, a.min.y, b.min.x, b.min.y);
let max = math::Vec4::new(b.max.x, b.max.y, a.max.x, a.max.y);
min.partial_cmple_simd(max).reduce_and()
};
let (visible_light_volume, visible_psr_bounds) = if ray_direction.z < 0.0
&& renderer.render_mode().shadow.is_map()
{
let visible_bounding_box = Aabb {
min: visible_bounding_box.min - focus_off,
max: visible_bounding_box.max - focus_off,
let visible_bounding_box = math::Aabb::<f32> {
min: math::Vec3::from(visible_bounding_box.min - focus_off),
max: math::Vec3::from(visible_bounding_box.max - focus_off),
};
let focus_off = math::Vec3::from(focus_off);
let visible_bounds_fine = math::Aabb::<f64> {
min: math::Vec3::from(visible_bounding_box.min.map(f64::from)),
max: math::Vec3::from(visible_bounding_box.max.map(f64::from)),
};
let visible_bounds_fine = visible_bounding_box.as_::<f64>();
let inv_proj_view =
math::Mat4::from_col_arrays((proj_mat * view_mat).into_col_arrays())
.map(f64::from)
.as_::<f64>()
.inverted();
let ray_direction = math::Vec3::<f32>::from(ray_direction);
let visible_light_volume = math::calc_focused_light_volume_points(
inv_proj_view,
ray_direction.map(f64::from),
ray_direction.as_::<f64>(),
visible_bounds_fine,
1e-6,
)
.map(|v| v.map(|e| e as f32))
.map(|v| v.as_::<f32>())
.collect::<Vec<_>>();
let cam_pos = math::Vec4::from(view_mat.inverted() * Vec4::unit_w()).xyz();
@ -2786,7 +2782,7 @@ impl<V: RectRasterableVol> Terrain<V> {
let ray_mat = ray_mat * math::Mat4::translation_3d(-focus_off);
let can_shadow_sun = |pos: Vec2<i32>, chunk: &TerrainChunkData| {
let chunk_pos = pos.map(|e| e as f32 * chunk_sz);
let chunk_pos = pos.as_::<f32>() * chunk_sz;
// Ensure the chunk is within the PSR set.
let chunk_box = math::Aabb {
@ -2798,11 +2794,11 @@ impl<V: RectRasterableVol> Terrain<V> {
),
};
let chunk_from_light = math::Aabr::from(math::fit_psr(
let chunk_from_light = math::fit_psr(
ray_mat,
math::aabb_to_points(chunk_box).iter().copied(),
|p| p,
));
);
collides_with_aabr(chunk_from_light, visible_bounds)
};

View File

@ -11,7 +11,7 @@ bitvec = "0.17.4"
fxhash = "0.2.1"
image = { version = "0.23.8", default-features = false, features = ["png"] }
itertools = "0.9"
vek = { version = "0.11.2", features = ["repr_simd", "serde"] }
vek = { version = "0.12.0", features = ["platform_intrinsics", "serde"] }
noise = { version = "0.6.0", default-features = false }
num = "0.2"
ordered-float = "1.0"