veloren/assets/voxygen/shaders/light-shadows-geom.glsl
2022-10-23 20:03:21 +01:00

278 lines
13 KiB
GLSL

// Adapted from https://learnopengl.com/Advanced-Lighting/Shadows/Point-Shadows
// NOTE: We only technically need this for cube map arrays and geometry shader
// instancing.
#version 330 core
// #extension ARB_texture_storage : enable
#include <constants.glsl>
#define LIGHTING_TYPE LIGHTING_TYPE_REFLECTION
#define LIGHTING_REFLECTION_KIND LIGHTING_REFLECTION_KIND_GLOSSY
#if (FLUID_MODE == FLUID_MODE_LOW)
#define LIGHTING_TRANSPORT_MODE LIGHTING_TRANSPORT_MODE_IMPORTANCE
#elif (FLUID_MODE >= FLUID_MODE_MEDIUM)
#define LIGHTING_TRANSPORT_MODE LIGHTING_TRANSPORT_MODE_RADIANCE
#endif
#define LIGHTING_DISTRIBUTION_SCHEME LIGHTING_DISTRIBUTION_SCHEME_MICROFACET
#define LIGHTING_DISTRIBUTION LIGHTING_DISTRIBUTION_BECKMANN
#define HAS_SHADOW_MAPS
// Currently, we only need globals for the max light count (light_shadow_count.x)
// and the far plane (scene_res.z).
#include <globals.glsl>
#include <shadows.glsl>
// // Currently, we only need lights for the light position
// #include <light.glsl>
/* struct Light {
vec4 light_pos;
vec4 light_col;
// mat4 light_proj;
};
layout (std140)
uniform u_lights {
Light lights[31];
}; */
// Since our output primitive is a triangle strip, we have to render three vertices
// each.
#define VERTICES_PER_FACE 3
// Since we render our depth texture to a cube map, we need to render each face
// six times. If we used other shadow mapping methods with fewer outputs, this would
// shrink considerably.
#define FACES_PER_POINT_LIGHT 6
// If MAX_VERTEX_UNIFORM_COMPONENTS_ARB = 512 on many platforms, and we want a mat4
// for each of 6 directions for each light, 20 is close to the maximum allowable
// size. We could add a final matrix for the directional light of the sun or moon
// to bring us to 126 matrices, which is just 2 off.
//
// To improve this limit, we could do many things, such as:
// - choose an implementation that isn't cube maps (e.g. tetrahedrons or curves;
// if there were an easy way to sample from tetrahedrons, we'd be at 32 * 4 = 128
// exactly, leaving no room for a solar body, though).
// - Do more work in the geometry shader (e.g. just have a single projection
// matrix per light, and derive the different-facing components; or there may be
// other ways of greatly simplifying this). The tradeoff would be losing performance
// here.
// - Use ARB_instanced_arrays and switch lights with indexing, instead of a uniform
// buffer. This would probably work fine (and ARB_instanced_arrays is supported on
// pretty much every platform), but AFAIK it's possible that instanced arrays are
// slower than uniform arraay access on many platforms.
// - Don't try to do everything in one call (break this out into multiple passes).
//
// Actually, according to what I'm reading, MAX_GEOM_UNIFORM_COMPONENTS = 1024, and
// gl_MaxGeometryUniformComponents = 1024.
//
// Also, this only applies to uniforms defined *outside* of uniform blocks, of which
// there can be up to 12 (14 in OpenGL 4.3, which we definitely can't support).
// GL_MAX_UNIFORM_BLOCK_SIZE has a minimum of 16384, which *easily* exceeds our usage
// constraints. So this part might not matter.
//
// Other restrictions are easy to satisfy:
//
// gl_MaxGeometryVaryingComponents has a minimum of 64 and is the maximum number of
// varying components; I think this is the number of out components per vertex, which
// is technically 0, but would be 4 if we wrote FragPos. But it might also
// be the *total* number of varying components, in which case if we wrote FragPos
// it would be 4 * 20 * 6 * 3 = 1440, which would blow it out of the water. However,
// I kind of doubt this interpretation because writing FragPos for each of 18 vertices,
// as the original shader did, already yields 4 * 18 = 72, and it seems unlikely that
// the original example exceeds OpenGL limits.
//
// gl_MaxGeometryOutputComponents has a minimum of 128 and is the maximum number of
// components allowed in out variables; we easily fall under this since we actually
// have 0 of these. However, if we were to write FragPos for each vertex, it *might*
// cause us to exceed this limit, depending on whether it refers to the total output
// component count *including* varying components, or not. See the previous
// discussion; since 72 < 128 it's more plausible that this interpretation might be
// correct, but hopefully it's not.
//
// gl_MaxGeometryInputComponents has a minimum of 64 and we easily fall under that
// limit (I'm actually not sure we even have any user-defined input components?).
//
// gl_MaxGeometryTextureImageUnits = 16 and we have no texture image units (or maybe
// 1, the one we bound?). This might come into play if we were to have attached
// cubemaps instead of a single cubemap array, in which case it would limit us to
// 16 lights *regardless* of any of the fixes mentioned above (i.e., we'd just have
// to split up draw calls, I think).
//
// ---
//
// However, there is another limit to consider: GL_MAX_GEOMETRY_OUTPUT_VERTICES. Its
// minimum is 256, and 20 * 6 * 3 = 360, which exceeds that. This introduces a new
// limit of at most 14 point lights.
//
// Another, related limit is GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS. This counts
// every component output ("component" is usually a 4-byte field of a vector, but maybe
// this would improve with something like half-floats?), and has a minimum (as of
// OpenGL 3.3) of 1024. Since even builtin outputs gl_Layer count against this total,
// this means we issue 5 components per vertex, and 14 * 6 * 3 * 5 = 1260 > 1024.
//
// Ultimately, we find our maximum output limit of 11, ≤ 1024/5/3/6.
//
// If we choose to reserve a slot for a non-point light (and/or other uniforms), it
// is just 10, or half what we got from VERTICES_PER_FACE (we could also round down to
// 8 as a power of 2, if we had to).
//
// Unlike the input limits, whwich we can get around with "clever" solutions, it seems
// likely that the only real way to defeat the vertex limits is to use instancing of
// some sort (be it geometry shader or otherwise). This would restrict us to OpenGL
// 4.0 or above.
//
// A further consideration (were we to switch to OpenGL 4.1-supported features, but
// actually it is often supported on 3.3 hardware with ARB_viewport_array--whereas
// geometry shader instancing is *not* supported on any 3.3 hardware, so would actually
// require us to upgrade) would be setting gl_ViewportIndex. The main reason to consider
// this is that it allows specifying a separate scissor rectangle per viewport. This
// introduces two new constraints. Firstly, it adds an extra component to each vertex
// (lowering our maximum to 9 faces per light ≤ 1024/6/3/6, or 8 if we want to support a
// directional light).
//
// Secondly, a new constant (MAX_VIEWPORTS) is introduced, which would restrict the
// total number of active viewports; the minimum value for this is 16. While this may
// not seem all that relevant since our current hard limit is 11, the difference is that
// this limit would apply *across* instanced calls (since it may be a "global"
// restriction, tied to the OpenGL context; this means it couldn't even be a multiple
// frame buffer thing, as there is usually one per window). This would also tie in
// with gl_MaxGeometryTextureImageUnits, I guess.
//
// --
//
// I just realized tht using cube map arrays at all bumps our required OpenGL
// version to 4.0, so let's just do instancing...
//
// The instancing limit on MAX_GEOMETRY_SHADER_INVOCATIONS has a minimum of 32, which
// would be sufficient to run through all 32 lights with a different cube map and
// completely removes any imits on ight count.
//
// This should instantly bring us below all relevant limits in all cases considered
// except for the two that would require 16. Unfortunately, 32 is also the *maximum*
// number of point lights, which is much higher than the usual value, and the instance
// count has to be a constant. If we were to instead geometry-shader-instance each
// *face*, we'd get a maximum light count of 56 ≤ 1024/6/3, which is not as elegant
// but is easily higher than 32. So, let's try using that instead.
//
// It is *possible* that using instancing on the *vertex* shader with the (dynamically
// uniform) total number of instances set to the actual number of point lights, would
// improve performance, since it would give us a 1:1 vertex input:output ratio, which
// might be optimized in hardware.
//
// It also seems plausible that constructing a separate geometry shader with values
// from 1 to 32 would be worthwhile, but that seems a little extreme.
//
// ---
//
// Since wgpu doesn't support geometry shaders anyway, it seems likely that we'll have
// to do the multiple draw calls, anyway... I don't think gl_Layer can be set from
// outside a geometry shader. But in wgpu, such a thing is much cheaper, anyway.
#define MAX_POINT_LIGHTS 31
// We use geometry shader instancing to construct each face separately.
#define MAX_LAYER_VERTICES_PER_FACE (MAX_POINT_LIGHTS * VERTICES_PER_FACE)
#define MAX_LAYER_FACES (MAX_POINT_LIGHTS * FACES_PER_POINT_LIGHT)
layout (triangles/*, invocations = 6*/) in;
layout (triangle_strip, max_vertices = /*MAX_LAYER_VERTICES_PER_FACE*//*96*/18) out;
//struct ShadowLocals {
// mat4 shadowMatrices;
// mat4 texture_mat;
//};
//
//layout (std140)
//uniform u_light_shadows {
// ShadowLocals shadowMats[/*MAX_LAYER_FACES*/192];
//};
// NOTE: We choose not to output FragPos currently to save on space limitations
// (see extensive documentation above). However, as these limitations have been
// relaxed (unless the total of all our varying output components can't exceed
// 128, which would mean FragPos would sum to 4 * 3 * 32 = 384; this could be
// remedied only by setting MAX_POINT_LIGHTS to ), we might enable it again soon.
//
// out vec3 FragPos; // FragPos from GS (output per emitvertex)
// flat out int FragLayer; // Current layer
// const vec3 normals[6] = vec3[](vec3(-1,0,0), vec3(1,0,0), vec3(0,-1,0), vec3(0,1,0), vec3(0,0,-1), vec3(0,0,1));
void main() {
// return;
// NOTE: Assuming that light_shadow_count.x < MAX_POINT_LIGHTS. We could min
// it, but that might make this less optimized, and I'd like to keep this loop as
// optimized as is reasonably possible.
// int face = gl_InvocationID;
// Part 1: emit directed lights.
/* if (face <= light_shadow_count.z) {
// Directed light.
for(int i = 0; i < VERTICES_PER_FACE; ++i) // for each triangle vertex
{
// NOTE: See above, we don't make FragPos a uniform.
FragPos = gl_in[i].gl_Position;
FragLayer = 0; // 0 is the directed light layer.
// vec4 FragPos = gl_in[i].gl_Position;
gl_Layer = i; // built-in variable that specifies to which face we render.
gl_Position = shadowMats[i].shadowMatrices * FragPos;
EmitVertex();
}
EndPrimitive();
} */
// Part 2: emit point lights.
/* if (light_shadow_count.x == 1) {
return;
} */
#if (SHADOW_MODE == SHADOW_MODE_MAP)
for (uint layer = 1u; layer <= min(light_shadow_count.x, 1u); ++layer)
{
int layer_base = int(layer) * FACES_PER_POINT_LIGHT;
// We use instancing here in order to increase the number of emitted vertices.
// int face = gl_InvocationID;
for(int face = 0; face < FACES_PER_POINT_LIGHT; ++face)
{
// int layer_face = layer * FACES_PER_POINT_LIGHT + face;
// int layer_face = layer * FACES_PER_POINT_LIGHT + face;
// for(int i = VERTICES_PER_FACE - 1; i >= 0; --i) // for each triangle vertex
for(int i = 0; i < VERTICES_PER_FACE; ++i) // for each triangle vertex
{
// NOTE: See above, we don't make FragPos a uniform.
vec3 fragPos = gl_in[i].gl_Position.xyz;
// FragPos = fragPos - (lights[((/*FragLayer*/layer - 1u) & 31u)].light_pos.xyz - focus_off.xyz);
// FragLayer = layer;
// float lightDistance = length(FragPos - lights[((layer - 1) & 31)].light_pos.xyz);
// lightDistance /= screen_res.w;
// vec4 FragPos = gl_in[i].gl_Position;
// NOTE: Our normals map to the same thing as cube map normals, *except* that their normal direction is
// swapped; we can fix this by doing normal ^ 0x1u. However, we also want to cull back faces, not front
// faces, so we only care about the shadow cast by the *back* of the triangle, which means we ^ 0x1u
// again and cancel it out.
// int face = int(((floatBitsToUint(gl_Position.w) >> 29) & 0x7u) ^ 0x1u);
int layer_face = layer_base + face;
gl_Layer = face;//layer_face; // built-in variable that specifies to which face we render.
gl_Position = shadowMats[layer_face].shadowMatrices * vec4(fragPos, 1.0);
// gl_Position.z = -((gl_Position.z + screen_res.z) / (screen_res.w - screen_res.z)) * lightDistance;
// gl_Position.z = gl_Position.z / screen_res.w;
// gl_Position.z = gl_Position.z / gl_Position.w;
// gl_Position.z = -1000.0 / (gl_Position.z + 10000.0);
// lightDistance = -(lightDistance + screen_res.z) / (screen_res.w - screen_res.z);
// gl_Position.z = lightDistance;
EmitVertex();
}
EndPrimitive();
}
}
#endif
}