// Geometry shader for the point-light shadow pass: main() re-emits each input
// triangle once per cube-map face, transformed by that face's shadow matrix.
//
// Adapted from https://learnopengl.com/Advanced-Lighting/Shadows/Point-Shadows

// NOTE: We only technically need this for cube map arrays and geometry shader
// instancing.
#version 330 core
// #extension ARB_texture_storage : enable

// NOTE(review): this include directive (and the two further down) has no target.
// The file name looks like it was stripped; FLUID_MODE_* and the LIGHTING_*
// constants used below are not defined in this file, so they presumably come from
// here -- confirm and restore the header name.
#include

// Lighting configuration macros. Nothing in this file reads them; presumably they
// are consumed by the included headers -- confirm.
#define LIGHTING_TYPE LIGHTING_TYPE_REFLECTION

#define LIGHTING_REFLECTION_KIND LIGHTING_REFLECTION_KIND_GLOSSY

// There is no #else branch: for any other FLUID_MODE value this block leaves
// LIGHTING_TRANSPORT_MODE undefined.
#if (FLUID_MODE == FLUID_MODE_CHEAP)
#define LIGHTING_TRANSPORT_MODE LIGHTING_TRANSPORT_MODE_IMPORTANCE
#elif (FLUID_MODE == FLUID_MODE_SHINY)
#define LIGHTING_TRANSPORT_MODE LIGHTING_TRANSPORT_MODE_RADIANCE
#endif

#define LIGHTING_DISTRIBUTION_SCHEME LIGHTING_DISTRIBUTION_SCHEME_MICROFACET

#define LIGHTING_DISTRIBUTION LIGHTING_DISTRIBUTION_BECKMANN

// Currently, we only need globals for the max light count (light_shadow_count.x)
// and the far plane (scene_res.z).
#include
// Currently, we only need lights for the light position
#include

// Since our output primitive is a triangle strip, we have to render three vertices
// each.
#define VERTICES_PER_FACE 3

// Since we render our depth texture to a cube map, we need to render each face
// six times. If we used other shadow mapping methods with fewer outputs, this would
// shrink considerably.
#define FACES_PER_POINT_LIGHT 6

// If MAX_VERTEX_UNIFORM_COMPONENTS_ARB = 512 on many platforms, and we want a mat4
// for each of 6 directions for each light, 20 is close to the maximum allowable
// size. We could add a final matrix for the directional light of the sun or moon
// to bring us to 126 matrices, which is just 2 off.
//
// To improve this limit, we could do many things, such as:
// - choose an implementation that isn't cube maps (e.g. tetrahedrons or curves;
//   if there were an easy way to sample from tetrahedrons, we'd be at 32 * 4 = 128
//   exactly, leaving no room for a solar body, though).
// - Do more work in the geometry shader (e.g. just have a single projection
//   matrix per light, and derive the different-facing components; or there may be
//   other ways of greatly simplifying this). The tradeoff would be losing performance
//   here.
// - Use ARB_instanced_arrays and switch lights with indexing, instead of a uniform
//   buffer. This would probably work fine (and ARB_instanced_arrays is supported on
//   pretty much every platform), but AFAIK it's possible that instanced arrays are
//   slower than uniform array access on many platforms.
// - Don't try to do everything in one call (break this out into multiple passes).
//
// Actually, according to what I'm reading, MAX_GEOM_UNIFORM_COMPONENTS = 1024, and
// gl_MaxGeometryUniformComponents = 1024.
//
// Also, this only applies to uniforms defined *outside* of uniform blocks, of which
// there can be up to 12 (14 in OpenGL 4.3, which we definitely can't support).
// GL_MAX_UNIFORM_BLOCK_SIZE has a minimum of 16384, which *easily* exceeds our usage
// constraints. So this part might not matter.
//
// Other restrictions are easy to satisfy:
//
// gl_MaxGeometryVaryingComponents has a minimum of 64 and is the maximum number of
// varying components; I think this is the number of out components per vertex, which
// is technically 0, but would be 4 if we wrote FragPos. But it might also
// be the *total* number of varying components, in which case if we wrote FragPos
// it would be 4 * 20 * 6 * 3 = 1440, which would blow it out of the water. However,
// I kind of doubt this interpretation because writing FragPos for each of 18 vertices,
// as the original shader did, already yields 4 * 18 = 72, and it seems unlikely that
// the original example exceeds OpenGL limits.
//
// gl_MaxGeometryOutputComponents has a minimum of 128 and is the maximum number of
// components allowed in out variables; we easily fall under this since we actually
// have 0 of these. However, if we were to write FragPos for each vertex, it *might*
// cause us to exceed this limit, depending on whether it refers to the total output
// component count *including* varying components, or not. See the previous
// discussion; since 72 < 128 it's more plausible that this interpretation might be
// correct, but hopefully it's not.
//
// gl_MaxGeometryInputComponents has a minimum of 64 and we easily fall under that
// limit (I'm actually not sure we even have any user-defined input components?).
//
// gl_MaxGeometryTextureImageUnits = 16 and we have no texture image units (or maybe
// 1, the one we bound?). This might come into play if we were to have attached
// cubemaps instead of a single cubemap array, in which case it would limit us to
// 16 lights *regardless* of any of the fixes mentioned above (i.e., we'd just have
// to split up draw calls, I think).
//
// ---
//
// However, there is another limit to consider: GL_MAX_GEOMETRY_OUTPUT_VERTICES. Its
// minimum is 256, and 20 * 6 * 3 = 360, which exceeds that. This introduces a new
// limit of at most 14 point lights.
//
// Another, related limit is GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS. This counts
// every component output ("component" is usually a 4-byte field of a vector, but maybe
// this would improve with something like half-floats?), and has a minimum (as of
// OpenGL 3.3) of 1024. Since even builtin outputs such as gl_Layer count against this
// total, this means we issue 5 components per vertex, and 14 * 6 * 3 * 5 = 1260 > 1024.
//
// Ultimately, we find our maximum output limit of 11, ≤ 1024/5/3/6.
//
// If we choose to reserve a slot for a non-point light (and/or other uniforms), it
// is just 10, or half what we got from VERTICES_PER_FACE (we could also round down to
// 8 as a power of 2, if we had to).
//
// Unlike the input limits, which we can get around with "clever" solutions, it seems
// likely that the only real way to defeat the vertex limits is to use instancing of
// some sort (be it geometry shader or otherwise). This would restrict us to OpenGL
// 4.0 or above.
//
// A further consideration (were we to switch to OpenGL 4.1-supported features, but
// actually it is often supported on 3.3 hardware with ARB_viewport_array--whereas
// geometry shader instancing is *not* supported on any 3.3 hardware, so would actually
// require us to upgrade) would be setting gl_ViewportIndex. The main reason to consider
// this is that it allows specifying a separate scissor rectangle per viewport. This
// introduces two new constraints. Firstly, it adds an extra component to each vertex
// (lowering our maximum to 9 faces per light ≤ 1024/6/3/6, or 8 if we want to support a
// directional light).
//
// Secondly, a new constant (MAX_VIEWPORTS) is introduced, which would restrict the
// total number of active viewports; the minimum value for this is 16. While this may
// not seem all that relevant since our current hard limit is 11, the difference is that
// this limit would apply *across* instanced calls (since it may be a "global"
// restriction, tied to the OpenGL context; this means it couldn't even be a multiple
// frame buffer thing, as there is usually one per window). This would also tie in
// with gl_MaxGeometryTextureImageUnits, I guess.
//
// --
//
// I just realized that using cube map arrays at all bumps our required OpenGL
// version to 4.0, so let's just do instancing...
//
// The instancing limit on MAX_GEOMETRY_SHADER_INVOCATIONS has a minimum of 32, which
// would be sufficient to run through all 32 lights with a different cube map and
// completely removes any limits on light count.
//
// This should instantly bring us below all relevant limits in all cases considered
// except for the two that would require 16.
// Unfortunately, 32 is also the *maximum*
// number of point lights, which is much higher than the usual value, and the instance
// count has to be a constant. If we were to instead geometry-shader-instance each
// *face*, we'd get a maximum light count of 56 ≤ 1024/6/3, which is not as elegant
// but is easily higher than 32. So, let's try using that instead.
//
// It is *possible* that using instancing on the *vertex* shader with the (dynamically
// uniform) total number of instances set to the actual number of point lights, would
// improve performance, since it would give us a 1:1 vertex input:output ratio, which
// might be optimized in hardware.
//
// It also seems plausible that constructing a separate geometry shader with values
// from 1 to 32 would be worthwhile, but that seems a little extreme.
//
// ---
//
// Since wgpu doesn't support geometry shaders anyway, it seems likely that we'll have
// to do the multiple draw calls, anyway... I don't think gl_Layer can be set from
// outside a geometry shader. But in wgpu, such a thing is much cheaper, anyway.
#define MAX_POINT_LIGHTS 31

// We use geometry shader instancing to construct each face separately.
// NOTE(review): the `invocations = 6` qualifier below is commented out, so
// instancing is not actually enabled in the code as written; both MAX_LAYER_*
// macros are likewise only referenced from commented-out spots in this file.
#define MAX_LAYER_VERTICES_PER_FACE (MAX_POINT_LIGHTS * VERTICES_PER_FACE)

#define MAX_LAYER_FACES (MAX_POINT_LIGHTS * FACES_PER_POINT_LIGHT)

// Input primitive: one triangle per geometry shader run.
layout (triangles/*, invocations = 6*/) in;

// Output: triangle strips, at most 18 vertices per run, i.e.
// FACES_PER_POINT_LIGHT * VERTICES_PER_FACE = 6 * 3 for a single light.
layout (triangle_strip, max_vertices = /*MAX_LAYER_VERTICES_PER_FACE*//*96*/18) out;

// One transform per shadow-map layer face; main() multiplies vertex positions by it.
struct ShadowLocals {
    mat4 shadowMatrices;
};

// Table of per-face shadow matrices, indexed in main() by
// layer * FACES_PER_POINT_LIGHT + face.
// NOTE(review): the array size is the literal 192 (= 32 * 6) while MAX_LAYER_FACES
// would expand to 31 * 6 = 186; presumably the extra six slots are the layer-0
// (directed light) entries -- confirm against the code that fills this buffer.
// At 64 bytes per mat4 under std140 this is 12288 bytes.
layout (std140)
uniform u_light_shadows {
    ShadowLocals shadowMats[/*MAX_LAYER_FACES*/192];
};

// NOTE: We choose not to output FragPos currently to save on space limitations
// (see extensive documentation above). However, as these limitations have been
// relaxed (unless the total of all our varying output components can't exceed
// 128, which would mean FragPos would sum to 4 * 3 * 32 = 384; this could be
// remedied only by setting MAX_POINT_LIGHTS to ), we might enable it again soon.
// NOTE(review): the target value is missing from the sentence above; by its own
// arithmetic (4 * 3 * N <= 128) it would be N <= 10 -- confirm the intended number.
//
// Per-vertex outputs, currently disabled:
// out vec3 FragPos; // FragPos from GS (output per emitvertex)
// flat out int FragLayer; // Current layer

// Face-normal lookup table, currently disabled:
// const vec3 normals[6] = vec3[](vec3(-1,0,0), vec3(1,0,0), vec3(0,-1,0), vec3(0,1,0), vec3(0,0,-1), vec3(0,0,1));

// Entry point. For the one point-light layer that is currently enabled, the input
// triangle is emitted six times (once per cube-map face), each copy transformed by
// the matching entry of shadowMats.
void main()
{
    // return;

    // The light loop is meant to run up to light_shadow_count.x, which is assumed to
    // stay below MAX_POINT_LIGHTS. It is deliberately not clamped with min(), to keep
    // the loop as cheap as reasonably possible.
    // int face = gl_InvocationID;

    // Part 1: directed lights (disabled).
    /* if (face <= light_shadow_count.z) {
        // Directed light.
        for(int i = 0; i < VERTICES_PER_FACE; ++i) // for each triangle vertex
        {
            // FragPos is not a shader output at the moment.
            FragPos = gl_in[i].gl_Position;
            FragLayer = 0; // 0 is the directed light layer.
            // vec4 FragPos = gl_in[i].gl_Position;
            gl_Layer = i; // built-in variable that specifies to which face we render.
            gl_Position = shadowMats[i].shadowMatrices * FragPos;
            EmitVertex();
        }
        EndPrimitive();
    } */

    // Part 2: point lights.
    /* if (light_shadow_count.x == 1) {
        return;
    } */

    // The upper bound is pinned to 1 in place of light_shadow_count.x, so exactly one
    // light layer (layer 1) is processed.
    for (int light_layer = 1; light_layer <= /*light_shadow_count.x*/1; ++light_layer)
    {
        // Index of this light's first matrix inside shadowMats.
        int first_matrix = light_layer * FACES_PER_POINT_LIGHT;

        // With geometry-shader instancing the face would come from the invocation id
        // instead of this loop:
        // int face = gl_InvocationID;
        for (int cube_face = 0; cube_face < FACES_PER_POINT_LIGHT; ++cube_face)
        {
            // The matrix is the same for all three vertices of this face, so it is
            // looked up once per face.
            mat4 face_transform = shadowMats[first_matrix + cube_face].shadowMatrices;

            // Disabled alternative that derives the face from bits packed into
            // gl_Position.w. The packed normals match the cube-map normals except for
            // a flipped direction (fixable with ^ 0x1u); since the shadow of the
            // triangle's *back* is what matters (back faces are culled, not front
            // faces), a second ^ 0x1u applies and the two cancel out.
            // int face = int(((floatBitsToUint(gl_Position.w) >> 29) & 0x7u) ^ 0x1u);

            for (int corner = 0; corner < VERTICES_PER_FACE; ++corner)
            {
                // Kept as a local: the FragPos / FragLayer outputs are disabled.
                // FragPos = gl_in[i].gl_Position.xyz;
                // FragLayer = layer;
                vec4 vertex_pos = vec4(gl_in[corner].gl_Position.xyz, 1.0);

                // Disabled light-distance computation:
                // float lightDistance = length(FragPos - lights[((layer - 1) & 31)].light_pos.xyz);
                // lightDistance /= screen_res.w;

                // Output values are undefined after EmitVertex(), so both built-ins
                // are written again for every vertex. gl_Layer picks the target layer;
                // the bare face index is used here rather than the combined
                // light/face index (layer_face).
                gl_Layer = cube_face;
                gl_Position = face_transform * vertex_pos;

                // Disabled depth-remapping experiments:
                // gl_Position.z = -((gl_Position.z + screen_res.z) / (screen_res.w - screen_res.z)) * lightDistance;
                // gl_Position.z = gl_Position.z / screen_res.w;
                // gl_Position.z = gl_Position.z / gl_Position.w;
                // gl_Position.z = -1000.0 / (gl_Position.z + 10000.0);
                // lightDistance = -(lightDistance + screen_res.z) / (screen_res.w - screen_res.z);
                // gl_Position.z = lightDistance;
                EmitVertex();
            }
            EndPrimitive();
        }
    }
}