|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "rasterize_gl.h" |
|
#include "glutil.h" |
|
#include <vector> |
|
#define STRINGIFY_SHADER_SOURCE(x) #x |
|
|
|
|
|
|
|
|
|
#define ROUND_UP(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) |
|
static int ROUND_UP_BITS(uint32_t x, uint32_t y) |
|
{ |
|
|
|
if (x < (1u << y)) |
|
return x; |
|
uint32_t m = 0; |
|
while (x & ~m) |
|
m = (m << 1) | 1u; |
|
m >>= y; |
|
if (!(x & m)) |
|
return x; |
|
return (x | m) + 1u; |
|
} |
|
|
|
|
|
|
|
|
|
struct GLDrawCmd |
|
{ |
|
uint32_t count; |
|
uint32_t instanceCount; |
|
uint32_t firstIndex; |
|
uint32_t baseVertex; |
|
uint32_t baseInstance; |
|
}; |
|
|
|
|
|
|
|
|
|
static void compileGLShader(NVDR_CTX_ARGS, const RasterizeGLState& s, GLuint* pShader, GLenum shaderType, const char* src_buf) |
|
{ |
|
std::string src(src_buf); |
|
|
|
|
|
int n = src.find('\n') + 1; |
|
if (s.enableZModify) |
|
src.insert(n, "#define IF_ZMODIFY(x) x\n"); |
|
else |
|
src.insert(n, "#define IF_ZMODIFY(x)\n"); |
|
|
|
const char *cstr = src.c_str(); |
|
*pShader = 0; |
|
NVDR_CHECK_GL_ERROR(*pShader = glCreateShader(shaderType)); |
|
NVDR_CHECK_GL_ERROR(glShaderSource(*pShader, 1, &cstr, 0)); |
|
NVDR_CHECK_GL_ERROR(glCompileShader(*pShader)); |
|
} |
|
|
|
static void constructGLProgram(NVDR_CTX_ARGS, GLuint* pProgram, GLuint glVertexShader, GLuint glGeometryShader, GLuint glFragmentShader) |
|
{ |
|
*pProgram = 0; |
|
|
|
GLuint glProgram = 0; |
|
NVDR_CHECK_GL_ERROR(glProgram = glCreateProgram()); |
|
NVDR_CHECK_GL_ERROR(glAttachShader(glProgram, glVertexShader)); |
|
NVDR_CHECK_GL_ERROR(glAttachShader(glProgram, glGeometryShader)); |
|
NVDR_CHECK_GL_ERROR(glAttachShader(glProgram, glFragmentShader)); |
|
NVDR_CHECK_GL_ERROR(glLinkProgram(glProgram)); |
|
|
|
GLint linkStatus = 0; |
|
NVDR_CHECK_GL_ERROR(glGetProgramiv(glProgram, GL_LINK_STATUS, &linkStatus)); |
|
if (!linkStatus) |
|
{ |
|
GLint infoLen = 0; |
|
NVDR_CHECK_GL_ERROR(glGetProgramiv(glProgram, GL_INFO_LOG_LENGTH, &infoLen)); |
|
if (infoLen) |
|
{ |
|
const char* hdr = "glLinkProgram() failed:\n"; |
|
std::vector<char> info(strlen(hdr) + infoLen); |
|
strcpy(&info[0], hdr); |
|
NVDR_CHECK_GL_ERROR(glGetProgramInfoLog(glProgram, infoLen, &infoLen, &info[strlen(hdr)])); |
|
NVDR_CHECK(0, &info[0]); |
|
} |
|
NVDR_CHECK(0, "glLinkProgram() failed"); |
|
} |
|
|
|
*pProgram = glProgram; |
|
} |
|
|
|
|
|
|
|
|
|
void rasterizeInitGLContext(NVDR_CTX_ARGS, RasterizeGLState& s, int cudaDeviceIdx) |
|
{ |
|
|
|
s.glctx = createGLContext(cudaDeviceIdx); |
|
setGLContext(s.glctx); |
|
|
|
|
|
GLint vMajor = 0; |
|
GLint vMinor = 0; |
|
glGetIntegerv(GL_MAJOR_VERSION, &vMajor); |
|
glGetIntegerv(GL_MINOR_VERSION, &vMinor); |
|
glGetError(); |
|
LOG(INFO) << "OpenGL version reported as " << vMajor << "." << vMinor; |
|
NVDR_CHECK((vMajor == 4 && vMinor >= 4) || vMajor > 4, "OpenGL 4.4 or later is required"); |
|
|
|
|
|
int capMajor = 0; |
|
NVDR_CHECK_CUDA_ERROR(cudaDeviceGetAttribute(&capMajor, cudaDevAttrComputeCapabilityMajor, cudaDeviceIdx)); |
|
s.enableZModify = (capMajor >= 8); |
|
|
|
|
|
int num_outputs = s.enableDB ? 2 : 1; |
|
|
|
|
|
compileGLShader(NVDR_CTX_PARAMS, s, &s.glVertexShader, GL_VERTEX_SHADER, |
|
"#version 330\n" |
|
"#extension GL_ARB_shader_draw_parameters : enable\n" |
|
STRINGIFY_SHADER_SOURCE( |
|
layout(location = 0) in vec4 in_pos; |
|
out int v_layer; |
|
out int v_offset; |
|
void main() |
|
{ |
|
int layer = gl_DrawIDARB; |
|
gl_Position = in_pos; |
|
v_layer = layer; |
|
v_offset = gl_BaseInstanceARB; |
|
} |
|
) |
|
); |
|
|
|
|
|
if (s.enableDB) |
|
{ |
|
|
|
|
|
|
|
|
|
|
|
compileGLShader(NVDR_CTX_PARAMS, s, &s.glGeometryShader, GL_GEOMETRY_SHADER, |
|
"#version 430\n" |
|
STRINGIFY_SHADER_SOURCE( |
|
layout(triangles) in; |
|
layout(triangle_strip, max_vertices=3) out; |
|
layout(location = 0) uniform vec2 vp_scale; |
|
in int v_layer[]; |
|
in int v_offset[]; |
|
out vec4 var_uvzw; |
|
out vec4 var_db; |
|
void main() |
|
{ |
|
|
|
float w0 = gl_in[0].gl_Position.w; |
|
float w1 = gl_in[1].gl_Position.w; |
|
float w2 = gl_in[2].gl_Position.w; |
|
vec2 p0 = gl_in[0].gl_Position.xy; |
|
vec2 p1 = gl_in[1].gl_Position.xy; |
|
vec2 p2 = gl_in[2].gl_Position.xy; |
|
vec2 e0 = p0*w2 - p2*w0; |
|
vec2 e1 = p1*w2 - p2*w1; |
|
float a = e0.x*e1.y - e0.y*e1.x; |
|
|
|
|
|
float eps = 1e-6f; |
|
float ca = (abs(a) >= eps) ? a : (a < 0.f) ? -eps : eps; |
|
float ia = 1.f / ca; |
|
|
|
vec2 ascl = ia * vp_scale; |
|
float dudx = e1.y * ascl.x; |
|
float dudy = -e1.x * ascl.y; |
|
float dvdx = -e0.y * ascl.x; |
|
float dvdy = e0.x * ascl.y; |
|
|
|
float duwdx = w2 * dudx; |
|
float dvwdx = w2 * dvdx; |
|
float duvdx = w0 * dudx + w1 * dvdx; |
|
float duwdy = w2 * dudy; |
|
float dvwdy = w2 * dvdy; |
|
float duvdy = w0 * dudy + w1 * dvdy; |
|
|
|
vec4 db0 = vec4(duvdx - dvwdx, duvdy - dvwdy, dvwdx, dvwdy); |
|
vec4 db1 = vec4(duwdx, duwdy, duvdx - duwdx, duvdy - duwdy); |
|
vec4 db2 = vec4(duwdx, duwdy, dvwdx, dvwdy); |
|
|
|
int layer_id = v_layer[0]; |
|
int prim_id = gl_PrimitiveIDIn + v_offset[0]; |
|
|
|
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[0].gl_Position.x, gl_in[0].gl_Position.y, gl_in[0].gl_Position.z, gl_in[0].gl_Position.w); var_uvzw = vec4(1.f, 0.f, gl_in[0].gl_Position.z, gl_in[0].gl_Position.w); var_db = db0; EmitVertex(); |
|
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[1].gl_Position.x, gl_in[1].gl_Position.y, gl_in[1].gl_Position.z, gl_in[1].gl_Position.w); var_uvzw = vec4(0.f, 1.f, gl_in[1].gl_Position.z, gl_in[1].gl_Position.w); var_db = db1; EmitVertex(); |
|
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[2].gl_Position.x, gl_in[2].gl_Position.y, gl_in[2].gl_Position.z, gl_in[2].gl_Position.w); var_uvzw = vec4(0.f, 0.f, gl_in[2].gl_Position.z, gl_in[2].gl_Position.w); var_db = db2; EmitVertex(); |
|
} |
|
) |
|
); |
|
|
|
|
|
compileGLShader(NVDR_CTX_PARAMS, s, &s.glFragmentShader, GL_FRAGMENT_SHADER, |
|
"#version 430\n" |
|
STRINGIFY_SHADER_SOURCE( |
|
in vec4 var_uvzw; |
|
in vec4 var_db; |
|
layout(location = 0) out vec4 out_raster; |
|
layout(location = 1) out vec4 out_db; |
|
IF_ZMODIFY( |
|
layout(location = 1) uniform float in_dummy; |
|
) |
|
void main() |
|
{ |
|
int id_int = gl_PrimitiveID + 1; |
|
float id_float = (id_int <= 0x01000000) ? float(id_int) : intBitsToFloat(0x4a800000 + id_int); |
|
|
|
out_raster = vec4(var_uvzw.x, var_uvzw.y, var_uvzw.z / var_uvzw.w, id_float); |
|
out_db = var_db * var_uvzw.w; |
|
IF_ZMODIFY(gl_FragDepth = gl_FragCoord.z + in_dummy;) |
|
} |
|
) |
|
); |
|
|
|
|
|
compileGLShader(NVDR_CTX_PARAMS, s, &s.glFragmentShaderDP, GL_FRAGMENT_SHADER, |
|
"#version 430\n" |
|
STRINGIFY_SHADER_SOURCE( |
|
in vec4 var_uvzw; |
|
in vec4 var_db; |
|
layout(binding = 0) uniform sampler2DArray out_prev; |
|
layout(location = 0) out vec4 out_raster; |
|
layout(location = 1) out vec4 out_db; |
|
IF_ZMODIFY( |
|
layout(location = 1) uniform float in_dummy; |
|
) |
|
void main() |
|
{ |
|
int id_int = gl_PrimitiveID + 1; |
|
float id_float = (id_int <= 0x01000000) ? float(id_int) : intBitsToFloat(0x4a800000 + id_int); |
|
|
|
vec4 prev = texelFetch(out_prev, ivec3(gl_FragCoord.x, gl_FragCoord.y, gl_Layer), 0); |
|
float depth_new = var_uvzw.z / var_uvzw.w; |
|
if (prev.w == 0 || depth_new <= prev.z) |
|
discard; |
|
out_raster = vec4(var_uvzw.x, var_uvzw.y, depth_new, id_float); |
|
out_db = var_db * var_uvzw.w; |
|
IF_ZMODIFY(gl_FragDepth = gl_FragCoord.z + in_dummy;) |
|
} |
|
) |
|
); |
|
} |
|
else |
|
{ |
|
|
|
compileGLShader(NVDR_CTX_PARAMS, s, &s.glGeometryShader, GL_GEOMETRY_SHADER, |
|
"#version 330\n" |
|
STRINGIFY_SHADER_SOURCE( |
|
layout(triangles) in; |
|
layout(triangle_strip, max_vertices=3) out; |
|
in int v_layer[]; |
|
in int v_offset[]; |
|
out vec4 var_uvzw; |
|
void main() |
|
{ |
|
int layer_id = v_layer[0]; |
|
int prim_id = gl_PrimitiveIDIn + v_offset[0]; |
|
|
|
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[0].gl_Position.x, gl_in[0].gl_Position.y, gl_in[0].gl_Position.z, gl_in[0].gl_Position.w); var_uvzw = vec4(1.f, 0.f, gl_in[0].gl_Position.z, gl_in[0].gl_Position.w); EmitVertex(); |
|
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[1].gl_Position.x, gl_in[1].gl_Position.y, gl_in[1].gl_Position.z, gl_in[1].gl_Position.w); var_uvzw = vec4(0.f, 1.f, gl_in[1].gl_Position.z, gl_in[1].gl_Position.w); EmitVertex(); |
|
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[2].gl_Position.x, gl_in[2].gl_Position.y, gl_in[2].gl_Position.z, gl_in[2].gl_Position.w); var_uvzw = vec4(0.f, 0.f, gl_in[2].gl_Position.z, gl_in[2].gl_Position.w); EmitVertex(); |
|
} |
|
) |
|
); |
|
|
|
|
|
compileGLShader(NVDR_CTX_PARAMS, s, &s.glFragmentShader, GL_FRAGMENT_SHADER, |
|
"#version 430\n" |
|
STRINGIFY_SHADER_SOURCE( |
|
in vec4 var_uvzw; |
|
layout(location = 0) out vec4 out_raster; |
|
IF_ZMODIFY( |
|
layout(location = 1) uniform float in_dummy; |
|
) |
|
void main() |
|
{ |
|
int id_int = gl_PrimitiveID + 1; |
|
float id_float = (id_int <= 0x01000000) ? float(id_int) : intBitsToFloat(0x4a800000 + id_int); |
|
|
|
out_raster = vec4(var_uvzw.x, var_uvzw.y, var_uvzw.z / var_uvzw.w, id_float); |
|
IF_ZMODIFY(gl_FragDepth = gl_FragCoord.z + in_dummy;) |
|
} |
|
) |
|
); |
|
|
|
|
|
compileGLShader(NVDR_CTX_PARAMS, s, &s.glFragmentShaderDP, GL_FRAGMENT_SHADER, |
|
"#version 430\n" |
|
STRINGIFY_SHADER_SOURCE( |
|
in vec4 var_uvzw; |
|
layout(binding = 0) uniform sampler2DArray out_prev; |
|
layout(location = 0) out vec4 out_raster; |
|
IF_ZMODIFY( |
|
layout(location = 1) uniform float in_dummy; |
|
) |
|
void main() |
|
{ |
|
int id_int = gl_PrimitiveID + 1; |
|
float id_float = (id_int <= 0x01000000) ? float(id_int) : intBitsToFloat(0x4a800000 + id_int); |
|
|
|
vec4 prev = texelFetch(out_prev, ivec3(gl_FragCoord.x, gl_FragCoord.y, gl_Layer), 0); |
|
float depth_new = var_uvzw.z / var_uvzw.w; |
|
if (prev.w == 0 || depth_new <= prev.z) |
|
discard; |
|
out_raster = vec4(var_uvzw.x, var_uvzw.y, var_uvzw.z / var_uvzw.w, id_float); |
|
IF_ZMODIFY(gl_FragDepth = gl_FragCoord.z + in_dummy;) |
|
} |
|
) |
|
); |
|
} |
|
|
|
|
|
constructGLProgram(NVDR_CTX_PARAMS, &s.glProgram, s.glVertexShader, s.glGeometryShader, s.glFragmentShader); |
|
constructGLProgram(NVDR_CTX_PARAMS, &s.glProgramDP, s.glVertexShader, s.glGeometryShader, s.glFragmentShaderDP); |
|
|
|
|
|
NVDR_CHECK_GL_ERROR(glGenFramebuffers(1, &s.glFBO)); |
|
NVDR_CHECK_GL_ERROR(glBindFramebuffer(GL_FRAMEBUFFER, s.glFBO)); |
|
|
|
|
|
GLenum draw_buffers[2] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 }; |
|
NVDR_CHECK_GL_ERROR(glDrawBuffers(num_outputs, draw_buffers)); |
|
|
|
|
|
NVDR_CHECK_GL_ERROR(glGenVertexArrays(1, &s.glVAO)); |
|
NVDR_CHECK_GL_ERROR(glBindVertexArray(s.glVAO)); |
|
|
|
|
|
NVDR_CHECK_GL_ERROR(glGenBuffers(1, &s.glPosBuffer)); |
|
NVDR_CHECK_GL_ERROR(glBindBuffer(GL_ARRAY_BUFFER, s.glPosBuffer)); |
|
NVDR_CHECK_GL_ERROR(glEnableVertexAttribArray(0)); |
|
NVDR_CHECK_GL_ERROR(glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0, 0)); |
|
|
|
|
|
NVDR_CHECK_GL_ERROR(glGenBuffers(1, &s.glTriBuffer)); |
|
NVDR_CHECK_GL_ERROR(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, s.glTriBuffer)); |
|
|
|
|
|
NVDR_CHECK_GL_ERROR(glEnable(GL_DEPTH_TEST)); |
|
NVDR_CHECK_GL_ERROR(glDepthFunc(GL_LESS)); |
|
NVDR_CHECK_GL_ERROR(glClearDepth(1.0)); |
|
|
|
|
|
NVDR_CHECK_GL_ERROR(glGenTextures(num_outputs, s.glColorBuffer)); |
|
for (int i=0; i < num_outputs; i++) |
|
{ |
|
NVDR_CHECK_GL_ERROR(glBindTexture(GL_TEXTURE_2D_ARRAY, s.glColorBuffer[i])); |
|
NVDR_CHECK_GL_ERROR(glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + i, s.glColorBuffer[i], 0)); |
|
} |
|
|
|
|
|
NVDR_CHECK_GL_ERROR(glGenTextures(1, &s.glDepthStencilBuffer)); |
|
NVDR_CHECK_GL_ERROR(glBindTexture(GL_TEXTURE_2D_ARRAY, s.glDepthStencilBuffer)); |
|
NVDR_CHECK_GL_ERROR(glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, s.glDepthStencilBuffer, 0)); |
|
|
|
|
|
NVDR_CHECK_GL_ERROR(glGenTextures(1, &s.glPrevOutBuffer)); |
|
} |
|
|
|
void rasterizeResizeBuffers(NVDR_CTX_ARGS, RasterizeGLState& s, bool& changes, int posCount, int triCount, int width, int height, int depth) |
|
{ |
|
changes = false; |
|
|
|
|
|
if (posCount > s.posCount) |
|
{ |
|
if (s.cudaPosBuffer) |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsUnregisterResource(s.cudaPosBuffer)); |
|
s.posCount = (posCount > 64) ? ROUND_UP_BITS(posCount, 2) : 64; |
|
LOG(INFO) << "Increasing position buffer size to " << s.posCount << " float32"; |
|
NVDR_CHECK_GL_ERROR(glBufferData(GL_ARRAY_BUFFER, s.posCount * sizeof(float), NULL, GL_DYNAMIC_DRAW)); |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsGLRegisterBuffer(&s.cudaPosBuffer, s.glPosBuffer, cudaGraphicsRegisterFlagsWriteDiscard)); |
|
changes = true; |
|
} |
|
|
|
|
|
if (triCount > s.triCount) |
|
{ |
|
if (s.cudaTriBuffer) |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsUnregisterResource(s.cudaTriBuffer)); |
|
s.triCount = (triCount > 64) ? ROUND_UP_BITS(triCount, 2) : 64; |
|
LOG(INFO) << "Increasing triangle buffer size to " << s.triCount << " int32"; |
|
NVDR_CHECK_GL_ERROR(glBufferData(GL_ELEMENT_ARRAY_BUFFER, s.triCount * sizeof(int32_t), NULL, GL_DYNAMIC_DRAW)); |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsGLRegisterBuffer(&s.cudaTriBuffer, s.glTriBuffer, cudaGraphicsRegisterFlagsWriteDiscard)); |
|
changes = true; |
|
} |
|
|
|
|
|
if (width > s.width || height > s.height || depth > s.depth) |
|
{ |
|
int num_outputs = s.enableDB ? 2 : 1; |
|
if (s.cudaColorBuffer[0]) |
|
for (int i=0; i < num_outputs; i++) |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsUnregisterResource(s.cudaColorBuffer[i])); |
|
|
|
if (s.cudaPrevOutBuffer) |
|
{ |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsUnregisterResource(s.cudaPrevOutBuffer)); |
|
s.cudaPrevOutBuffer = 0; |
|
} |
|
|
|
|
|
s.width = (width > s.width) ? width : s.width; |
|
s.height = (height > s.height) ? height : s.height; |
|
s.depth = (depth > s.depth) ? depth : s.depth; |
|
s.width = ROUND_UP(s.width, 32); |
|
s.height = ROUND_UP(s.height, 32); |
|
LOG(INFO) << "Increasing frame buffer size to (width, height, depth) = (" << s.width << ", " << s.height << ", " << s.depth << ")"; |
|
|
|
|
|
for (int i=0; i < num_outputs; i++) |
|
{ |
|
NVDR_CHECK_GL_ERROR(glBindTexture(GL_TEXTURE_2D_ARRAY, s.glColorBuffer[i])); |
|
NVDR_CHECK_GL_ERROR(glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA32F, s.width, s.height, s.depth, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0)); |
|
NVDR_CHECK_GL_ERROR(glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST)); |
|
NVDR_CHECK_GL_ERROR(glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST)); |
|
NVDR_CHECK_GL_ERROR(glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE)); |
|
NVDR_CHECK_GL_ERROR(glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE)); |
|
} |
|
|
|
|
|
NVDR_CHECK_GL_ERROR(glBindTexture(GL_TEXTURE_2D_ARRAY, s.glDepthStencilBuffer)); |
|
NVDR_CHECK_GL_ERROR(glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_DEPTH24_STENCIL8, s.width, s.height, s.depth, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0)); |
|
|
|
|
|
for (int i=0; i < num_outputs; i++) |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsGLRegisterImage(&s.cudaColorBuffer[i], s.glColorBuffer[i], GL_TEXTURE_3D, cudaGraphicsRegisterFlagsReadOnly)); |
|
|
|
changes = true; |
|
} |
|
} |
|
|
|
void rasterizeRender(NVDR_CTX_ARGS, RasterizeGLState& s, cudaStream_t stream, const float* posPtr, int posCount, int vtxPerInstance, const int32_t* triPtr, int triCount, const int32_t* rangesPtr, int width, int height, int depth, int peeling_idx) |
|
{ |
|
|
|
if (peeling_idx < 1) |
|
{ |
|
if (triPtr) |
|
{ |
|
|
|
void* glPosPtr = NULL; |
|
void* glTriPtr = NULL; |
|
size_t posBytes = 0; |
|
size_t triBytes = 0; |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsMapResources(2, &s.cudaPosBuffer, stream)); |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsResourceGetMappedPointer(&glPosPtr, &posBytes, s.cudaPosBuffer)); |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsResourceGetMappedPointer(&glTriPtr, &triBytes, s.cudaTriBuffer)); |
|
NVDR_CHECK(posBytes >= posCount * sizeof(float), "mapped GL position buffer size mismatch"); |
|
NVDR_CHECK(triBytes >= triCount * sizeof(int32_t), "mapped GL triangle buffer size mismatch"); |
|
NVDR_CHECK_CUDA_ERROR(cudaMemcpyAsync(glPosPtr, posPtr, posCount * sizeof(float), cudaMemcpyDeviceToDevice, stream)); |
|
NVDR_CHECK_CUDA_ERROR(cudaMemcpyAsync(glTriPtr, triPtr, triCount * sizeof(int32_t), cudaMemcpyDeviceToDevice, stream)); |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsUnmapResources(2, &s.cudaPosBuffer, stream)); |
|
} |
|
else |
|
{ |
|
|
|
void* glPosPtr = NULL; |
|
size_t posBytes = 0; |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsMapResources(1, &s.cudaPosBuffer, stream)); |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsResourceGetMappedPointer(&glPosPtr, &posBytes, s.cudaPosBuffer)); |
|
NVDR_CHECK(posBytes >= posCount * sizeof(float), "mapped GL position buffer size mismatch"); |
|
NVDR_CHECK_CUDA_ERROR(cudaMemcpyAsync(glPosPtr, posPtr, posCount * sizeof(float), cudaMemcpyDeviceToDevice, stream)); |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsUnmapResources(1, &s.cudaPosBuffer, stream)); |
|
} |
|
} |
|
|
|
|
|
if (peeling_idx < 1) |
|
{ |
|
|
|
NVDR_CHECK_GL_ERROR(glUseProgram(s.glProgram)); |
|
} |
|
else |
|
{ |
|
|
|
if (!s.cudaPrevOutBuffer) |
|
{ |
|
NVDR_CHECK_GL_ERROR(glBindTexture(GL_TEXTURE_2D_ARRAY, s.glPrevOutBuffer)); |
|
NVDR_CHECK_GL_ERROR(glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA32F, s.width, s.height, s.depth, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0)); |
|
NVDR_CHECK_GL_ERROR(glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST)); |
|
NVDR_CHECK_GL_ERROR(glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST)); |
|
NVDR_CHECK_GL_ERROR(glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE)); |
|
NVDR_CHECK_GL_ERROR(glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE)); |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsGLRegisterImage(&s.cudaPrevOutBuffer, s.glPrevOutBuffer, GL_TEXTURE_3D, cudaGraphicsRegisterFlagsReadOnly)); |
|
} |
|
|
|
|
|
GLuint glTempBuffer = s.glPrevOutBuffer; |
|
s.glPrevOutBuffer = s.glColorBuffer[0]; |
|
s.glColorBuffer[0] = glTempBuffer; |
|
|
|
|
|
cudaGraphicsResource_t cudaTempBuffer = s.cudaPrevOutBuffer; |
|
s.cudaPrevOutBuffer = s.cudaColorBuffer[0]; |
|
s.cudaColorBuffer[0] = cudaTempBuffer; |
|
|
|
|
|
NVDR_CHECK_GL_ERROR(glBindTexture(GL_TEXTURE_2D_ARRAY, s.glColorBuffer[0])); |
|
NVDR_CHECK_GL_ERROR(glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, s.glColorBuffer[0], 0)); |
|
|
|
|
|
NVDR_CHECK_GL_ERROR(glBindTexture(GL_TEXTURE_2D_ARRAY, s.glPrevOutBuffer)); |
|
|
|
|
|
NVDR_CHECK_GL_ERROR(glUseProgram(s.glProgramDP)); |
|
} |
|
|
|
|
|
NVDR_CHECK_GL_ERROR(glViewport(0, 0, width, height)); |
|
NVDR_CHECK_GL_ERROR(glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT)); |
|
|
|
|
|
if (s.enableDB) |
|
NVDR_CHECK_GL_ERROR(glUniform2f(0, 2.f / (float)width, 2.f / (float)height)); |
|
|
|
|
|
if (s.enableZModify) |
|
NVDR_CHECK_GL_ERROR(glUniform1f(1, 0.f)); |
|
|
|
|
|
if (depth == 1 && !rangesPtr) |
|
{ |
|
|
|
NVDR_CHECK_GL_ERROR(glDrawElements(GL_TRIANGLES, triCount, GL_UNSIGNED_INT, 0)); |
|
} |
|
else |
|
{ |
|
|
|
std::vector<GLDrawCmd> drawCmdBuffer(depth); |
|
|
|
if (!rangesPtr) |
|
{ |
|
|
|
|
|
|
|
for (int i=0; i < depth; i++) |
|
{ |
|
GLDrawCmd& cmd = drawCmdBuffer[i]; |
|
cmd.firstIndex = 0; |
|
cmd.count = triCount; |
|
cmd.baseVertex = vtxPerInstance * i; |
|
cmd.baseInstance = 0; |
|
cmd.instanceCount = 1; |
|
} |
|
} |
|
else |
|
{ |
|
|
|
|
|
|
|
for (int i=0, j=0; i < depth; i++) |
|
{ |
|
GLDrawCmd& cmd = drawCmdBuffer[i]; |
|
int first = rangesPtr[j++]; |
|
int count = rangesPtr[j++]; |
|
NVDR_CHECK(first >= 0 && count >= 0, "range contains negative values"); |
|
NVDR_CHECK((first + count) * 3 <= triCount, "range extends beyond end of triangle buffer"); |
|
cmd.firstIndex = first * 3; |
|
cmd.count = count * 3; |
|
cmd.baseVertex = 0; |
|
cmd.baseInstance = first; |
|
cmd.instanceCount = 1; |
|
} |
|
} |
|
|
|
|
|
NVDR_CHECK_GL_ERROR(glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, &drawCmdBuffer[0], depth, sizeof(GLDrawCmd))); |
|
} |
|
} |
|
|
|
void rasterizeCopyResults(NVDR_CTX_ARGS, RasterizeGLState& s, cudaStream_t stream, float** outputPtr, int width, int height, int depth) |
|
{ |
|
|
|
cudaArray_t array = 0; |
|
cudaChannelFormatDesc arrayDesc = {}; |
|
cudaExtent arrayExt = {}; |
|
int num_outputs = s.enableDB ? 2 : 1; |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsMapResources(num_outputs, s.cudaColorBuffer, stream)); |
|
for (int i=0; i < num_outputs; i++) |
|
{ |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsSubResourceGetMappedArray(&array, s.cudaColorBuffer[i], 0, 0)); |
|
NVDR_CHECK_CUDA_ERROR(cudaArrayGetInfo(&arrayDesc, &arrayExt, NULL, array)); |
|
NVDR_CHECK(arrayDesc.f == cudaChannelFormatKindFloat, "CUDA mapped array data kind mismatch"); |
|
NVDR_CHECK(arrayDesc.x == 32 && arrayDesc.y == 32 && arrayDesc.z == 32 && arrayDesc.w == 32, "CUDA mapped array data width mismatch"); |
|
NVDR_CHECK(arrayExt.width >= width && arrayExt.height >= height && arrayExt.depth >= depth, "CUDA mapped array extent mismatch"); |
|
cudaMemcpy3DParms p = {0}; |
|
p.srcArray = array; |
|
p.dstPtr.ptr = outputPtr[i]; |
|
p.dstPtr.pitch = width * 4 * sizeof(float); |
|
p.dstPtr.xsize = width; |
|
p.dstPtr.ysize = height; |
|
p.extent.width = width; |
|
p.extent.height = height; |
|
p.extent.depth = depth; |
|
p.kind = cudaMemcpyDeviceToDevice; |
|
NVDR_CHECK_CUDA_ERROR(cudaMemcpy3DAsync(&p, stream)); |
|
} |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsUnmapResources(num_outputs, s.cudaColorBuffer, stream)); |
|
} |
|
|
|
void rasterizeReleaseBuffers(NVDR_CTX_ARGS, RasterizeGLState& s) |
|
{ |
|
int num_outputs = s.enableDB ? 2 : 1; |
|
|
|
if (s.cudaPosBuffer) |
|
{ |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsUnregisterResource(s.cudaPosBuffer)); |
|
s.cudaPosBuffer = 0; |
|
} |
|
|
|
if (s.cudaTriBuffer) |
|
{ |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsUnregisterResource(s.cudaTriBuffer)); |
|
s.cudaTriBuffer = 0; |
|
} |
|
|
|
for (int i=0; i < num_outputs; i++) |
|
{ |
|
if (s.cudaColorBuffer[i]) |
|
{ |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsUnregisterResource(s.cudaColorBuffer[i])); |
|
s.cudaColorBuffer[i] = 0; |
|
} |
|
} |
|
|
|
if (s.cudaPrevOutBuffer) |
|
{ |
|
NVDR_CHECK_CUDA_ERROR(cudaGraphicsUnregisterResource(s.cudaPrevOutBuffer)); |
|
s.cudaPrevOutBuffer = 0; |
|
} |
|
} |
|
|
|
|
|
|