diff --git a/cuda_rasterizer/forward.cu b/cuda_rasterizer/forward.cu index c419a328..7ec44380 100644 --- a/cuda_rasterizer/forward.cu +++ b/cuda_rasterizer/forward.cu @@ -266,11 +266,13 @@ renderCUDA( int W, int H, const float2* __restrict__ points_xy_image, const float* __restrict__ features, + const float* __restrict__ depths, const float4* __restrict__ conic_opacity, float* __restrict__ final_T, uint32_t* __restrict__ n_contrib, const float* __restrict__ bg_color, - float* __restrict__ out_color) + float* __restrict__ out_color, + float* __restrict__ out_depth) { // Identify current tile and associated min/max pixel range. auto block = cg::this_thread_block(); @@ -301,6 +303,7 @@ renderCUDA( uint32_t contributor = 0; uint32_t last_contributor = 0; float C[CHANNELS] = { 0 }; + float D = { 0 }; // Iterate over batches until all done or range is complete for (int i = 0; i < rounds; i++, toDo -= BLOCK_SIZE) @@ -353,6 +356,7 @@ renderCUDA( // Eq. (3) from 3D Gaussian splatting paper. for (int ch = 0; ch < CHANNELS; ch++) C[ch] += features[collected_id[j] * CHANNELS + ch] * alpha * T; + D += depths[collected_id[j]] * alpha * T; T = test_T; @@ -370,6 +374,7 @@ renderCUDA( n_contrib[pix_id] = last_contributor; for (int ch = 0; ch < CHANNELS; ch++) out_color[ch * H * W + pix_id] = C[ch] + T * bg_color[ch]; + out_depth[pix_id] = D; } } @@ -380,11 +385,13 @@ void FORWARD::render( int W, int H, const float2* means2D, const float* colors, + const float* depths, const float4* conic_opacity, float* final_T, uint32_t* n_contrib, const float* bg_color, - float* out_color) + float* out_color, + float* out_depth) { renderCUDA << > > ( ranges, @@ -392,11 +399,13 @@ void FORWARD::render( W, H, means2D, colors, + depths, conic_opacity, final_T, n_contrib, bg_color, - out_color); + out_color, + out_depth); } void FORWARD::preprocess(int P, int D, int M, @@ -452,4 +461,4 @@ void FORWARD::preprocess(int P, int D, int M, tiles_touched, prefiltered ); -} \ No newline at end of file +} diff --git a/cuda_rasterizer/forward.h b/cuda_rasterizer/forward.h index 3c11cb91..5722e1ed 100644 --- a/cuda_rasterizer/forward.h +++ b/cuda_rasterizer/forward.h @@ -55,11 +55,13 @@ namespace FORWARD int W, int H, const float2* points_xy_image, const float* features, + const float* depths, const float4* conic_opacity, float* final_T, uint32_t* n_contrib, const float* bg_color, - float* out_color); + float* out_color, + float* out_depth); } diff --git a/cuda_rasterizer/rasterizer.h b/cuda_rasterizer/rasterizer.h index 2cde606d..6ede4eb8 100644 --- a/cuda_rasterizer/rasterizer.h +++ b/cuda_rasterizer/rasterizer.h @@ -49,6 +49,7 @@ namespace CudaRasterizer const float tan_fovx, float tan_fovy, const bool prefiltered, float* out_color, + float* out_depth, int* radii = nullptr); static void backward( diff --git a/cuda_rasterizer/rasterizer_impl.cu b/cuda_rasterizer/rasterizer_impl.cu index d7b9d6ab..8cece89e 100644 --- a/cuda_rasterizer/rasterizer_impl.cu +++ b/cuda_rasterizer/rasterizer_impl.cu @@ -216,6 +216,7 @@ int CudaRasterizer::Rasterizer::forward( const float tan_fovx, float tan_fovy, const bool prefiltered, float* out_color, + float* out_depth, int* radii) { const float focal_y = height / (2.0f * tan_fovy); @@ -326,11 +327,13 @@ int CudaRasterizer::Rasterizer::forward( width, height, geomState.means2D, feature_ptr, + geomState.depths, geomState.conic_opacity, imgState.accum_alpha, imgState.n_contrib, background, - out_color); + out_color, + out_depth); return num_rendered; } @@ -430,4 +433,4 @@ void CudaRasterizer::Rasterizer::backward( dL_dsh, (glm::vec3*)dL_dscale, (glm::vec4*)dL_drot); -} \ No newline at end of file +} diff --git a/diff_gaussian_rasterization/__init__.py b/diff_gaussian_rasterization/__init__.py index 4b072f7a..8b37f093 100644 --- a/diff_gaussian_rasterization/__init__.py +++ b/diff_gaussian_rasterization/__init__.py @@ -75,16 +75,16 @@ def forward( ) # Invoke C++/CUDA rasterizer - num_rendered, color, radii, geomBuffer, binningBuffer, imgBuffer = _C.rasterize_gaussians(*args) + num_rendered, color, depth, radii, geomBuffer, binningBuffer, imgBuffer = _C.rasterize_gaussians(*args) # Keep relevant tensors for backward ctx.raster_settings = raster_settings ctx.num_rendered = num_rendered ctx.save_for_backward(colors_precomp, means3D, scales, rotations, cov3Ds_precomp, radii, sh, geomBuffer, binningBuffer, imgBuffer) - return color, radii + return color, radii, depth @staticmethod - def backward(ctx, grad_out_color, _): + def backward(ctx, grad_out_color, grad_radii, grad_depth): # Restore necessary values from context num_rendered = ctx.num_rendered diff --git a/rasterize_points.cu b/rasterize_points.cu index 90e10be2..ad70804f 100644 --- a/rasterize_points.cu +++ b/rasterize_points.cu @@ -32,7 +32,7 @@ std::function resizeFunctional(torch::Tensor& t) { return lambda; } -std::tuple +std::tuple RasterizeGaussiansCUDA( const torch::Tensor& background, const torch::Tensor& means3D, @@ -65,6 +65,7 @@ RasterizeGaussiansCUDA( auto float_opts = means3D.options().dtype(torch::kFloat32); torch::Tensor out_color = torch::full({NUM_CHANNELS, H, W}, 0.0, float_opts); + torch::Tensor out_depth = torch::full({1, H, W}, 0.0, float_opts); torch::Tensor radii = torch::full({P}, 0, means3D.options().dtype(torch::kInt32)); torch::Device device(torch::kCUDA); @@ -107,9 +108,10 @@ RasterizeGaussiansCUDA( tan_fovy, prefiltered, out_color.contiguous().data(), + out_depth.contiguous().data(), radii.contiguous().data()); } - return std::make_tuple(rendered, out_color, radii, geomBuffer, binningBuffer, imgBuffer); + return std::make_tuple(rendered, out_color, out_depth, radii, geomBuffer, binningBuffer, imgBuffer); } std::tuple diff --git a/rasterize_points.h b/rasterize_points.h index 9be145d6..8f36814d 100644 --- a/rasterize_points.h +++ b/rasterize_points.h @@ -15,7 +15,7 @@ #include #include -std::tuple +std::tuple RasterizeGaussiansCUDA( const torch::Tensor& background, const torch::Tensor& means3D,