#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact george.drettakis@inria.fr
#

import torch
import math
import numpy as np
from typing import NamedTuple
import cv2
import os

class BasicPointCloud(NamedTuple):
    points: np.ndarray
    colors: np.ndarray
    normals: np.ndarray
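
# Editor's sketch (not part of the original file): a minimal example of how
# BasicPointCloud is typically populated; the shapes below are an assumption.
def _example_basic_point_cloud(n: int = 100) -> BasicPointCloud:
    # n x 3 positions, RGB colors in [0, 1], and per-point normals
    return BasicPointCloud(
        points=np.random.rand(n, 3),
        colors=np.random.rand(n, 3),
        normals=np.zeros((n, 3)),
    )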

def geom_transform_points(points, transf_matrix):
    """Apply a 4x4 transform to a (P, 3) point tensor, with homogeneous divide."""
    P, _ = points.shape
    ones = torch.ones(P, 1, dtype=points.dtype, device=points.device)
    points_hom = torch.cat([points, ones], dim=1)   # homogeneous coordinates, (P, 4)
    points_out = torch.matmul(points_hom, transf_matrix.unsqueeze(0))
    denom = points_out[..., 3:] + 1e-7              # guard against division by zero
    return (points_out[..., :3] / denom).squeeze(dim=0)
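
# Editor's sketch (assumption, not original code): the points multiply the
# matrix on the left (row-vector convention), so a transform written in the
# usual column-vector convention must be transposed before being passed in.
def _example_geom_transform():
    pts = torch.tensor([[1.0, 2.0, 3.0]])
    M = torch.eye(4)
    M[:3, 3] = torch.tensor([10.0, 0.0, 0.0])   # translate +10 in x (column convention)
    out = geom_transform_points(pts, M.T)       # transpose for the row-vector multiply
    # out is approximately [[11.0, 2.0, 3.0]]
    return out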

def getWorld2View(R, t):
    """Build a 4x4 world-to-camera matrix from a c2w rotation R and w2c translation t."""
    Rt = np.zeros((4, 4))
    Rt[:3, :3] = R.transpose()   # R stores the camera-to-world rotation, so transpose it
    Rt[:3, 3] = t
    Rt[3, 3] = 1.0
    return np.float32(Rt)

def getWorld2View2(R, t, translate=np.array([.0, .0, .0]), scale=1.0):
    """Build a world-to-camera matrix, optionally recentering and rescaling the scene.

    Args:
        R (np.ndarray): camera-to-world rotation (3x3).
        t (np.ndarray): world-to-camera translation (3,).
        translate (np.ndarray, optional): offset added to the camera center. Defaults to zeros.
        scale (float, optional): uniform scale applied to the camera center. Defaults to 1.0.

    Returns:
        np.ndarray: 4x4 world-to-camera matrix (float32).
    """
    # compose the w2c matrix
    Rt = np.zeros((4, 4))
    Rt[:3, :3] = R.transpose()
    Rt[:3, 3] = t
    Rt[3, 3] = 1.0
    # invert to get c2w, then shift and scale the camera center
    C2W = np.linalg.inv(Rt)
    cam_center = C2W[:3, 3]
    cam_center = (cam_center + translate) * scale
    C2W[:3, 3] = cam_center
    # invert back to get the final w2c matrix
    Rt = np.linalg.inv(C2W)
    return np.float32(Rt)
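
# Editor's sketch (not in the original file): with the default zero translate
# and unit scale, getWorld2View2 should agree with getWorld2View.
def _example_world2view_consistency():
    theta = 0.3
    # camera-to-world rotation about z, arbitrary w2c translation
    R = np.array([[np.cos(theta), -np.sin(theta), 0.0],
                  [np.sin(theta),  np.cos(theta), 0.0],
                  [0.0,            0.0,           1.0]])
    t = np.array([1.0, 2.0, 3.0])
    assert np.allclose(getWorld2View(R, t), getWorld2View2(R, t), atol=1e-6)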

def getProjectionMatrix(znear, zfar, fovX, fovY):
    """Perspective projection matrix mapping view-space depth to NDC z in [0, 1]."""
    tanHalfFovY = math.tan(fovY / 2)
    tanHalfFovX = math.tan(fovX / 2)

    # frustum extents on the near plane
    top = tanHalfFovY * znear
    bottom = -top
    right = tanHalfFovX * znear
    left = -right

    P = torch.zeros(4, 4)

    z_sign = 1.0   # camera looks down the +z axis

    P[0, 0] = 2.0 * znear / (right - left)
    P[1, 1] = 2.0 * znear / (top - bottom)
    P[0, 2] = (right + left) / (right - left)
    P[1, 2] = (top + bottom) / (top - bottom)
    P[3, 2] = z_sign
    P[2, 2] = z_sign * zfar / (zfar - znear)
    P[2, 3] = -(zfar * znear) / (zfar - znear)
    return P
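
# Editor's sketch (assumption, not original code): a quick check that the
# projection maps z = znear to NDC depth 0 and z = zfar to NDC depth 1.
def _example_projection_depth_range(znear=0.01, zfar=100.0):
    P = getProjectionMatrix(znear, zfar, fovX=math.radians(60), fovY=math.radians(45))
    for z, expected in [(znear, 0.0), (zfar, 1.0)]:
        p = P @ torch.tensor([0.0, 0.0, z, 1.0])
        ndc_depth = (p[2] / p[3]).item()   # perspective divide
        assert abs(ndc_depth - expected) < 1e-4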

def fov2focal(fov, pixels):
    """Convert a field of view (radians) to a focal length in pixels."""
    return pixels / (2 * math.tan(fov / 2))

def focal2fov(focal, pixels):
    """Convert a focal length in pixels to a field of view (radians)."""
    return 2 * math.atan(pixels / (2 * focal))
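
# Editor's sketch (not in the original file): fov2focal and focal2fov are
# inverses of each other for a fixed image size.
def _example_fov_focal_roundtrip(width=1920):
    fov = math.radians(70.0)
    assert abs(focal2fov(fov2focal(fov, width), width) - fov) < 1e-9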

# The following functions depths_double_to_points and depth_double_to_normal
# are adapted from https://github.com/hugoycj/2dgs-gaustudio/blob/main/utils/graphics_utils.py
def depths_double_to_points(view, depthmap1, depthmap2):
    """Back-project two depth maps of the same view into camera-space 3D point maps."""
    W, H = view.image_width, view.image_height
    fx = W / (2 * math.tan(view.FoVx / 2.))
    fy = H / (2 * math.tan(view.FoVy / 2.))
    # inverse intrinsics, assuming the principal point at the image center
    intrins_inv = torch.tensor(
        [[1/fx, 0., -W/(2 * fx)],
         [0., 1/fy, -H/(2 * fy)],
         [0., 0., 1.0]]
    ).float().cuda()
    # pixel-center coordinates (u + 0.5, v + 0.5, 1)
    grid_x, grid_y = torch.meshgrid(torch.arange(W)+0.5, torch.arange(H)+0.5, indexing='xy')
    points = torch.stack([grid_x, grid_y, torch.ones_like(grid_x)], dim=0).reshape(3, -1).float().cuda()
    rays_d = intrins_inv @ points
    points1 = depthmap1.reshape(1, -1) * rays_d
    points2 = depthmap2.reshape(1, -1) * rays_d
    return points1.reshape(3, H, W), points2.reshape(3, H, W)

def point_double_to_normal(view, points1, points2):
    """Estimate per-pixel normals from 3D point maps via finite differences."""
    points = torch.stack([points1, points2], dim=0)
    output = torch.zeros_like(points)
    # central differences along image rows (dx) and columns (dy)
    dx = points[..., 2:, 1:-1] - points[..., :-2, 1:-1]
    dy = points[..., 1:-1, 2:] - points[..., 1:-1, :-2]
    normal_map = torch.nn.functional.normalize(torch.cross(dx, dy, dim=1), dim=1)
    output[..., 1:-1, 1:-1] = normal_map
    return output

def depth_double_to_normal(view, depth1, depth2):
    """Convenience wrapper: depth maps -> camera-space points -> normal maps."""
    points1, points2 = depths_double_to_points(view, depth1, depth2)
    return point_double_to_normal(view, points1, points2)
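
# Editor's sketch (assumption, not original code): a dummy view with the
# fields the helpers above expect. For a constant-depth (fronto-parallel)
# plane, the interior normals should come out as (0, 0, -1), i.e. pointing
# back toward the camera. Requires a CUDA device, since the helpers allocate
# on the GPU.
def _example_depth_to_normal(W=64, H=48):
    class _View(NamedTuple):
        image_width: int
        image_height: int
        FoVx: float
        FoVy: float

    view = _View(W, H, math.radians(60), math.radians(45))
    depth = torch.full((1, H, W), 2.0, device='cuda')
    normals = depth_double_to_normal(view, depth, depth)   # (2, 3, H, W)
    interior_z = normals[0, 2, 1:-1, 1:-1]
    assert torch.allclose(interior_z, torch.full_like(interior_z, -1.0), atol=1e-4)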

def bilinear_sampler(img, coords, mask=False):
    """Wrapper for grid_sample that takes pixel coordinates instead of normalized ones."""
    H, W = img.shape[-2:]
    xgrid, ygrid = coords.split([1, 1], dim=-1)
    # normalize pixel coordinates to the [-1, 1] range grid_sample expects
    xgrid = 2*xgrid/(W-1) - 1
    ygrid = 2*ygrid/(H-1) - 1

    grid = torch.cat([xgrid, ygrid], dim=-1)
    img = torch.nn.functional.grid_sample(img, grid, align_corners=True)

    if mask:
        # valid where the requested coordinates fall inside the image
        mask = (xgrid > -1) & (ygrid > -1) & (xgrid < 1) & (ygrid < 1)
        return img, mask.float()

    return img
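
# Editor's sketch (not in the original file): sampling at the integer pixel
# grid should reproduce the input image, up to interpolation error.
def _example_bilinear_identity(B=1, C=3, H=8, W=8):
    img = torch.rand(B, C, H, W)
    ys, xs = torch.meshgrid(torch.arange(H).float(), torch.arange(W).float(), indexing='ij')
    coords = torch.stack([xs, ys], dim=-1).unsqueeze(0).expand(B, -1, -1, -1)  # (B, H, W, 2)
    out = bilinear_sampler(img, coords)
    assert torch.allclose(out, img, atol=1e-5)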

# Project the reference depth map into the source view, then project back.
# NOTE: extrinsics here are world-to-camera matrices — reference-camera points
# are sent to world via inv(extrinsics_ref), then into the source camera via
# extrinsics_src.
def reproject_with_depth(depth_ref, intrinsics_ref, extrinsics_ref, depth_src, intrinsics_src, extrinsics_src):
    width, height = depth_ref.shape[1], depth_ref.shape[0]
    ## step 1. project reference pixels to the source view
    # reference view x, y
    x_ref, y_ref = np.meshgrid(np.arange(0, width), np.arange(0, height))
    x_ref, y_ref = x_ref.reshape([-1]), y_ref.reshape([-1])
    # reference 3D space
    xyz_ref = np.matmul(np.linalg.inv(intrinsics_ref),
                        np.vstack((x_ref, y_ref, np.ones_like(x_ref))) * depth_ref.reshape([-1]))
    # source 3D space
    xyz_src = np.matmul(np.matmul(extrinsics_src, np.linalg.inv(extrinsics_ref)),
                        np.vstack((xyz_ref, np.ones_like(x_ref))))[:3]
    # source view x, y
    K_xyz_src = np.matmul(intrinsics_src, xyz_src)
    xy_src = K_xyz_src[:2] / K_xyz_src[2:3]

    ## step 2. reproject the source view points using the source view depth estimate
    # sample the source-view depth at the projected locations
    x_src = xy_src[0].reshape([height, width]).astype(np.float32)
    y_src = xy_src[1].reshape([height, width]).astype(np.float32)
    sampled_depth_src = cv2.remap(depth_src, x_src, y_src, interpolation=cv2.INTER_LINEAR)
    # source 3D space
    # NOTE that we must use the sampled source-view depth here to project back
    xyz_src = np.matmul(np.linalg.inv(intrinsics_src),
                        np.vstack((xy_src, np.ones_like(x_ref))) * sampled_depth_src.reshape([-1]))
    # reference 3D space
    xyz_reprojected = np.matmul(np.matmul(extrinsics_ref, np.linalg.inv(extrinsics_src)),
                                np.vstack((xyz_src, np.ones_like(x_ref))))[:3]
    # reference view x, y, depth
    depth_reprojected = xyz_reprojected[2].reshape([height, width]).astype(np.float32)
    K_xyz_reprojected = np.matmul(intrinsics_ref, xyz_reprojected)
    xy_reprojected = K_xyz_reprojected[:2] / K_xyz_reprojected[2:3]
    x_reprojected = xy_reprojected[0].reshape([height, width]).astype(np.float32)
    y_reprojected = xy_reprojected[1].reshape([height, width]).astype(np.float32)

    return depth_reprojected, x_reprojected, y_reprojected, x_src, y_src

def check_geometric_consistency(depth_ref, intrinsics_ref, extrinsics_ref, depth_src, intrinsics_src, extrinsics_src, thre1=0.5, thre2=0.01):
    """Mask out reference-view depths that are inconsistent with the source view."""
    width, height = depth_ref.shape[1], depth_ref.shape[0]
    x_ref, y_ref = np.meshgrid(np.arange(0, width), np.arange(0, height))
    depth_reprojected, x2d_reprojected, y2d_reprojected, x2d_src, y2d_src = reproject_with_depth(
        depth_ref, intrinsics_ref, extrinsics_ref, depth_src, intrinsics_src, extrinsics_src)
    # check the reprojection error: |p_reproj - p_ref| < thre1 (pixels)
    dist = np.sqrt((x2d_reprojected - x_ref) ** 2 + (y2d_reprojected - y_ref) ** 2)
    # check the relative depth error: |d_reproj - d_ref| / d_ref < thre2
    depth_diff = np.abs(depth_reprojected - depth_ref)
    relative_depth_diff = depth_diff / depth_ref

    mask = np.logical_and(dist < thre1, relative_depth_diff < thre2)
    depth_reprojected[~mask] = 0

    return mask, depth_reprojected, x2d_src, y2d_src, relative_depth_diff
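
# Editor's sketch (assumption, not original code): with identical cameras and
# identical depth maps, every pixel should pass the consistency check.
def _example_geometric_consistency(H=32, W=32):
    K = np.array([[50.0,  0.0, W / 2],
                  [0.0,  50.0, H / 2],
                  [0.0,   0.0, 1.0]])
    E = np.eye(4)                                  # world-to-camera extrinsics
    depth = np.full((H, W), 5.0, dtype=np.float32)
    mask, _, _, _, _ = check_geometric_consistency(depth, K, E, depth, K, E)
    assert mask.all()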