Spaces:

blair-johnson
/

image-to-relief

Sleeping

image-to-relief / app.py

Blair Johnson

default to open body

f78c2b3 4 months ago

7.54 kB

	import gradio as gr
	import torch
	import numpy as np
	import cv2
	from PIL import Image
	from transformers import AutoProcessor, AutoModelForDepthEstimation
	import tempfile
	from stl import mesh
	from sklearn.decomposition import PCA
	import pillow_heif
	# Register the HEIF opener so PIL's Image.open can read HEIF/HEIC uploads.
	pillow_heif.register_heif_opener()

	# Pixel budget (height * width); larger uploads are downsampled before inference.
	MAX_RESOLUTION = 1.5e6
	MODEL_ID = "depth-anything/Depth-Anything-V2-Large-hf"

	# Run on the GPU when one is visible to torch, otherwise on the CPU.
	device = "cpu"
	if torch.cuda.is_available():
	    device = "cuda"

	# The processor and the depth model are loaded once at import time and
	# shared by every request.
	processor = AutoProcessor.from_pretrained(MODEL_ID, use_fast=True)
	model = AutoModelForDepthEstimation.from_pretrained(MODEL_ID)
	model = model.to(device)
	print("Model loaded successfully.")

	def _to_odd_kernel(size):
	    """Return ``size`` as an odd integer (Gaussian kernel sizes must be odd)."""
	    ksize = int(size)
	    return ksize + 1 if ksize % 2 == 0 else ksize


	def _rescale_unit(values):
	    """Linearly rescale ``values`` to [0, 1]; a constant array maps to zeros."""
	    lo, hi = values.min(), values.max()
	    if hi > lo:
	        return (values - lo) / (hi - lo)
	    # Avoid a 0/0 division (NaNs) when the input has no dynamic range.
	    return np.zeros_like(values)


	def _remove_planar_tilt(z_data, x_length, min_z, max_z):
	    """Subtract the PCA best-fit plane from ``z_data`` and re-map it to [min_z, max_z].

	    Returns ``z_data`` unchanged when no usable plane can be fitted or when
	    the corrected surface is perfectly flat.
	    """
	    height, width = z_data.shape
	    y_length = x_length * (height / width)
	    x_grid, y_grid = np.meshgrid(
	        np.linspace(0, x_length, width), np.linspace(y_length, 0, height)
	    )
	    points = np.stack([x_grid.ravel(), y_grid.ravel(), z_data.ravel()], axis=1)

	    # Fit on a bounded random subset to keep PCA cheap. The fixed seed makes
	    # the generated STL reproducible for a given input.
	    n_points = points.shape[0]
	    n_samples = min(n_points, 50000)
	    sample_indices = np.random.default_rng(0).choice(n_points, n_samples, replace=False)
	    pca = PCA(n_components=3)
	    pca.fit(points[sample_indices])

	    # The last (least-variance) component is the normal of the best-fit plane.
	    # The plane equation below is invariant to the sign of the normal, so no
	    # flip is needed (and pca.components_ is not mutated).
	    normal = pca.components_[2]
	    if abs(normal[2]) < 1e-12:
	        # A vertical plane cannot be written as z = f(x, y); skip the correction.
	        return z_data

	    p0 = pca.mean_
	    z_plane = p0[2] - (normal[0] * (x_grid - p0[0]) + normal[1] * (y_grid - p0[1])) / normal[2]
	    corrected_z = z_data - z_plane
	    cz_min, cz_max = corrected_z.min(), corrected_z.max()
	    if cz_max > cz_min:
	        return min_z + (corrected_z - cz_min) / (cz_max - cz_min) * (max_z - min_z)
	    return z_data


	def _surface_triangles(vertices):
	    """Triangulate the (H, W, 3) vertex grid: two upward-facing triangles per cell.

	    Returns an array of shape (2 * (H - 1) * (W - 1), 3, 3), ordered row by
	    row, cell by cell.
	    """
	    v1 = vertices[:-1, :-1]  # (i,     j)
	    v2 = vertices[1:, :-1]   # (i + 1, j)
	    v3 = vertices[1:, 1:]    # (i + 1, j + 1)
	    v4 = vertices[:-1, 1:]   # (i,     j + 1)
	    first = np.stack([v1, v2, v3], axis=2)
	    second = np.stack([v1, v3, v4], axis=2)
	    return np.stack([first, second], axis=2).reshape(-1, 3, 3)


	def _closing_triangles(vertices):
	    """Return side-wall and base triangles that close the relief down to z = 0.

	    The walls follow every vertex on the surface's perimeter, so each wall
	    shares its top edge with a surface triangle, and the base is a fan around
	    the rectangle's centre that shares its outer edges with the walls. All
	    windings are chosen so the normals point outward (walls) or down (base).
	    """
	    # Perimeter of the grid, walked clockwise when viewed from +z, without
	    # repeating the corners.
	    ring = np.concatenate([
	        vertices[0, :-1],     # top row, left -> right
	        vertices[:-1, -1],    # right column, top -> bottom
	        vertices[-1, :0:-1],  # bottom row, right -> left
	        vertices[:0:-1, 0],   # left column, bottom -> top
	    ])
	    ring_next = np.roll(ring, -1, axis=0)

	    # Projection of the perimeter onto the z = 0 plane.
	    base = ring.copy()
	    base[:, 2] = 0.0
	    base_next = np.roll(base, -1, axis=0)

	    center = (vertices[0, 0] + vertices[-1, -1]) / 2.0
	    center[2] = 0.0
	    centers = np.broadcast_to(center, base.shape)

	    walls_upper = np.stack([ring, ring_next, base_next], axis=1)
	    walls_lower = np.stack([ring, base_next, base], axis=1)
	    floor = np.stack([centers, base, base_next], axis=1)
	    return np.concatenate([walls_upper, walls_lower, floor])


	def create_3d_model(
	    input_filepath: str,
	    texture_strength: float,
	    max_z: float,
	    min_z: float,
	    x_length: float,
	    do_pca_correction: bool,
	    depth_map_smoothing: int,
	    texture_smoothing: int,
	    close_body: bool
	):
	    """Turn an image into a relief STL using estimated depth plus brightness texture.

	    Args:
	        input_filepath: Path of the uploaded image, or None if nothing was uploaded.
	        texture_strength: Weight of the grayscale brightness added to the depth map.
	        max_z: Z height assigned to the highest point of the relief.
	        min_z: Z height assigned to the lowest point of the relief.
	        x_length: Model size along X; Y is derived from the image aspect ratio.
	        do_pca_correction: Subtract the best-fit plane before the final scaling.
	        depth_map_smoothing: Gaussian kernel size for the depth map (<= 1 disables).
	        texture_smoothing: Gaussian kernel size for the brightness map (<= 1 disables).
	        close_body: Add side walls and a base at z = 0 to close the mesh.

	    Returns:
	        The path of the generated temporary ``.stl`` file, twice (one value
	        for the 3D viewer output and one for the download output).

	    Raises:
	        gr.Error: If no image is given, the sizes are invalid, the image cannot
	            be opened, or the image is too small to triangulate.
	    """
	    if input_filepath is None:
	        raise gr.Error("Please upload an image.")
	    if max_z <= min_z:
	        raise gr.Error("Max Z-height must be greater than Min Z-height.")
	    if x_length <= 0:
	        raise gr.Error("X Length must be greater than zero.")

	    try:
	        # The context manager releases the file handle once the pixels are copied.
	        with Image.open(input_filepath) as image_pil:
	            input_image = np.array(image_pil.convert("RGB"))
	    except Exception as e:
	        raise gr.Error(f"Could not open image file. Error: {e}") from e

	    # Downsample large images to stay within the pixel budget.
	    h, w, _ = input_image.shape
	    if h * w > MAX_RESOLUTION:
	        gr.Info("Image is large, downsampling to improve performance...")
	        ratio = (MAX_RESOLUTION / (h * w)) ** 0.5
	        # Clamp to 1 so extreme aspect ratios never request a zero-sized image.
	        new_w, new_h = max(1, int(w * ratio)), max(1, int(h * ratio))
	        input_image = cv2.resize(input_image, (new_w, new_h), interpolation=cv2.INTER_AREA)
	        h, w = input_image.shape[:2]

	    if h < 2 or w < 2:
	        # A grid needs at least 2x2 vertices to contain a single face.
	        raise gr.Error("Image must be at least 2x2 pixels.")

	    # Depth estimation, resampled back to the image resolution.
	    image = Image.fromarray(input_image)
	    with torch.no_grad():
	        inputs = processor(images=image, return_tensors="pt").to(device)
	        predicted_depth = model(**inputs).predicted_depth
	        # Index [0, 0] instead of squeeze() so 1-pixel-wide/tall axes survive.
	        depth = torch.nn.functional.interpolate(
	            predicted_depth.unsqueeze(1), size=(h, w),
	            mode="bicubic", align_corners=False,
	        )[0, 0].cpu().numpy()
	    depth_normalized = _rescale_unit(depth)

	    # Optional smoothing of the base depth map.
	    if depth_map_smoothing > 1:
	        ksize = _to_odd_kernel(depth_map_smoothing)
	        depth_normalized = cv2.GaussianBlur(depth_normalized, (ksize, ksize), 0)

	    # Brightness texture from the grayscale image, in [0, 1]. It has the same
	    # shape as the depth map because the depth was interpolated to (h, w).
	    gray_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2GRAY)
	    brightness_normalized = gray_image.astype(float) / 255.0
	    if texture_smoothing > 1:
	        ksize = _to_odd_kernel(texture_smoothing)
	        brightness_normalized = cv2.GaussianBlur(brightness_normalized, (ksize, ksize), 0)

	    # Blend depth and texture, then map the result onto [min_z, max_z].
	    combined_map = depth_normalized + (brightness_normalized * texture_strength)
	    z_data = min_z + _rescale_unit(combined_map) * (max_z - min_z)

	    if do_pca_correction:
	        z_data = _remove_planar_tilt(z_data, x_length, min_z, max_z)

	    # Vertex grid: X grows with the column index, Y shrinks with the row index
	    # so that the first image row ends up at the largest Y.
	    height, width = z_data.shape
	    y_length = x_length * (height / width)
	    vertices = np.empty((height, width, 3))
	    vertices[:, :, 0] = np.linspace(0, x_length, width)[np.newaxis, :]
	    vertices[:, :, 1] = np.linspace(y_length, 0, height)[:, np.newaxis]
	    vertices[:, :, 2] = z_data

	    triangles = _surface_triangles(vertices)
	    if close_body:
	        triangles = np.concatenate([triangles, _closing_triangles(vertices)])

	    surface = mesh.Mesh(np.zeros(len(triangles), dtype=mesh.Mesh.dtype))
	    surface.vectors = triangles

	    # Reserve a unique path, close the handle, then let the mesh writer open
	    # the file itself (re-opening an open temp file by name fails on some OSes).
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".stl") as tmpfile:
	        stl_path = tmpfile.name
	    surface.save(stl_path)
	    return stl_path, stl_path

	# Two-column layout: inputs and parameters on the left, generated model on the right.
	with gr.Blocks(theme="base") as demo:
	    gr.Markdown("# Image to 3D Relief Generator")
	    with gr.Row():
	        # Left column: image upload and generation parameters.
	        with gr.Column(scale=1):
	            input_image = gr.Image(type="filepath", label="Upload Image")
	            gr.Markdown("### Model Parameters")
	            texture_strength = gr.Slider(
	                minimum=0.0,
	                maximum=1.0,
	                value=0.1,
	                step=0.001,
	                label="Brightness Texture Strength",
	            )
	            depth_map_smoothing = gr.Slider(
	                minimum=0,
	                maximum=51,
	                value=0,
	                step=1,
	                label="Depth Map Smoothing",
	                info="Smooths the base geometry. 0 = none.",
	            )
	            texture_smoothing = gr.Slider(
	                minimum=0,
	                maximum=51,
	                value=0,
	                step=1,
	                label="Texture Smoothing",
	                info="Smooths the brightness texture. 0 = none.",
	            )

	            x_length = gr.Number(value=100, label="X Length (units)")
	            min_z = gr.Number(value=0.5, label="Min Z-Height (units)")
	            max_z = gr.Number(value=5.0, label="Max Z-Height (units)")
	            do_pca = gr.Checkbox(value=True, label="Enable PCA Planar Correction")
	            close_body = gr.Checkbox(value=False, label="Close Body")

	            generate_btn = gr.Button("Generate STL", variant="primary")

	        # Right column: 3D preview and downloadable file.
	        with gr.Column(scale=1):
	            gr.Markdown("### 3D Model Output")
	            output_model = gr.Model3D(label="Generated 3D Model")
	            output_file = gr.File(label="Download STL File")

	    # The input components are listed in the positional order of
	    # create_3d_model's parameters; its two return values feed the two outputs.
	    generate_btn.click(
	        fn=create_3d_model,
	        inputs=[
	            input_image,
	            texture_strength,
	            max_z,
	            min_z,
	            x_length,
	            do_pca,
	            depth_map_smoothing,
	            texture_smoothing,
	            close_body,
	        ],
	        outputs=[output_model, output_file],
	    )

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	    )