Code release 🎉

huangyangyi · Oct 26, 2023 · 810ed77 · 810ed77
1 parent 7148939
commit 810ed77
Show file tree

Hide file tree

Showing 186 changed files with 97,965 additions and 1 deletion.
diff --git a/.DS_Store b/.DS_Store
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,20 @@
+__pycache__
+data/*
+exp/demo/*
+openai
+*.o
+*.so
+*.pyx
+core/lib/freqencoder/build
+core/lib/gridencoder/build
+core/lib/freqencoder/dist
+core/lib/gridencoder/dist
+*.egg-info
+logs
+*.pt
+*.pth
+thirdparties/MODNet
+thirdparties/clip
+clip_ckpts
+input_data/demo/*
+.DS_Store
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2023 Yangyi Huang
+Copyright (c) 2023 Yangyi Huang, Hongwei Yi, Yuliang Xiu, Tingting Liao, Jiaxiang Tang, Deng Cai, Justus Thies
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/README.md b/README.md
@@ -40,6 +40,30 @@
 TeCH considers image-based reconstruction as a conditional generation task, taking conditions from both the input image and the derived descriptions. It is capable of reconstructing "lifelike" 3D clothed humans. <strong>“Lifelike”</strong> refers to 1) a detailed full-body geometry, including facial features and clothing wrinkles, in both frontal and unseen regions, and 2) a high-quality texture with consistent color and intricate patterns.
 <br/>
 
+## Installation
+
+Please follow the [Installation Instructions](docs/install.md) to set up all the required packages.
+
+## Getting Started
+
+We provide a run script at `scripts/run.sh`. Before getting started, you need to set the environment variables `CUDA_HOME` and `REPLICATE_API_TOKEN` ([get your token here](https://replicate.com/signin?next=/account/api-tokens)) in the script.
+
+After that, you can use TeCH to create a highly detailed clothed human textured mesh from a single image, for example:
+
+```shell
+sh scripts/run.sh input/examples/name.img exp/examples/name
+```
+
+The results will be saved in the experiment folder `exp/examples/name`, and the textured mesh will be saved as `exp/examples/name/obj/name_texture.obj`.
+
+Note that in "Step 3", the current DreamBooth implementation requires 2\*32G of GPU memory, while 1\*32G of GPU memory is sufficient for the other steps. The entire training process for a subject takes ~3 hours on our V100 GPUs.
+
+## TODOs
+
+- [ ] Release of evaluation protocols and results data for comparison (on CAPE & THuman2.0 datasets).
+- [ ] Try to use the diffusers version of DreamBooth to save training memory.
+- [ ] Further improvement of efficiency and robustness.
+
 ## Citation
 
 ```bibtex
@@ -50,3 +74,9 @@ TeCH considers image-based reconstruction as a conditional generation task, taki
   year={2024}
 }
 ```
+## License
+This code and model are available for non-commercial scientific research purposes as defined in the LICENSE (i.e., MIT LICENSE). 
+Note that to use TeCH, you have to register for SMPL-X and agree to its license, which is not the MIT License. You can check the SMPL-X license at https://github.com/vchoutas/smplx/blob/main/LICENSE.
+
+## Acknowledgment
+This implementation is mainly built on [Stable Dreamfusion](https://github.com/ashawkey/stable-dreamfusion), [ECON](https://github.com/YuliangXiu/ECON), [DreamBooth-Stable-Diffusion](https://github.com/XavierXiao/Dreambooth-Stable-Diffusion), and the BLIP API from Salesforce on [Replicate](https://replicate.com/salesforce/blip).
diff --git a/configs/default.yaml b/configs/default.yaml
@@ -0,0 +1,160 @@
+workspace: null  # no default value
+exp_root: null  # no default value; tech_texture.yaml overrides it with ''
+stage: null  # the tech_*.yaml configs set `geometry` or `texture`
+use_gl: false
+profile: false
+fp16: false  # NOTE(review): `train.fp16` is also defined; confirm which one is read
+
+model:
+  use_dmtet_network: false  # tech_geometry.yaml switches this on
+  use_explicit_tet: false
+  use_color_network: false  # tech_texture.yaml switches this on
+  tet_shell_offset: 0.1
+  tet_shell_decimate: 0.9
+  tet_offset_scale: 0.
+  tet_grid_scale: 0.
+  tet_grid_volume: 0.00000005
+  tet_num_subdiv: 0  # both tech_*.yaml stage configs raise this to 1
+  dmtet_network: hash
+  render_ssaa: 4
+  use_texture_2d: false
+  use_vertex_tex: false
+  mesh_scale: 1.0
+  albedo_res: 2048
+  different_bg: false
+  single_bg_color: false
+  use_can_pose_space: false  # tech_texture.yaml switches this on
+  # geo_* / color_* pairs: max resolution and level count; each key must appear only once.
+  geo_hash_max_res: 1024
+  geo_hash_num_levels: 16
+  color_hash_num_levels: 16
+  color_hash_max_res: 2048
+  color_num_layers: 1
+  color_hidden_dim: 32
+  min_near: 0.01
+
+
+train:
+  dmtet_lr: 0.1
+  init_texture_3d: false
+  init_mesh: true
+  init_mesh_padding: 0.0
+  tet_subdiv_steps: null  # tech_geometry.yaml sets [5000]
+  workspace: null
+  eval_interval: 10
+  lock_geo: false  # tech_texture.yaml sets true
+  fp16: false
+  render_ssaa: 4
+  w: 512
+  h: 512

+  iters: 0  # stage configs set the real count (10000 geometry, 7000 texture)
+  lr: 0.001
+  warm_iters: 0
+  min_lr: 0

+  ckpt: latest
+  pretrained: null

+  optim: adan

+  render_relative_normal: true
+  albedo_sample_ratio: 1.0
+  normal_sample_ratio: 0.0  # tech_geometry.yaml sets 1.0
+  textureless_sample_ratio: 0.0
+  can_pose_sample_ratio: 0.0
+  train_both: false

+  loss_mask_erosion: 10

+  lambda_normal: 0.0  # lambda_*: presumably loss weights (confirm); all zero here
+  lambda_lap: 0.0
+  lambda_recon: 0.0
+  lambda_sil: 0.0
+  lambda_color_chamfer: 0.0

+  crop_for_lpips: false
+  use_lap_loss: false
+  single_directional_color_chamfer: false
+  color_chamfer_step: 0
+  color_chamfer_space: rgb

+  decay_lnorm_cosine_cycle: null  # tech_geometry.yaml sets 5000
+  decay_lnorm_cosine_max_iter: null  # tech_geometry.yaml sets 10000
+  decay_lnorm_iter: null
+  decay_lnorm_ratio: null

+  jitter_pose: false
+  radius_range: [0.7, 1.3]
+  height_range: [-0.4, 0.4]
+  fovy_range: [40, 70]
+  theta_range: [60, 120]
+  phi_range: [0.0, 360.0]
+  phi_diff: 30
+  angle_front: 60
+  angle_overhead: 30
+  face_sample_ratio: 0.3
+  face_height_range: [0.0, 0.0]
+  face_radius_range: [0.3, 0.4]
+  face_phi_diff: 30
+  face_theta_range: [90, 90]
+  face_phi_range: [-90, 90]

+  init_empty_tex: false
+
+data:
+  load_input_image: true
+  img: null
+  load_front_normal: false  # enabled by both tech_*.yaml stage configs
+  front_normal_img: null
+  load_back_normal: false  # enabled by both tech_*.yaml stage configs
+  back_normal_img: null
+  load_keypoints: true
+  keypoints_path: null
+  load_result_mesh: false  # enabled by tech_texture.yaml
+  last_model: null
+  last_ref_model: null
+  smpl_model: null
+  load_apose_mesh: false  # enabled by tech_texture.yaml
+  can_pose_folder: null
+  load_occ_mask: false
+  occ_mask: null
+  loss_mask: null
+  load_da_pose_mesh: false
+  da_pose_mesh: null
+
+guidance:
+  type: stable-diffusion
+  use_view_prompt: true
+  sd_version: 1.5  # unquoted, so YAML yields a float; NOTE(review): confirm the consumer expects that
+  guidance_scale: 100.0
+  step_range: [0.02, 0.25]
+  use_dreambooth: true
+  hf_key: null
+  head_hf_key: null
+  lora: null
+  text: null
+  text_geo: null
+  text_head: null
+  text_extra: ""
+  normal_text: null  # tech_geometry.yaml supplies a prompt prefix here
+  normal_text_extra: ""
+  textureless_text: null
+  textureless_text_extra: ""
+  negative: ""
+  negative_normal: ""
+  negative_textureless: ""
+  controlnet: null
+  controlnet_guidance_geometry: null
+  controlnet_conditioning_scale: 0.0
+  controlnet_openpose_guidance: null
+
+test:
+  test: false
+  not_test_video: false
+  save_mesh: true
+  save_uv: false
+  write_image: false
+  W: 800  # upper-case key here, whereas `train` uses lower-case `w` / `h`
+  H: 800
+
diff --git a/configs/tech_geometry.yaml b/configs/tech_geometry.yaml
@@ -0,0 +1,41 @@
+exp_root: null
+stage: geometry  # geometry-stage overrides of the keys in configs/default.yaml
+model:
+  use_dmtet_network: true
+  tet_offset_scale: 0.0
+  tet_grid_volume: 0.00000005  # plain decimal, as in default.yaml; dotless `5e-8` is a string to YAML 1.1 loaders
+  tet_num_subdiv: 1
+  render_ssaa: 4
+train:
+  iters: 10000
+  tet_subdiv_steps: [5000]
+  use_lap_loss: true
+  normal_sample_ratio: 1.0
+  radius_range: [0.7, 1.3]
+  height_range: [-0.4, 0.4]
+  theta_range: [60, 120]
+  phi_diff: 30
+  face_sample_ratio: 0.3
+  face_height_range: [0.0, 0.0]
+  face_radius_range: [0.3, 0.4]
+  face_phi_diff: 30
+  face_theta_range: [90, 90]
+  face_phi_range: [-90, 90]
+  render_relative_normal: true
+  lambda_lap: 10000.0  # was `1e4`; written out so every YAML loader resolves a float
+  lambda_sil: 10000.0
+  lambda_normal: 10000.0
+  lambda_recon: 0.0
+  lambda_color_chamfer: 0.0
+  decay_lnorm_cosine_cycle: 5000
+  decay_lnorm_cosine_max_iter: 10000

+data:
+  load_input_image: true
+  load_front_normal: true
+  load_back_normal: true
+guidance:
+  normal_text: "a smooth and detailed sculpture of"
+  use_view_prompt: true
+  guidance_scale: 100.0
+  step_range: [0.02, 0.25]
diff --git a/configs/tech_texture.yaml b/configs/tech_texture.yaml
@@ -0,0 +1,44 @@
+exp_root: ''  # NOTE(review): tech_geometry.yaml uses null here; confirm the empty string is intended
+stage: texture  # texture-stage overrides of the keys in configs/default.yaml
+model:
+  use_dmtet_network: false
+  use_color_network: true
+  tet_offset_scale: 0.0
+  tet_grid_volume: 0.00000005  # plain decimal, as in default.yaml; dotless `5e-8` is a string to YAML 1.1 loaders
+  tet_num_subdiv: 1
+  render_ssaa: 4
+  use_can_pose_space: true
+train:
+  lock_geo: true
+  iters: 7000
+  normal_sample_ratio: 0.0
+  radius_range: [0.7, 1.3]
+  height_range: [-0.4, 0.4]
+  theta_range: [60, 120]
+  phi_diff: 30
+  face_sample_ratio: 0.3
+  face_height_range: [0.0, 0.0]
+  face_radius_range: [0.3, 0.4]
+  face_phi_diff: 30
+  face_theta_range: [90, 90]
+  face_phi_range: [-90, 90]
+  lambda_lap: 0.0
+  lambda_sil: 0.0
+  lambda_normal: 0.0
+  lambda_recon: 10000.0
+  lambda_color_chamfer: 1000000.0  # was `1e6`; written out so every YAML loader resolves a float
+  color_chamfer_step: 5000
+  crop_for_lpips: true
+test:
+  save_mesh: true
+  test: false
+data:
+  load_input_image: true
+  load_front_normal: true
+  load_back_normal: true
+  load_result_mesh: true
+  load_apose_mesh: true
+guidance:
+  use_view_prompt: true
+  guidance_scale: 100.0
+  step_range: [0.02, 0.25]