import os
import os.path as osp
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Iterable

from hydra.core.config_store import ConfigStore
from hydra.conf import RunDir

@dataclass
class CustomHydraRunDir(RunDir):
    dir: str = './outputs/${run.name}/single'

@dataclass
class RunConfig:
    name: str = 'debug'
    job: str = 'train'
    mixed_precision: str = 'fp16'  # 'no'
    cpu: bool = False
    seed: int = 42
    val_before_training: bool = True
    vis_before_training: bool = True
    limit_train_batches: Optional[int] = None
    limit_val_batches: Optional[int] = None
    max_steps: int = 100_000
    checkpoint_freq: int = 1_000
    val_freq: int = 5_000
    vis_freq: int = 5_000
    # vis_freq: int = 10_000
    log_step_freq: int = 20
    print_step_freq: int = 100

    # Configs for running the demo
    stage1_name: str = 'stage1'  # experiment name of the stage 1 model
    stage2_name: str = 'stage2'  # experiment name of the stage 2 model
    image_path: str = ''  # path to the demo images; can be a single file or a glob pattern
    share: bool = False  # whether to run gradio with a temporary public URL

    # Absolute path to the working directory
    code_dir_abs: str = osp.dirname(osp.dirname(osp.abspath(__file__)))

    # Inference configs
    num_inference_steps: int = 1000
    diffusion_scheduler: Optional[str] = 'ddpm'
    num_samples: int = 1
    # num_sample_batches: Optional[int] = None
    num_sample_batches: Optional[int] = 2000
    sample_from_ema: bool = False
    sample_save_evolutions: bool = False  # temporarily set by default
    save_name: str = 'sample'  # additional save name
    redo: bool = False

    # For parallel sampling in slurm
    batch_start: int = 0
    batch_end: Optional[int] = None

    # Training configs
    freeze_feature_model: bool = True

    # Coloring training configs
    coloring_training_noise_std: float = 0.0
    coloring_sample_dir: Optional[str] = None

    sample_mode: str = 'sample'  # sample from pure noise or from some intermediate step
    sample_noise_step: int = 500  # add noise to GT up to this step, then denoise
    sample_save_gt: bool = True

@dataclass
class LoggingConfig:
    wandb: bool = True
    wandb_project: str = 'pc2'

@dataclass
class PointCloudProjectionModelConfig:
    # Feature extraction arguments.
    # Note: '${...}' defaults are OmegaConf interpolations resolved at composition time,
    # which is why some typed fields carry string defaults here.
    image_size: int = '${dataset.image_size}'
    image_feature_model: str = 'vit_base_patch16_224_mae'  # or 'vit_small_patch16_224_msn' or 'identity'
    use_local_colors: bool = True
    use_local_features: bool = True
    use_global_features: bool = False
    use_mask: bool = True
    use_distance_transform: bool = True

    # Point cloud data arguments. Note these are here because the processing happens
    # inside the model, rather than inside the dataset.
    scale_factor: float = '${dataset.scale_factor}'
    colors_mean: float = 0.5
    colors_std: float = 0.5
    color_channels: int = 3
    predict_shape: bool = True
    predict_color: bool = False

    # added by XH
    load_sample_init: bool = False  # load initial samples from file
    sample_init_scale: float = 1.0  # scale for the initial point cloud samples
    test_init_with_gtpc: bool = False  # at test time, initialize samples with GT samples
    consistent_center: bool = True  # use consistent center prediction from CCD-3DR
    voxel_resolution_multiplier: float = 1  # increase the network voxel resolution

    # Predict binary segmentation
    predict_binary: bool = False  # True for the stage 1 model, False for others
    lw_binary: float = 3.0  # weight so the binary segmentation loss has roughly the same magnitude as the other losses

    # For the separate model
    binary_training_noise_std: float = 0.1  # from the GitHub doc for predicting color
    self_conditioning: bool = False


@dataclass
class PVCNNAEModelConfig(PointCloudProjectionModelConfig):
    """My own model config; must inherit the parent class."""
    model_name: str = 'pvcnn-ae'
    latent_dim: int = 1024
    num_dec_blocks: int = 6
    block_dims: List[int] = field(default_factory=lambda: [512, 256])
    num_points: int = 1500
    bottleneck_dim: int = -1  # the input dim to the last MLP layer

@dataclass
class PointCloudDiffusionModelConfig(PointCloudProjectionModelConfig):
    model_name: str = 'pc2-diff-ho'  # default for behave

    # Diffusion arguments
    beta_start: float = 1e-5  # 0.00085
    beta_end: float = 8e-3  # 0.012
    beta_schedule: str = 'linear'  # 'custom'
    dm_pred_type: str = 'epsilon'  # diffusion model prediction type: sample (x0) or epsilon (noise)
    ddim_eta: float = 1.0  # DDIM eta parameter; eta=0 is the standard choice and gives deterministic generation

    # Point cloud model arguments
    point_cloud_model: str = 'pvcnn'
    point_cloud_model_embed_dim: int = 64
    dataset_type: str = '${dataset.type}'


@dataclass
class CrossAttnHOModelConfig(PointCloudDiffusionModelConfig):
    model_name: str = 'diff-ho-attn'

    attn_type: str = 'coord3d+posenc-learnable'
    attn_weight: float = 1.0
    point_visible_test: str = 'combine'  # how to compute point visibility: use all points or only human/object points

@dataclass
class DirectTransModelConfig(PointCloudProjectionModelConfig):
    model_name: str = 'direct-transl-ho'
    pooling: str = 'avg'
    act: str = 'gelu'
    out_act: str = 'relu'
    # feat_dims_transl: Iterable[Any] = (384, 256, 128, 6)  # cannot use a plain List[int] default: https://github.com/facebookresearch/hydra/issues/1752#issuecomment-893174197
    # feat_dims_scale: Iterable[Any] = (384, 128, 64, 2)
    feat_dims_transl: List[int] = field(default_factory=lambda: [384, 256, 128, 6])
    feat_dims_scale: List[int] = field(default_factory=lambda: [384, 128, 64, 2])
    lw_transl: float = 10000.0
    lw_scale: float = 10000.0


@dataclass
class PointCloudColoringModelConfig(PointCloudProjectionModelConfig):
    # Projection arguments
    predict_shape: bool = False
    predict_color: bool = True

    # Point cloud model arguments
    point_cloud_model: str = 'pvcnn'
    point_cloud_model_layers: int = 1
    point_cloud_model_embed_dim: int = 64

@dataclass
class DatasetConfig:
    type: str


@dataclass
class PointCloudDatasetConfig(DatasetConfig):
    eval_split: str = 'val'
    max_points: int = 16_384
    image_size: int = 224
    scale_factor: float = 1.0
    restrict_model_ids: Optional[List] = None  # run on only a subset of the data points


@dataclass
class CO3DConfig(PointCloudDatasetConfig):
    type: str = 'co3dv2'
    # root: str = os.getenv('CO3DV2_DATASET_ROOT')
    root: str = '/BS/xxie-2/work/co3d/hydrant'
    category: str = 'hydrant'
    subset_name: str = 'fewview_dev'
    mask_images: bool = '${model.use_mask}'

@dataclass
class ShapeNetR2N2Config(PointCloudDatasetConfig):
    # added by XH
    fix_sample: bool = True
    category: str = 'chair'

    type: str = 'shapenet_r2n2'
    root: str = '/BS/chiban2/work/data_shapenet/ShapeNetCore.v1'
    r2n2_dir: str = '/BS/databases20/3d-r2n2'
    shapenet_dir: str = '/BS/chiban2/work/data_shapenet/ShapeNetCore.v1'
    preprocessed_r2n2_dir: str = '${dataset.root}/r2n2_preprocessed_renders'
    splits_file: str = '${dataset.root}/r2n2_standard_splits_from_ShapeNet_taxonomy.json'
    # splits_file: str = '${dataset.root}/pix2mesh_splits_val05.json'  # <-- incorrect
    scale_factor: float = 7.0
    point_cloud_filename: str = 'pointcloud_r2n2.npz'  # should use 'pointcloud_mesh.npz'

@dataclass
class BehaveDatasetConfig(PointCloudDatasetConfig):
    # added by XH
    type: str = 'behave'

    fix_sample: bool = True
    behave_dir: str = '/BS/xxie-5/static00/behave_release/sequences/'
    split_file: str = ''  # specify your dataset split file here
    scale_factor: float = 7.0  # use the same value as ShapeNet
    sample_ratio_hum: float = 0.5
    image_size: int = 224

    normalize_type: str = 'comb'
    smpl_type: str = 'gt'  # which SMPL mesh to use for obtaining normalization parameters
    test_transl_type: str = 'norm'

    load_corr_points: bool = False  # load autoencoder points for object and SMPL
    uniform_obj_sample: bool = False

    # Configs for direct translation prediction
    bkg_type: str = 'none'
    bbox_params: str = 'none'
    ho_segm_pred_path: Optional[str] = None
    use_gt_transl: bool = False

    cam_noise_std: float = 0.  # add noise to the camera pose
    sep_same_crop: bool = False  # use the same input image crop for the separate models
    aug_blur: float = 0.  # blur augmentation
    std_coverage: float = 3.5  # a heuristic value used to estimate the translation
    v2v_path: str = ''  # object v2v correspondence path


@dataclass
class ShapeDatasetConfig(BehaveDatasetConfig):
    """The dataset used to train the AE on aligned shapes."""
    type: str = 'shape'
    fix_sample: bool = False
    split_file: str = '/BS/xxie-2/work/pc2-diff/experiments/splits/shapes-chair.pkl'

# TODO
@dataclass
class ShapeNetNMRConfig(PointCloudDatasetConfig):
    type: str = 'shapenet_nmr'
    shapenet_nmr_dir: str = '/work/lukemk/machine-learning-datasets/3d-reconstruction/ShapeNet_NMR/NMR_Dataset'
    synset_names: str = 'chair'  # comma-separated or 'all'
    augmentation: str = 'all'
    scale_factor: float = 7.0


@dataclass
class AugmentationConfig:
    # The variable type must be specified for the field to be defined properly.
    # Radius (in pixels) of a random square used to mask the object; zero means no occlusion.
    max_radius: int = 0

@dataclass
class DataloaderConfig:
    # batch_size: int = 8  # 2 for debug
    batch_size: int = 16
    num_workers: int = 14  # 0 for debug; suggested by accelerator for gpu20


@dataclass
class LossConfig:
    diffusion_weight: float = 1.0
    rgb_weight: float = 1.0
    consistency_weight: float = 1.0


@dataclass
class CheckpointConfig:
    resume: Optional[str] = 'test'
    resume_training: bool = True
    resume_training_optimizer: bool = True
    resume_training_scheduler: bool = True
    resume_training_state: bool = True


@dataclass
class ExponentialMovingAverageConfig:
    use_ema: bool = False
    # # From Diffusers EMA (should probably switch)
    # ema_inv_gamma: float = 1.0
    # ema_power: float = 0.75
    # ema_max_decay: float = 0.9999
    decay: float = 0.999
    update_every: int = 20

@dataclass
class OptimizerConfig:
    type: str
    name: str
    lr: float = 3e-4
    weight_decay: float = 0.0
    scale_learning_rate_with_batch_size: bool = False
    gradient_accumulation_steps: int = 1
    clip_grad_norm: Optional[float] = 50.0  # 5.0
    kwargs: Dict = field(default_factory=lambda: dict())


@dataclass
class AdadeltaOptimizerConfig(OptimizerConfig):
    type: str = 'torch'
    name: str = 'Adadelta'
    kwargs: Dict = field(default_factory=lambda: dict(
        weight_decay=1e-6,
    ))


@dataclass
class AdamOptimizerConfig(OptimizerConfig):
    type: str = 'torch'
    name: str = 'AdamW'
    weight_decay: float = 1e-6
    kwargs: Dict = field(default_factory=lambda: dict(betas=(0.95, 0.999)))


@dataclass
class SchedulerConfig:
    type: str
    kwargs: Dict = field(default_factory=lambda: dict())


@dataclass
class LinearSchedulerConfig(SchedulerConfig):
    type: str = 'transformers'
    kwargs: Dict = field(default_factory=lambda: dict(
        name='linear',
        num_warmup_steps=0,
        num_training_steps='${run.max_steps}',
    ))


@dataclass
class CosineSchedulerConfig(SchedulerConfig):
    type: str = 'transformers'
    kwargs: Dict = field(default_factory=lambda: dict(
        name='cosine',
        num_warmup_steps=2000,  # 0
        num_training_steps='${run.max_steps}',
    ))

@dataclass
class ProjectConfig:
    run: RunConfig
    logging: LoggingConfig
    dataset: PointCloudDatasetConfig
    augmentations: AugmentationConfig
    dataloader: DataloaderConfig
    loss: LossConfig
    model: PointCloudProjectionModelConfig
    ema: ExponentialMovingAverageConfig
    checkpoint: CheckpointConfig
    optimizer: OptimizerConfig
    scheduler: SchedulerConfig

    defaults: List[Any] = field(default_factory=lambda: [
        'custom_hydra_run_dir',
        {'run': 'default'},
        {'logging': 'default'},
        {'model': 'ho-attn'},
        # {'dataset': 'co3d'},
        {'dataset': 'behave'},
        {'augmentations': 'default'},
        {'dataloader': 'default'},
        {'ema': 'default'},
        {'loss': 'default'},
        {'checkpoint': 'default'},
        {'optimizer': 'adam'},  # default: AdamW
        {'scheduler': 'linear'},
        # {'scheduler': 'cosine'},
    ])
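
# Note: each entry in `defaults` above selects one of the nodes registered with
# cs.store() below, by group and name; e.g. {'model': 'ho-attn'} resolves to
# CrossAttnHOModelConfig and {'optimizer': 'adam'} to AdamOptimizerConfig. Any of
# these choices can be overridden from the command line (e.g. `model=pvcnn-ae scheduler=cosine`).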

cs = ConfigStore.instance()
cs.store(name='custom_hydra_run_dir', node=CustomHydraRunDir, package='hydra.run')
cs.store(group='run', name='default', node=RunConfig)
cs.store(group='logging', name='default', node=LoggingConfig)
cs.store(group='model', name='diffrec', node=PointCloudDiffusionModelConfig)
cs.store(group='model', name='coloring_model', node=PointCloudColoringModelConfig)
cs.store(group='model', name='direct-transl', node=DirectTransModelConfig)
cs.store(group='model', name='ho-attn', node=CrossAttnHOModelConfig)
cs.store(group='model', name='pvcnn-ae', node=PVCNNAEModelConfig)
cs.store(group='dataset', name='co3d', node=CO3DConfig)
# TODO
cs.store(group='dataset', name='shapenet_r2n2', node=ShapeNetR2N2Config)
cs.store(group='dataset', name='behave', node=BehaveDatasetConfig)
cs.store(group='dataset', name='shape', node=ShapeDatasetConfig)
# cs.store(group='dataset', name='shapenet_nmr', node=ShapeNetNMRConfig)
cs.store(group='augmentations', name='default', node=AugmentationConfig)
cs.store(group='dataloader', name='default', node=DataloaderConfig)
cs.store(group='loss', name='default', node=LossConfig)
cs.store(group='ema', name='default', node=ExponentialMovingAverageConfig)
cs.store(group='checkpoint', name='default', node=CheckpointConfig)
cs.store(group='optimizer', name='adadelta', node=AdadeltaOptimizerConfig)
cs.store(group='optimizer', name='adam', node=AdamOptimizerConfig)
cs.store(group='scheduler', name='linear', node=LinearSchedulerConfig)
cs.store(group='scheduler', name='cosine', node=CosineSchedulerConfig)
cs.store(name='configs', node=ProjectConfig)
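
# A minimal usage sketch (an assumption, not part of the original project): running this
# module directly composes the 'configs' node registered above and prints the resolved
# YAML. Group choices and individual values can be overridden on the command line, e.g.
# `dataset=shape optimizer=adadelta run.max_steps=50000`. Assumes Hydra >= 1.2 is
# installed (for the `version_base` argument of `hydra.main`).
if __name__ == '__main__':
    import hydra
    from omegaconf import OmegaConf

    @hydra.main(config_path=None, config_name='configs', version_base=None)
    def _show_config(cfg: ProjectConfig) -> None:
        # Print the fully composed configuration for inspection.
        print(OmegaConf.to_yaml(cfg))

    _show_config()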