Upload folder using huggingface_hub
- .gitattributes +4 -0
- hub/.lock/iic___cv_stable-diffusion-v2_image-inpainting_base +0 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/.mdl +0 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/.msc +0 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/.mv +1 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/README.md +173 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/configuration.json +17 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/feature_extractor/preprocessor_config.json +20 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/img/inpainting_demo.gif +3 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/img/test_01.png +3 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/img/test_02.png +3 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/img/test_03.png +3 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/model_index.json +33 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/requirements.txt +3 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/scheduler/scheduler_config.json +13 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/text_encoder/config.json +25 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/text_encoder/pytorch_model.bin +3 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/tokenizer/merges.txt +0 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/tokenizer/special_tokens_map.json +24 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/tokenizer/tokenizer_config.json +34 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/tokenizer/vocab.json +0 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/unet/config.json +44 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/unet/diffusion_pytorch_model.bin +3 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/vae/config.json +30 -0
- hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/vae/diffusion_pytorch_model.bin +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/img/inpainting_demo.gif filter=lfs diff=lfs merge=lfs -text
+hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/img/test_01.png filter=lfs diff=lfs merge=lfs -text
+hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/img/test_02.png filter=lfs diff=lfs merge=lfs -text
+hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/img/test_03.png filter=lfs diff=lfs merge=lfs -text
hub/.lock/iic___cv_stable-diffusion-v2_image-inpainting_base
ADDED
File without changes
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/.mdl
ADDED
Binary file (71 Bytes)
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/.msc
ADDED
Binary file (1.56 kB)
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/.mv
ADDED
@@ -0,0 +1 @@
Revision:master,CreatedAt:1755071739
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/README.md
ADDED
@@ -0,0 +1,173 @@
---
tasks:
- image-inpainting
widgets:
  - task: image-inpainting
    inputs:
    - type: image
      name: image
      validator:
        max_size: 5M
        max_resolution: 1920*1080
    - name: mask
    parameters:
    - name: prompt
      title: Prompt
      type: string
    examples:
    - name: 1
      title: 示例1
      inputs:
      - data: git://img/test_01.png
      parameters:
      - name: prompt
        type: string
        value: background
    - name: 2
      title: 示例2
      inputs:
      - data: git://img/test_02.png
      parameters:
      - name: prompt
        type: string
        value: background
    - name: 3
      title: 示例3
      inputs:
      - data: git://img/test_03.png
      parameters:
      - name: prompt
        type: string
        value: background
    inferencespec:
      cpu: 4
      memory: 16000
      gpu: 1
      gpu_memory: 16000

domain:
- cv
frameworks:
- pytorch
customized-quickstart: True
finetune-support: False
license: Apache License 2.0
tags:
- Image Inpainting
- Stable Diffusion
- stablediffusion model
- 图像填充
- 图像修复
- 图像修补

---
# Stable Diffusion v2 for Image Inpainting

This is an image inpainting model: given an image with part of its content erased, it performs end-to-end inpainting and returns the completed image.

Example results:

<img src="./img/inpainting_demo.gif">

## Model Description
The model is built on Stable Diffusion v2 and diffusers.

## Intended Use and Scope

1. The model handles images from a wide range of scenes. Given an image and a mask marking the region to be repaired, it generates a new image with that region filled in.
2. Inference has certain GPU memory requirements. In FP16 mode with enable_attention_slicing enabled, the recommended maximum input resolution on a 16 GB GPU is 1920x1080; for FP32 mode, a GPU with larger memory (32 GB or more) is recommended. If no GPU is available or memory is insufficient, you can try CPU inference.

### How to Use the Demo Service
Draw a mask on the right side of the page to try the model quickly:
- It is recommended to click the maximize button in the upper-right corner before drawing the mask; erasing the object completely gives better inpainting results.
- To erase an object and restore the background, keep the default prompt "background"; to generate a different object instead, change the prompt to describe it. Prompts must be in English.
- It is recommended to upload images no larger than 1280x720; larger images need more GPU memory and longer inference time, and are better run through the Pipeline in a Notebook or locally.

### How to Use the Pipeline
With the ModelScope framework, the Stable Diffusion v2 inpainting model can be used through a simple Pipeline call, given an input image and a mask.

#### Inference Example
```python
import cv2
import torch
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

input_location = 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_inpainting/image_inpainting_1.png'
input_mask_location = 'https://modelscope.oss-cn-beijing.aliyuncs.com/test/images/image_inpainting/image_inpainting_mask_1.png'
prompt = 'background'
output_image_path = './result.png'

input = {
    'image': input_location,
    'mask': input_mask_location,
    'prompt': prompt
}
image_inpainting = pipeline(
    Tasks.image_inpainting,
    model='damo/cv_stable-diffusion-v2_image-inpainting_base',
    device='gpu',
    torch_dtype=torch.float16,
    enable_attention_slicing=True, model_revision='master')
output = image_inpainting(input)[OutputKeys.OUTPUT_IMG]
cv2.imwrite(output_image_path, output)
print('pipeline: the output image path is {}'.format(output_image_path))
```

If you encounter an error like:
```text
No module named 'transformers.models.qwen3'
```
consider uninstalling autoawq:
```shell
pip uninstall autoawq
```

#### Notes on the Inference Code

- Pipeline initialization parameters
  - Optional parameter device: defaults to 'gpu'; can be set to 'cpu'.
  - Optional parameter torch_dtype: defaults to torch.float16; can be set to torch.float32.
  - Optional parameter enable_attention_slicing: defaults to True; enabling it reduces GPU memory usage, and it can be turned off.

- Pipeline call parameters
  - Input requirements: the input dict must contain the fields 'image' and 'mask'; other optional input fields and their defaults include (see the usage sketch after this file for an example call):
  ```python
  'prompt': 'background',
  'num_inference_steps': 50,
  'guidance_scale': 7.5,
  'negative_prompt': None,
  'num_images_per_prompt': 1,
  'eta': 0.0
  ```
  - Additional parameters:
    - prompt can also be passed as a standalone argument when calling the Pipeline; if the input dict already contains a prompt field, the value in the input dict takes precedence.

- Because of GPU memory limits, FP16 inference is enabled by default; pass torch_dtype=torch.float32 when building the pipeline to use FP32. The torch_dtype parameter may be omitted and defaults to torch.float16.
- CPU inference is supported by passing device='cpu' when building the pipeline; in CPU mode only torch.float32 is supported for torch_dtype.

### Limitations and Possible Biases

- In practice, images generated in FP16 mode are somewhat lower in quality than those generated in FP32 mode.
- In scenes with simple, smooth backgrounds, Stable Diffusion may generate meaningless foreground objects; these can usually be removed by adjusting the prompt and model parameters.
- For some prompts in some scenes, Stable Diffusion may generate the wrong foreground object; generating several times and picking the best result can help.
- The model currently resizes the input image before inference to match the required input size.

## Training
This model is built with the open-source diffusers library. It was fine-tuned by Stability-AI from stable-diffusion-2-base (512-base-ema.ckpt) for 200k steps, using the mask generation strategy proposed in LaMa. See the [model source](https://huggingface.co/stabilityai/stable-diffusion-2-inpainting) for details.

## Acknowledgements and Citation
This model is derived from the following open-source projects:

- [https://github.com/Stability-AI/stablediffusion](https://github.com/Stability-AI/stablediffusion)
- [https://github.com/huggingface/diffusers](https://github.com/huggingface/diffusers)
- [https://huggingface.co/stabilityai/stable-diffusion-2-inpainting](https://huggingface.co/stabilityai/stable-diffusion-2-inpainting)

If you find this model helpful, please consider citing the related paper:
```
@misc{rombach2021highresolution,
      title={High-Resolution Image Synthesis with Latent Diffusion Models},
      author={Robin Rombach and Andreas Blattmann and Dominik Lorenz and Patrick Esser and Björn Ommer},
      year={2021},
      eprint={2112.10752},
      archivePrefix={arXiv},
      primaryClass={cs.CV}
}
```
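As a complement to the parameter notes in the README above, the following is a minimal sketch of a CPU/FP32 pipeline plus a call that overrides the optional input fields. It only uses what the README documents (device, torch_dtype, and the optional fields in the input dict); the image and mask paths are hypothetical, and the exact handling of extra fields is an assumption about the ModelScope pipeline rather than a guarantee.

```python
import cv2
import torch
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# CPU/FP32 variant of the pipeline construction described in the README
# (the README states CPU mode only supports torch.float32).
inpainting = pipeline(
    Tasks.image_inpainting,
    model='damo/cv_stable-diffusion-v2_image-inpainting_base',
    device='cpu',
    torch_dtype=torch.float32,
    model_revision='master')

# Optional fields ride along in the input dict; values shown are the
# documented defaults except for the reduced step count.
result = inpainting({
    'image': './img/test_01.png',   # example image shipped in this repo
    'mask': './my_mask.png',        # hypothetical mask path
    'prompt': 'background',
    'num_inference_steps': 30,      # fewer than the default 50
    'guidance_scale': 7.5,
    'negative_prompt': None,
    'num_images_per_prompt': 1,
    'eta': 0.0,
})[OutputKeys.OUTPUT_IMG]
cv2.imwrite('./result_cpu.png', result)
```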
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/configuration.json
ADDED
@@ -0,0 +1,17 @@
{
  "framework": "pytorch",
  "task": "image-inpainting",
  "pipeline": {
    "type": "image-inpainting-sdv2"
  },
  "model": {
    "type": "image-inpainting-sdv2"
  },
  "modelsetting": {
    "num_inference_steps": 50,
    "guidance_scale": 7.5,
    "num_images_per_prompt": 1,
    "eta": 0.0
  },
  "allow_remote": true
}
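The modelsetting block above carries the sampler defaults the ModelScope pipeline starts from. A small sketch of inspecting them from a downloaded copy of the repo (the local directory name is an assumption):

```python
import json
from pathlib import Path

# Hypothetical local path to a downloaded copy of this model repo.
model_dir = Path("./cv_stable-diffusion-v2_image-inpainting_base")

with open(model_dir / "configuration.json", "r", encoding="utf-8") as f:
    cfg = json.load(f)

print(cfg["pipeline"]["type"])   # image-inpainting-sdv2
print(cfg["modelsetting"])       # {'num_inference_steps': 50, 'guidance_scale': 7.5, ...}
```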
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/feature_extractor/preprocessor_config.json
ADDED
@@ -0,0 +1,20 @@
{
  "crop_size": 224,
  "do_center_crop": true,
  "do_convert_rgb": true,
  "do_normalize": true,
  "do_resize": true,
  "feature_extractor_type": "CLIPFeatureExtractor",
  "image_mean": [
    0.48145466,
    0.4578275,
    0.40821073
  ],
  "image_std": [
    0.26862954,
    0.26130258,
    0.27577711
  ],
  "resample": 3,
  "size": 224
}
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/img/inpainting_demo.gif
ADDED
Git LFS Details
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/img/test_01.png
ADDED
Git LFS Details
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/img/test_02.png
ADDED
Git LFS Details
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/img/test_03.png
ADDED
Git LFS Details
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/model_index.json
ADDED
@@ -0,0 +1,33 @@
{
  "_class_name": "StableDiffusionInpaintPipeline",
  "_diffusers_version": "0.8.0",
  "feature_extractor": [
    "transformers",
    "CLIPFeatureExtractor"
  ],
  "safety_checker": [
    null,
    null
  ],
  "scheduler": [
    "diffusers",
    "PNDMScheduler"
  ],
  "text_encoder": [
    "transformers",
    "CLIPTextModel"
  ],
  "tokenizer": [
    "transformers",
    "CLIPTokenizer"
  ],
  "unet": [
    "diffusers",
    "UNet2DConditionModel"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ],
  "requires_safety_checker": false
}
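Because model_index.json declares a standard StableDiffusionInpaintPipeline, a local copy of this folder can also be loaded directly with diffusers, independently of ModelScope. This is a sketch under assumptions (local download path, hypothetical input files); note that safety_checker is null in this repo and requires_safety_checker is false, so the pipeline loads without one.

```python
import torch
from diffusers import StableDiffusionInpaintPipeline
from PIL import Image

# Assumed local path to a downloaded copy of this repo.
model_dir = "./cv_stable-diffusion-v2_image-inpainting_base"

pipe = StableDiffusionInpaintPipeline.from_pretrained(
    model_dir,
    torch_dtype=torch.float16,   # FP16, matching the README's default
)
pipe.enable_attention_slicing()  # lower peak VRAM, as the README suggests
pipe = pipe.to("cuda")

image = Image.open("input.png").convert("RGB")   # hypothetical input image
mask = Image.open("mask.png").convert("L")       # hypothetical mask; white = repaint

result = pipe(prompt="background", image=image, mask_image=mask,
              num_inference_steps=50, guidance_scale=7.5).images[0]
result.save("result.png")
```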
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/requirements.txt
ADDED
@@ -0,0 +1,3 @@
huggingface-hub==0.25.*
transformers==4.48.3
diffusers==0.28.0
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/scheduler/scheduler_config.json
ADDED
@@ -0,0 +1,13 @@
{
  "_class_name": "PNDMScheduler",
  "_diffusers_version": "0.8.0",
  "beta_end": 0.012,
  "beta_schedule": "scaled_linear",
  "beta_start": 0.00085,
  "clip_sample": false,
  "num_train_timesteps": 1000,
  "set_alpha_to_one": false,
  "skip_prk_steps": true,
  "steps_offset": 1,
  "trained_betas": null
}
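The scheduler config above is a plain PNDM setup (scaled_linear betas over 1000 training timesteps). If you assemble components by hand with diffusers, the scheduler can be instantiated from this subfolder; the local path below is an assumption.

```python
from diffusers import PNDMScheduler

model_dir = "./cv_stable-diffusion-v2_image-inpainting_base"  # assumed local copy

scheduler = PNDMScheduler.from_pretrained(model_dir, subfolder="scheduler")
scheduler.set_timesteps(50)      # matches the default num_inference_steps
print(scheduler.timesteps[:5])   # first few inference timesteps
```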
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/text_encoder/config.json
ADDED
@@ -0,0 +1,25 @@
{
  "_name_or_path": "./hf-models/stable-diffusion-v2-inpainting/text_encoder",
  "architectures": [
    "CLIPTextModel"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "dropout": 0.0,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_size": 1024,
  "initializer_factor": 1.0,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 77,
  "model_type": "clip_text_model",
  "num_attention_heads": 16,
  "num_hidden_layers": 23,
  "pad_token_id": 1,
  "projection_dim": 512,
  "torch_dtype": "float32",
  "transformers_version": "4.25.0.dev0",
  "vocab_size": 49408
}
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/text_encoder/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e9c787e9388134c1a25dc69934a51a32a2683b38b8a9b017e1f3a692b8ed6b98
size 1361679905
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/tokenizer/merges.txt
ADDED
The diff for this file is too large to render.
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/tokenizer/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
{
  "bos_token": {
    "content": "<|startoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": "!",
  "unk_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/tokenizer/tokenizer_config.json
ADDED
@@ -0,0 +1,34 @@
{
  "add_prefix_space": false,
  "bos_token": {
    "__type": "AddedToken",
    "content": "<|startoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "do_lower_case": true,
  "eos_token": {
    "__type": "AddedToken",
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "errors": "replace",
  "model_max_length": 77,
  "name_or_path": "./hf-models/stable-diffusion-v2-inpainting/tokenizer",
  "pad_token": "<|endoftext|>",
  "special_tokens_map_file": "./special_tokens_map.json",
  "tokenizer_class": "CLIPTokenizer",
  "unk_token": {
    "__type": "AddedToken",
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
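Together, the text_encoder and tokenizer folders form the prompt encoder: a CLIPTokenizer with a 77-token context and a CLIPTextModel with hidden_size 1024, per the configs above. A minimal encoding sketch with transformers, assuming a local copy of the repo:

```python
import torch
from transformers import CLIPTextModel, CLIPTokenizer

model_dir = "./cv_stable-diffusion-v2_image-inpainting_base"  # assumed local copy

tokenizer = CLIPTokenizer.from_pretrained(model_dir, subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained(model_dir, subfolder="text_encoder")

tokens = tokenizer(
    "background",
    padding="max_length",
    max_length=tokenizer.model_max_length,  # 77, per tokenizer_config.json
    return_tensors="pt",
)
with torch.no_grad():
    embeddings = text_encoder(tokens.input_ids).last_hidden_state

print(embeddings.shape)  # torch.Size([1, 77, 1024]) given hidden_size=1024
```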
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/tokenizer/vocab.json
ADDED
The diff for this file is too large to render.
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/unet/config.json
ADDED
@@ -0,0 +1,44 @@
{
  "_class_name": "UNet2DConditionModel",
  "_diffusers_version": "0.8.0",
  "_name_or_path": "./hf-models/stable-diffusion-v2-inpainting/unet",
  "act_fn": "silu",
  "attention_head_dim": [
    5,
    10,
    20,
    20
  ],
  "block_out_channels": [
    320,
    640,
    1280,
    1280
  ],
  "center_input_sample": false,
  "cross_attention_dim": 1024,
  "down_block_types": [
    "CrossAttnDownBlock2D",
    "CrossAttnDownBlock2D",
    "CrossAttnDownBlock2D",
    "DownBlock2D"
  ],
  "downsample_padding": 1,
  "dual_cross_attention": false,
  "flip_sin_to_cos": true,
  "freq_shift": 0,
  "in_channels": 9,
  "layers_per_block": 2,
  "mid_block_scale_factor": 1,
  "norm_eps": 1e-05,
  "norm_num_groups": 32,
  "out_channels": 4,
  "sample_size": 64,
  "up_block_types": [
    "UpBlock2D",
    "CrossAttnUpBlock2D",
    "CrossAttnUpBlock2D",
    "CrossAttnUpBlock2D"
  ],
  "use_linear_projection": true
}
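Note the in_channels value of 9 in the UNet config above: an inpainting UNet takes the 4-channel noisy latent concatenated with a 1-channel downsampled mask and the 4-channel latent of the masked image. A sketch of loading just the UNet and checking shapes, assuming a local copy of the repo (channel ordering here follows the diffusers inpainting pipeline):

```python
import torch
from diffusers import UNet2DConditionModel

model_dir = "./cv_stable-diffusion-v2_image-inpainting_base"  # assumed local copy

unet = UNet2DConditionModel.from_pretrained(model_dir, subfolder="unet")
print(unet.config.in_channels)  # 9 = 4 noisy latent + 1 mask + 4 masked-image latent

# Shapes for a single 512x512 image (latent spatial size 64, sample_size above).
latents = torch.randn(1, 4, 64, 64)
mask = torch.randn(1, 1, 64, 64)
masked_image_latents = torch.randn(1, 4, 64, 64)
text_embeddings = torch.randn(1, 77, 1024)  # cross_attention_dim = 1024

model_input = torch.cat([latents, mask, masked_image_latents], dim=1)
with torch.no_grad():
    noise_pred = unet(model_input, timestep=10,
                      encoder_hidden_states=text_embeddings).sample
print(noise_pred.shape)  # torch.Size([1, 4, 64, 64]) -> out_channels = 4
```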
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/unet/diffusion_pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4f2f6cff77df1279d280950c6566ecbac4c3e822d17c25e5aef97ef6dde1bdb7
size 3463992293
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/vae/config.json
ADDED
@@ -0,0 +1,30 @@
{
  "_class_name": "AutoencoderKL",
  "_diffusers_version": "0.8.0",
  "_name_or_path": "./hf-models/stable-diffusion-v2-inpainting/vae",
  "act_fn": "silu",
  "block_out_channels": [
    128,
    256,
    512,
    512
  ],
  "down_block_types": [
    "DownEncoderBlock2D",
    "DownEncoderBlock2D",
    "DownEncoderBlock2D",
    "DownEncoderBlock2D"
  ],
  "in_channels": 3,
  "latent_channels": 4,
  "layers_per_block": 2,
  "norm_num_groups": 32,
  "out_channels": 3,
  "sample_size": 512,
  "up_block_types": [
    "UpDecoderBlock2D",
    "UpDecoderBlock2D",
    "UpDecoderBlock2D",
    "UpDecoderBlock2D"
  ]
}
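The VAE above is the standard Stable Diffusion autoencoder: 3-channel RGB in and out, 4 latent channels, and an 8x spatial downsampling from the four encoder blocks (512 -> 64). A small round-trip sketch with diffusers, assuming a local copy of the repo; the input tensor is a dummy image scaled to [-1, 1] as the pipeline feeds the VAE.

```python
import torch
from diffusers import AutoencoderKL

model_dir = "./cv_stable-diffusion-v2_image-inpainting_base"  # assumed local copy

vae = AutoencoderKL.from_pretrained(model_dir, subfolder="vae")

image = torch.rand(1, 3, 512, 512) * 2.0 - 1.0  # dummy RGB image in [-1, 1]

with torch.no_grad():
    latents = vae.encode(image).latent_dist.sample()
    print(latents.shape)          # torch.Size([1, 4, 64, 64]): 8x downsampling
    reconstruction = vae.decode(latents).sample
    print(reconstruction.shape)   # torch.Size([1, 3, 512, 512])
```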
hub/models/iic/cv_stable-diffusion-v2_image-inpainting_base/vae/diffusion_pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a4302e1efa25f3a47ceb7536bc335715ad9d1f203e90c2d25507600d74006e89
size 334715313