diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..cd66100ce281c073e08cf1a252d9eb0c68bbf812 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/misc/capybara.png filter=lfs diff=lfs merge=lfs -text
+assets/misc/logo.png filter=lfs diff=lfs merge=lfs -text
+assets/misc/teaser.png filter=lfs diff=lfs merge=lfs -text
+assets/misc/wechat.jpg filter=lfs diff=lfs merge=lfs -text
+text_encoder/Glyph-SDXL-v2/assets/Arial.ttf filter=lfs diff=lfs merge=lfs -text
+vision_encoder/siglip/redux.png filter=lfs diff=lfs merge=lfs -text
diff --git a/assets/misc/capybara.png b/assets/misc/capybara.png
new file mode 100644
index 0000000000000000000000000000000000000000..c7c70708b044a16ab4e80d890ede332042bb4d0f
--- /dev/null
+++ b/assets/misc/capybara.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d97cd36756797897dc14d0967d06431f4dfb69251fb231a1a83e40231104944
+size 1448701
diff --git a/assets/misc/logo.png b/assets/misc/logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..e0d82f499aa7013c92355e355680b5394674e719
--- /dev/null
+++ b/assets/misc/logo.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d8b3d271da6938062726e78e10b3c8c467a2b13d9c2ed9f3ec7f9a87cf3b6ab
+size 1174200
diff --git a/assets/misc/teaser.png b/assets/misc/teaser.png
new file mode 100644
index 0000000000000000000000000000000000000000..4a36281d5dc86dc1f63e7a56790f2d517abd66d4
--- /dev/null
+++ b/assets/misc/teaser.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5674e14c278791ef314c7244f44794d007130ec044df1905123a2a6b7bb124f5
+size 1004764
diff --git a/assets/misc/wechat.jpg b/assets/misc/wechat.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..09388310ecc1637a8a3902fc03d8d6eea2a4e831
--- /dev/null
+++ b/assets/misc/wechat.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44ec70f3dab32577129751067e99422e151d382101e763ad9532558aee16216e
+size 214887
diff --git a/scheduler/scheduler_config.json b/scheduler/scheduler_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..a0f8a3a40d306aede296e4f8dd19808e93cc0b3c
--- /dev/null
+++ b/scheduler/scheduler_config.json
@@ -0,0 +1,15 @@
+{
+ "_class_name": "FlowMatchDiscreteScheduler",
+ "_diffusers_version": "0.35.0",
+ "flux_base_shift": 0.5,
+ "flux_base_token": 256.0,
+ "flux_max_shift": 1.15,
+ "flux_max_token": 4096.0,
+ "flux_shift_factor": 1.0,
+ "n_tokens": null,
+ "num_train_timesteps": 1000,
+ "reverse": true,
+ "shift": 7.0,
+ "solver": "euler",
+ "use_flux_shift": false
+}
diff --git a/text_encoder/Glyph-SDXL-v2/.msc b/text_encoder/Glyph-SDXL-v2/.msc
new file mode 100644
index 0000000000000000000000000000000000000000..4fe76411747c397abd1ae49b29989b69111f49ff
Binary files /dev/null and b/text_encoder/Glyph-SDXL-v2/.msc differ
diff --git a/text_encoder/Glyph-SDXL-v2/.mv b/text_encoder/Glyph-SDXL-v2/.mv
new file mode 100644
index 0000000000000000000000000000000000000000..e011baae2848c95432e2c5e21a0f069a0b8291c3
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/.mv
@@ -0,0 +1 @@
+Revision:master,CreatedAt:1718991679
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/README.md b/text_encoder/Glyph-SDXL-v2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1a54f9550937c220d62bc35d0a38f4004c8f6159
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/README.md
@@ -0,0 +1,87 @@
+---
+language:
+- en
+library_name: glyph-byt5
+---
+
+# Glyph-ByT5-v2: A Strong Aesthetic Baseline for Accurate Multilingual Visual Text Rendering
+
+We introduce **Glyph-ByT5-v2**, a customized text encoder for accurate **multilingual** visual text rendering and improved aesthetics.
+As an extension of **Glyph-SDXL**, our multilingual version supports visual text rendering for up to 10 different languages: English, Chinese, Japanese, Korean, French, German, Spanish, Italian, Portuguese and Russian.
+Combined with SDXL, our proposed **Glyph-SDXL-v2** achieves accurate multilingual visual text rendering in design images.
+
+
+> [**Glyph-ByT5-v2: A Strong Aesthetic Baseline for Accurate Multilingual Visual Text Rendering**](https://glyph-byt5-v2.github.io/)
+> [Zeyu Liu](https://github.com/lzy-tony), [Weicong Liang](https://scholar.google.com/citations?user=QvHDIygAAAAJ&hl=zh-CN), [Yiming Zhao](https://scholar.google.com.hk/citations?user=_knPaYsAAAAJ&hl=zh-CN), [Bohan Chen](https://github.com/BHCHENGIT), [Ji Li](https://sites.google.com/a/usc.edu/jili/), [Yuhui Yuan](https://www.microsoft.com/en-us/research/people/yuyua/)
+> Microsoft Research Asia; Tsinghua University; Peking University; University of Liverpool
+> Preprint
+
+## Model Sources
+
+
+
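+- **Repository:** [https://github.com/AIGText/Glyph-ByT5](https://github.com/AIGText/Glyph-ByT5)
+- **Paper:** [https://arxiv.org/abs/2406.10208](https://arxiv.org/abs/2406.10208)
+- **Project Page:** [https://glyph-byt5-v2.github.io/](https://glyph-byt5-v2.github.io/)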
+
+
+## Model Description
+
+Please check our [paper](https://arxiv.org/abs/2406.10208) and [project page](https://glyph-byt5-v2.github.io/) for more details. Detailed usage instructions and inference code can be found [here](https://github.com/AIGText/Glyph-ByT5).
+
+## Visualization
+
+
+
+## Quick Usage
+
+```
+python inference_v2.py configs/glyph_sdxl_v2_albedo.py checkpoints examples/xiaoman.json --out_folder work_dirs/xiaoman --device cuda --sampler dpm
+```
+
+## More Configurations
+
+We list some more useful configurations for easy usage:
+
+| Argument/Config | Place | Default | Description |
+| ----------------------------- | ---------- | ----------------------------------- | ------------------------------------------------------------ |
+| cfg | argument | 5.0 | Classifier-free guidance |
+| sampler                       | argument   | dpm                                 | Sampler; supports dpm (DPM++ 2M Karras) and euler (EulerDiscreteScheduler) |
+| pretrained_model_name_or_path | config | stablediffusionapi/albedobase-xl-20 | Base model |
+| seed | annotation | None | Seed for inference |
+
+
+## Citation
+
+If you find our work useful in your research, please consider citing:
+
+```
+@misc{liu2024glyphbyt5v2,
+ title={Glyph-ByT5-v2: A Strong Aesthetic Baseline for Accurate Multilingual Visual Text Rendering},
+ author={Zeyu Liu and Weicong Liang and Yiming Zhao and Bohan Chen and Ji Li and Yuhui Yuan},
+ year={2024},
+ eprint={2406.10208},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+and
+
+```
+@misc{liu2024glyphbyt5,
+ title={Glyph-ByT5: A Customized Text Encoder for Accurate Visual Text Rendering},
+ author={Zeyu Liu and Weicong Liang and Zhanhao Liang and Chong Luo and Ji Li and Gao Huang and Yuhui Yuan},
+ year={2024},
+ eprint={2403.09622},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/assets/Arial.ttf b/text_encoder/Glyph-SDXL-v2/assets/Arial.ttf
new file mode 100644
index 0000000000000000000000000000000000000000..fa960f48d5545af1391c0fcb24f223e37f22863e
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/assets/Arial.ttf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35c0f3559d8db569e36c31095b8a60d441643d95f59139de40e23fada819b833
+size 275572
diff --git a/text_encoder/Glyph-SDXL-v2/assets/chinese_char.txt b/text_encoder/Glyph-SDXL-v2/assets/chinese_char.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2506e2a28c78a58fd60905d170533bf1caf9103a
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/assets/chinese_char.txt
@@ -0,0 +1,1000 @@
+的
+一
+是
+不
+了
+在
+人
+有
+我
+他
+这
+个
+们
+中
+来
+上
+大
+为
+和
+国
+地
+到
+以
+说
+时
+要
+就
+出
+会
+可
+也
+你
+对
+生
+能
+而
+子
+那
+得
+于
+着
+下
+自
+之
+年
+过
+发
+后
+作
+里
+用
+道
+行
+所
+然
+家
+种
+事
+成
+方
+多
+经
+么
+去
+法
+学
+如
+都
+同
+现
+当
+没
+动
+面
+起
+看
+定
+天
+分
+还
+进
+好
+小
+部
+其
+些
+主
+样
+理
+心
+她
+本
+前
+开
+但
+因
+只
+从
+想
+实
+日
+军
+者
+意
+无
+力
+它
+与
+长
+把
+机
+十
+民
+第
+公
+此
+已
+工
+使
+情
+明
+性
+知
+全
+三
+又
+关
+点
+正
+业
+外
+将
+两
+高
+间
+由
+问
+很
+最
+重
+并
+物
+手
+应
+战
+向
+头
+文
+体
+政
+美
+相
+见
+被
+利
+什
+二
+等
+产
+或
+新
+己
+制
+身
+果
+加
+西
+斯
+月
+话
+合
+回
+特
+代
+内
+信
+表
+化
+老
+给
+世
+位
+次
+度
+门
+任
+常
+先
+海
+通
+教
+儿
+原
+东
+声
+提
+立
+及
+比
+员
+解
+水
+名
+真
+论
+处
+走
+义
+各
+入
+几
+口
+认
+条
+平
+系
+气
+题
+活
+尔
+更
+别
+打
+女
+变
+四
+神
+总
+何
+电
+数
+安
+少
+报
+才
+结
+反
+受
+目
+太
+量
+再
+感
+建
+务
+做
+接
+必
+场
+件
+计
+管
+期
+市
+直
+德
+资
+命
+山
+金
+指
+克
+许
+统
+区
+保
+至
+队
+形
+社
+便
+空
+决
+治
+展
+马
+科
+司
+五
+基
+眼
+书
+非
+则
+听
+白
+却
+界
+达
+光
+放
+强
+即
+像
+难
+且
+权
+思
+王
+象
+完
+设
+式
+色
+路
+记
+南
+品
+住
+告
+类
+求
+据
+程
+北
+边
+死
+张
+该
+交
+规
+万
+取
+拉
+格
+望
+觉
+术
+领
+共
+确
+传
+师
+观
+清
+今
+切
+院
+让
+识
+候
+带
+导
+争
+运
+笑
+飞
+风
+步
+改
+收
+根
+干
+造
+言
+联
+持
+组
+每
+济
+车
+亲
+极
+林
+服
+快
+办
+议
+往
+元
+英
+士
+证
+近
+失
+转
+夫
+令
+准
+布
+始
+怎
+呢
+存
+未
+远
+叫
+台
+单
+影
+具
+罗
+字
+爱
+击
+流
+备
+兵
+连
+调
+深
+商
+算
+质
+团
+集
+百
+需
+价
+花
+党
+华
+城
+石
+级
+整
+府
+离
+况
+亚
+请
+技
+际
+约
+示
+复
+病
+息
+究
+线
+似
+官
+火
+断
+精
+满
+支
+视
+消
+越
+器
+容
+照
+须
+九
+增
+研
+写
+称
+企
+八
+功
+吗
+包
+片
+史
+委
+乎
+查
+轻
+易
+早
+曾
+除
+农
+找
+装
+广
+显
+吧
+阿
+李
+标
+谈
+吃
+图
+念
+六
+引
+历
+首
+医
+局
+突
+专
+费
+号
+尽
+另
+周
+较
+注
+语
+仅
+考
+落
+青
+随
+选
+列
+武
+红
+响
+虽
+推
+势
+参
+希
+古
+众
+构
+房
+半
+节
+土
+投
+某
+案
+黑
+维
+革
+划
+敌
+致
+陈
+律
+足
+态
+护
+七
+兴
+派
+孩
+验
+责
+营
+星
+够
+章
+音
+跟
+志
+底
+站
+严
+巴
+例
+防
+族
+供
+效
+续
+施
+留
+讲
+型
+料
+终
+答
+紧
+黄
+绝
+奇
+察
+母
+京
+段
+依
+批
+群
+项
+故
+按
+河
+米
+围
+江
+织
+害
+斗
+双
+境
+客
+纪
+采
+举
+杀
+攻
+父
+苏
+密
+低
+朝
+友
+诉
+止
+细
+愿
+千
+值
+仍
+男
+钱
+破
+网
+热
+助
+倒
+育
+属
+坐
+帝
+限
+船
+脸
+职
+速
+刻
+乐
+否
+刚
+威
+毛
+状
+率
+甚
+独
+球
+般
+普
+怕
+弹
+校
+苦
+创
+假
+久
+错
+承
+印
+晚
+兰
+试
+股
+拿
+脑
+预
+谁
+益
+阳
+若
+哪
+微
+尼
+继
+送
+急
+血
+惊
+伤
+素
+药
+适
+波
+夜
+省
+初
+喜
+卫
+源
+食
+险
+待
+述
+陆
+习
+置
+居
+劳
+财
+环
+排
+福
+纳
+欢
+雷
+警
+获
+模
+充
+负
+云
+停
+木
+游
+龙
+树
+疑
+层
+冷
+洲
+冲
+射
+略
+范
+竟
+句
+室
+异
+激
+汉
+村
+哈
+策
+演
+简
+卡
+罪
+判
+担
+州
+静
+退
+既
+衣
+您
+宗
+积
+余
+痛
+检
+差
+富
+灵
+协
+角
+占
+配
+征
+修
+皮
+挥
+胜
+降
+阶
+审
+沉
+坚
+善
+妈
+刘
+读
+啊
+超
+免
+压
+银
+买
+皇
+养
+伊
+怀
+执
+副
+乱
+抗
+犯
+追
+帮
+宣
+佛
+岁
+航
+优
+怪
+香
+著
+田
+铁
+控
+税
+左
+右
+份
+穿
+艺
+背
+阵
+草
+脚
+概
+恶
+块
+顿
+敢
+守
+酒
+岛
+托
+央
+户
+烈
+洋
+哥
+索
+胡
+款
+靠
+评
+版
+宝
+座
+释
+景
+顾
+弟
+登
+货
+互
+付
+伯
+慢
+欧
+换
+闻
+危
+忙
+核
+暗
+姐
+介
+坏
+讨
+丽
+良
+序
+升
+监
+临
+亮
+露
+永
+呼
+味
+野
+架
+域
+沙
+掉
+括
+舰
+鱼
+杂
+误
+湾
+吉
+减
+编
+楚
+肯
+测
+败
+屋
+跑
+梦
+散
+温
+困
+剑
+渐
+封
+救
+贵
+枪
+缺
+楼
+县
+尚
+毫
+移
+娘
+朋
+画
+班
+智
+亦
+耳
+恩
+短
+掌
+恐
+遗
+固
+席
+松
+秘
+谢
+鲁
+遇
+康
+虑
+幸
+均
+销
+钟
+诗
+藏
+赶
+剧
+票
+损
+忽
+巨
+炮
+旧
+端
+探
+湖
+录
+叶
+春
+乡
+附
+吸
+予
+礼
+港
+雨
+呀
+板
+庭
+妇
+归
+睛
+饭
+额
+含
+顺
+输
+摇
+招
+婚
+脱
+补
+谓
+督
+毒
+油
+疗
+旅
+泽
+材
+灭
+逐
+莫
+笔
+亡
+鲜
+词
+圣
+择
+寻
+厂
+睡
+博
+勒
+烟
+授
+诺
+伦
+岸
+奥
+唐
+卖
+俄
+炸
+载
+洛
+健
+堂
+旁
+宫
+喝
+借
+君
+禁
+阴
+园
+谋
+宋
+避
+抓
+荣
+姑
+孙
+逃
+牙
+束
+跳
+顶
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/assets/color_idx.json b/text_encoder/Glyph-SDXL-v2/assets/color_idx.json
new file mode 100644
index 0000000000000000000000000000000000000000..8181bd294a73465b691d125ad439c8d3d85ff9fc
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/assets/color_idx.json
@@ -0,0 +1 @@
+{"white": 0, "black": 1, "darkslategray": 2, "dimgray": 3, "darkolivegreen": 4, "midnightblue": 5, "saddlebrown": 6, "sienna": 7, "whitesmoke": 8, "darkslateblue": 9, "indianred": 10, "linen": 11, "maroon": 12, "khaki": 13, "sandybrown": 14, "gray": 15, "gainsboro": 16, "teal": 17, "peru": 18, "gold": 19, "snow": 20, "firebrick": 21, "crimson": 22, "chocolate": 23, "tomato": 24, "brown": 25, "goldenrod": 26, "antiquewhite": 27, "rosybrown": 28, "steelblue": 29, "floralwhite": 30, "seashell": 31, "darkgreen": 32, "oldlace": 33, "darkkhaki": 34, "burlywood": 35, "red": 36, "darkgray": 37, "orange": 38, "royalblue": 39, "seagreen": 40, "lightgray": 41, "tan": 42, "coral": 43, "beige": 44, "palevioletred": 45, "wheat": 46, "lavender": 47, "darkcyan": 48, "slateblue": 49, "slategray": 50, "orangered": 51, "silver": 52, "olivedrab": 53, "forestgreen": 54, "darkgoldenrod": 55, "ivory": 56, "darkorange": 57, "yellow": 58, "hotpink": 59, "ghostwhite": 60, "lightcoral": 61, "indigo": 62, "bisque": 63, "darkred": 64, "darksalmon": 65, "lightslategray": 66, "dodgerblue": 67, "lightpink": 68, "mistyrose": 69, "mediumvioletred": 70, "cadetblue": 71, "deeppink": 72, "salmon": 73, "palegoldenrod": 74, "blanchedalmond": 75, "lightseagreen": 76, "cornflowerblue": 77, "yellowgreen": 78, "greenyellow": 79, "navajowhite": 80, "papayawhip": 81, "mediumslateblue": 82, "purple": 83, "blueviolet": 84, "pink": 85, "cornsilk": 86, "lightsalmon": 87, "mediumpurple": 88, "moccasin": 89, "turquoise": 90, "mediumseagreen": 91, "lavenderblush": 92, "mediumblue": 93, "darkseagreen": 94, "mediumturquoise": 95, "paleturquoise": 96, "skyblue": 97, "lemonchiffon": 98, "olive": 99, "peachpuff": 100, "lightyellow": 101, "lightsteelblue": 102, "mediumorchid": 103, "plum": 104, "darkturquoise": 105, "aliceblue": 106, "mediumaquamarine": 107, "orchid": 108, "powderblue": 109, "blue": 110, "darkorchid": 111, "violet": 112, "lightskyblue": 113, "lightcyan": 114, "lightgoldenrodyellow": 115, "navy": 116, 
"thistle": 117, "honeydew": 118, "mintcream": 119, "lightblue": 120, "darkblue": 121, "darkmagenta": 122, "deepskyblue": 123, "magenta": 124, "limegreen": 125, "darkviolet": 126, "cyan": 127, "palegreen": 128, "aquamarine": 129, "lawngreen": 130, "lightgreen": 131, "azure": 132, "chartreuse": 133, "green": 134, "mediumspringgreen": 135, "lime": 136, "springgreen": 137}
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/assets/font_idx_512.json b/text_encoder/Glyph-SDXL-v2/assets/font_idx_512.json
new file mode 100644
index 0000000000000000000000000000000000000000..9b40ddd4b31d3235b5b5b9200121f43d0264282f
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/assets/font_idx_512.json
@@ -0,0 +1 @@
+{"Montserrat-Regular": 0, "Poppins-Italic": 1, "GlacialIndifference-Regular": 2, "OpenSans-ExtraBoldItalic": 3, "Montserrat-Bold": 4, "Now-Regular": 5, "Garet-Regular": 6, "LeagueSpartan-Bold": 7, "DMSans-Regular": 8, "OpenSauceOne-Regular": 9, "OpenSans-ExtraBold": 10, "KGPrimaryPenmanship": 11, "Anton-Regular": 12, "Aileron-BlackItalic": 13, "Quicksand-Light": 14, "Roboto-BoldItalic": 15, "TheSeasons-It": 16, "Kollektif": 17, "Inter-BoldItalic": 18, "Poppins-Medium": 19, "Poppins-Light": 20, "RoxboroughCF-RegularItalic": 21, "PlayfairDisplay-SemiBold": 22, "Agrandir-Italic": 23, "Lato-Regular": 24, "MoreSugarRegular": 25, "CanvaSans-RegularItalic": 26, "PublicSans-Italic": 27, "CodePro-NormalLC": 28, "Belleza-Regular": 29, "JosefinSans-Bold": 30, "HKGrotesk-Bold": 31, "Telegraf-Medium": 32, "BrittanySignatureRegular": 33, "Raleway-ExtraBoldItalic": 34, "Mont-RegularItalic": 35, "Arimo-BoldItalic": 36, "Lora-Italic": 37, "ArchivoBlack-Regular": 38, "Poppins": 39, "Barlow-Black": 40, "CormorantGaramond-Bold": 41, "LibreBaskerville-Regular": 42, "CanvaSchoolFontRegular": 43, "BebasNeueBold": 44, "LazydogRegular": 45, "FredokaOne-Regular": 46, "Horizon-Bold": 47, "Nourd-Regular": 48, "Hatton-Regular": 49, "Nunito-ExtraBoldItalic": 50, "CerebriSans-Regular": 51, "Montserrat-Light": 52, "TenorSans": 53, "Norwester-Regular": 54, "ClearSans-Bold": 55, "Cardo-Regular": 56, "Alice-Regular": 57, "Oswald-Regular": 58, "Gaegu-Bold": 59, "Muli-Black": 60, "TAN-PEARL-Regular": 61, "CooperHewitt-Book": 62, "Agrandir-Grand": 63, "BlackMango-Thin": 64, "DMSerifDisplay-Regular": 65, "Antonio-Bold": 66, "Sniglet-Regular": 67, "BeVietnam-Regular": 68, "NunitoSans10pt-BlackItalic": 69, "AbhayaLibre-ExtraBold": 70, "Rubik-Regular": 71, "PPNeueMachina-Regular": 72, "TAN - MON CHERI-Regular": 73, "Jua-Regular": 74, "Playlist-Script": 75, "SourceSansPro-BoldItalic": 76, "MoonTime-Regular": 77, "Eczar-ExtraBold": 78, "Gatwick-Regular": 79, "MonumentExtended-Regular": 80, 
"BarlowSemiCondensed-Regular": 81, "BarlowCondensed-Regular": 82, "Alegreya-Regular": 83, "DreamAvenue": 84, "RobotoCondensed-Italic": 85, "BobbyJones-Regular": 86, "Garet-ExtraBold": 87, "YesevaOne-Regular": 88, "Dosis-ExtraBold": 89, "LeagueGothic-Regular": 90, "OpenSans-Italic": 91, "TANAEGEAN-Regular": 92, "Maharlika-Regular": 93, "MarykateRegular": 94, "Cinzel-Regular": 95, "Agrandir-Wide": 96, "Chewy-Regular": 97, "BodoniFLF-BoldItalic": 98, "Nunito-BlackItalic": 99, "LilitaOne": 100, "HandyCasualCondensed-Regular": 101, "Ovo": 102, "Livvic-Regular": 103, "Agrandir-Narrow": 104, "CrimsonPro-Italic": 105, "AnonymousPro-Bold": 106, "NF-OneLittleFont-Bold": 107, "RedHatDisplay-BoldItalic": 108, "CodecPro-Regular": 109, "HalimunRegular": 110, "LibreFranklin-Black": 111, "TeXGyreTermes-BoldItalic": 112, "Shrikhand-Regular": 113, "TTNormsPro-Italic": 114, "Gagalin-Regular": 115, "OpenSans-Bold": 116, "GreatVibes-Regular": 117, "Breathing": 118, "HeroLight-Regular": 119, "KGPrimaryDots": 120, "Quicksand-Bold": 121, "Brice-ExtraLightSemiExpanded": 122, "Lato-BoldItalic": 123, "Fraunces9pt-Italic": 124, "AbrilFatface-Regular": 125, "BerkshireSwash-Regular": 126, "Atma-Bold": 127, "HolidayRegular": 128, "BebasNeueCyrillic": 129, "IntroRust-Base": 130, "Gistesy": 131, "BDScript-Regular": 132, "ApricotsRegular": 133, "Prompt-Black": 134, "TAN MERINGUE": 135, "Sukar Regular": 136, "GentySans-Regular": 137, "NeueEinstellung-Normal": 138, "Garet-Bold": 139, "FiraSans-Black": 140, "BantayogLight": 141, "NotoSerifDisplay-Black": 142, "TTChocolates-Regular": 143, "Ubuntu-Regular": 144, "Assistant-Bold": 145, "ABeeZee-Regular": 146, "LexendDeca-Regular": 147, "KingredSerif": 148, "Radley-Regular": 149, "BrownSugar": 150, "MigraItalic-ExtraboldItalic": 151, "ChildosArabic-Regular": 152, "PeaceSans": 153, "LondrinaSolid-Black": 154, "SpaceMono-BoldItalic": 155, "RobotoMono-Light": 156, "CourierPrime-Regular": 157, "Alata-Regular": 158, "Amsterdam-One": 159, 
"IreneFlorentina-Regular": 160, "CatchyMager": 161, "Alta_regular": 162, "ArticulatCF-Regular": 163, "Raleway-Regular": 164, "BrasikaDisplay": 165, "TANAngleton-Italic": 166, "NotoSerifDisplay-ExtraCondensedItalic": 167, "Bryndan Write": 168, "TTCommonsPro-It": 169, "AlexBrush-Regular": 170, "Antic-Regular": 171, "TTHoves-Bold": 172, "DroidSerif": 173, "AblationRegular": 174, "Marcellus-Regular": 175, "Sanchez-Italic": 176, "JosefinSans": 177, "Afrah-Regular": 178, "PinyonScript": 179, "TTInterphases-BoldItalic": 180, "Yellowtail-Regular": 181, "Gliker-Regular": 182, "BobbyJonesSoft-Regular": 183, "IBMPlexSans": 184, "Amsterdam-Three": 185, "Amsterdam-FourSlant": 186, "TTFors-Regular": 187, "Quattrocento": 188, "Sifonn-Basic": 189, "AlegreyaSans-Black": 190, "Daydream": 191, "AristotelicaProTx-Rg": 192, "NotoSerif": 193, "EBGaramond-Italic": 194, "HammersmithOne-Regular": 195, "RobotoSlab-Regular": 196, "DO-Sans-Regular": 197, "KGPrimaryDotsLined": 198, "Blinker-Regular": 199, "TAN NIMBUS": 200, "Blueberry-Regular": 201, "Rosario-Regular": 202, "Forum": 203, "MistrullyRegular": 204, "SourceSerifPro-Regular": 205, "Bugaki-Regular": 206, "CMUSerif-Roman": 207, "GulfsDisplay-NormalItalic": 208, "PTSans-Bold": 209, "Sensei-Medium": 210, "SquadaOne-Regular": 211, "Arapey-Italic": 212, "Parisienne-Regular": 213, "Aleo-Italic": 214, "QuicheDisplay-Italic": 215, "RocaOne-It": 216, "Funtastic-Regular": 217, "PTSerif-BoldItalic": 218, "Muller-RegularItalic": 219, "ArgentCF-Regular": 220, "Brightwall-Italic": 221, "Knewave-Regular": 222, "TYSerif-D": 223, "Agrandir-Tight": 224, "AlfaSlabOne-Regular": 225, "TANTangkiwood-Display": 226, "Kief-Montaser-Regular": 227, "Gotham-Book": 228, "JuliusSansOne-Regular": 229, "CocoGothic-Italic": 230, "SairaCondensed-Regular": 231, "DellaRespira-Regular": 232, "Questrial-Regular": 233, "BukhariScript-Regular": 234, "HelveticaWorld-Bold": 235, "TANKINDRED-Display": 236, "CinzelDecorative-Regular": 237, "Vidaloka-Regular": 238, 
"AlegreyaSansSC-Black": 239, "FeelingPassionate-Regular": 240, "QuincyCF-Regular": 241, "FiraCode-Regular": 242, "Genty-Regular": 243, "Nickainley-Normal": 244, "RubikOne-Regular": 245, "Gidole-Regular": 246, "Borsok": 247, "Gordita-RegularItalic": 248, "Scripter-Regular": 249, "Buffalo-Regular": 250, "KleinText-Regular": 251, "Creepster-Regular": 252, "Arvo-Bold": 253, "GabrielSans-NormalItalic": 254, "Heebo-Black": 255, "LexendExa-Regular": 256, "BrixtonSansTC-Regular": 257, "GildaDisplay-Regular": 258, "ChunkFive-Roman": 259, "Amaranth-BoldItalic": 260, "BubbleboddyNeue-Regular": 261, "MavenPro-Bold": 262, "TTDrugs-Italic": 263, "CyGrotesk-KeyRegular": 264, "VarelaRound-Regular": 265, "Ruda-Black": 266, "SafiraMarch": 267, "BloggerSans": 268, "TANHEADLINE-Regular": 269, "SloopScriptPro-Regular": 270, "NeueMontreal-Regular": 271, "Schoolbell-Regular": 272, "SigherRegular": 273, "InriaSerif-Regular": 274, "JetBrainsMono-Regular": 275, "MADEEvolveSans": 276, "Dekko": 277, "Handyman-Regular": 278, "Aileron-BoldItalic": 279, "Bright-Italic": 280, "Solway-Regular": 281, "Higuen-Regular": 282, "WedgesItalic": 283, "TANASHFORD-BOLD": 284, "IBMPlexMono": 285, "RacingSansOne-Regular": 286, "RegularBrush": 287, "OpenSans-LightItalic": 288, "SpecialElite-Regular": 289, "FuturaLTPro-Medium": 290, "MaragsaDisplay": 291, "BigShouldersDisplay-Regular": 292, "BDSans-Regular": 293, "RasputinRegular": 294, "Yvesyvesdrawing-BoldItalic": 295, "Bitter-Regular": 296, "LuckiestGuy-Regular": 297, "CanvaSchoolFontDotted": 298, "TTFirsNeue-Italic": 299, "Sunday-Regular": 300, "HKGothic-MediumItalic": 301, "CaveatBrush-Regular": 302, "HeliosExt": 303, "ArchitectsDaughter-Regular": 304, "Angelina": 305, "Calistoga-Regular": 306, "ArchivoNarrow-Regular": 307, "ObjectSans-MediumSlanted": 308, "AyrLucidityCondensed-Regular": 309, "Nexa-RegularItalic": 310, "Lustria-Regular": 311, "Amsterdam-TwoSlant": 312, "Virtual-Regular": 313, "Brusher-Regular": 314, "NF-Lepetitcochon-Regular": 315, 
"TANTWINKLE": 316, "LeJour-Serif": 317, "Prata-Regular": 318, "PPWoodland-Regular": 319, "PlayfairDisplay-BoldItalic": 320, "AmaticSC-Regular": 321, "Cabin-Regular": 322, "Manjari-Bold": 323, "MrDafoe-Regular": 324, "TTRamillas-Italic": 325, "Luckybones-Bold": 326, "DarkerGrotesque-Light": 327, "BellabooRegular": 328, "CormorantSC-Bold": 329, "GochiHand-Regular": 330, "Atteron": 331, "RocaTwo-Lt": 332, "ZCOOLXiaoWei-Regular": 333, "TANSONGBIRD": 334, "HeadingNow-74Regular": 335, "Luthier-BoldItalic": 336, "Oregano-Regular": 337, "AyrTropikaIsland-Int": 338, "Mali-Regular": 339, "DidactGothic-Regular": 340, "Lovelace-Regular": 341, "BakerieSmooth-Regular": 342, "CarterOne": 343, "HussarBd": 344, "OldStandard-Italic": 345, "TAN-ASTORIA-Display": 346, "rugratssans-Regular": 347, "BMHANNA": 348, "BetterSaturday": 349, "AdigianaToybox": 350, "Sailors": 351, "PlayfairDisplaySC-Italic": 352, "Etna-Regular": 353, "Revive80Signature": 354, "CAGenerated": 355, "Poppins-Regular": 356, "Jonathan-Regular": 357, "Pacifico-Regular": 358, "Saira-Black": 359, "Loubag-Regular": 360, "Decalotype-Black": 361, "Mansalva-Regular": 362, "Allura-Regular": 363, "ProximaNova-Bold": 364, "TANMIGNON-DISPLAY": 365, "ArsenicaAntiqua-Regular": 366, "BreulGroteskA-RegularItalic": 367, "HKModular-Bold": 368, "TANNightingale-Regular": 369, "AristotelicaProCndTxt-Rg": 370, "Aprila-Regular": 371, "Tomorrow-Regular": 372, "AngellaWhite": 373, "KaushanScript-Regular": 374, "NotoSans": 375, "LeJour-Script": 376, "BrixtonTC-Regular": 377, "OleoScript-Regular": 378, "Cakerolli-Regular": 379, "Lobster-Regular": 380, "FrunchySerif-Regular": 381, "PorcelainRegular": 382, "AlojaExtended": 383, "SergioTrendy-Italic": 384, "LovelaceText-Bold": 385, "Anaktoria": 386, "JimmyScript-Light": 387, "IBMPlexSerif": 388, "Marta": 389, "Mango-Regular": 390, "Overpass-Italic": 391, "Hagrid-Regular": 392, "ElikaGorica": 393, "Amiko-Regular": 394, "EFCOBrookshire-Regular": 395, "Caladea-Regular": 396, "MoonlightBold": 397, 
"Staatliches-Regular": 398, "Helios-Bold": 399, "Satisfy-Regular": 400, "NexaScript-Regular": 401, "Trocchi-Regular": 402, "March": 403, "IbarraRealNova-Regular": 404, "Nectarine-Regular": 405, "Overpass-Light": 406, "TruetypewriterPolyglOTT": 407, "Bangers-Regular": 408, "Lazord-BoldExpandedItalic": 409, "Chloe-Regular": 410, "BaskervilleDisplayPT-Regular": 411, "Bright-Regular": 412, "Vollkorn-Regular": 413, "Harmattan": 414, "SortsMillGoudy-Regular": 415, "Biryani-Bold": 416, "SugoProDisplay-Italic": 417, "Lazord-BoldItalic": 418, "Alike-Regular": 419, "PermanentMarker-Regular": 420, "Sacramento-Regular": 421, "HKGroteskPro-Italic": 422, "Aleo-BoldItalic": 423, "Noot": 424, "TANGARLAND-Regular": 425, "Twister": 426, "Arsenal-Italic": 427, "Bogart-Italic": 428, "BethEllen-Regular": 429, "Caveat-Regular": 430, "BalsamiqSans-Bold": 431, "BreeSerif-Regular": 432, "CodecPro-ExtraBold": 433, "Pierson-Light": 434, "CyGrotesk-WideRegular": 435, "Lumios-Marker": 436, "Comfortaa-Bold": 437, "TraceFontRegular": 438, "RTL-AdamScript-Regular": 439, "EastmanGrotesque-Italic": 440, "Kalam-Bold": 441, "ChauPhilomeneOne-Regular": 442, "Coiny-Regular": 443, "Lovera": 444, "Gellatio": 445, "TitilliumWeb-Bold": 446, "OilvareBase-Italic": 447, "Catamaran-Black": 448, "Anteb-Italic": 449, "SueEllenFrancisco": 450, "SweetApricot": 451, "BrightSunshine": 452, "IM_FELL_Double_Pica_Italic": 453, "Granaina-limpia": 454, "TANPARFAIT": 455, "AcherusGrotesque-Regular": 456, "AwesomeLathusca-Italic": 457, "Signika-Bold": 458, "Andasia": 459, "DO-AllCaps-Slanted": 460, "Zenaida-Regular": 461, "Fahkwang-Regular": 462, "Play-Regular": 463, "BERNIERRegular-Regular": 464, "PlumaThin-Regular": 465, "SportsWorld": 466, "Garet-Black": 467, "CarolloPlayscript-BlackItalic": 468, "Cheque-Regular": 469, "SEGO": 470, "BobbyJones-Condensed": 471, "NexaSlab-RegularItalic": 472, "DancingScript-Regular": 473, "PaalalabasDisplayWideBETA": 474, "Magnolia-Script": 475, "OpunMai-400It": 476, 
"MadelynFill-Regular": 477, "ZingRust-Base": 478, "FingerPaint-Regular": 479, "BostonAngel-Light": 480, "Gliker-RegularExpanded": 481, "Ahsing": 482, "Engagement-Regular": 483, "EyesomeScript": 484, "LibraSerifModern-Regular": 485, "London-Regular": 486, "AtkinsonHyperlegible-Regular": 487, "StadioNow-TextItalic": 488, "Aniyah": 489, "ITCAvantGardePro-Bold": 490, "Comica-Regular": 491, "Coustard-Regular": 492, "Brice-BoldCondensed": 493, "TANNEWYORK-Bold": 494, "TANBUSTER-Bold": 495, "Alatsi-Regular": 496, "TYSerif-Book": 497, "Jingleberry": 498, "Rajdhani-Bold": 499, "LobsterTwo-BoldItalic": 500, "BestLight-Medium": 501, "Hitchcut-Regular": 502, "GermaniaOne-Regular": 503, "Emitha-Script": 504, "LemonTuesday": 505, "Cubao_Free_Regular": 506, "MonterchiSerif-Regular": 507, "AllertaStencil-Regular": 508, "RTL-Sondos-Regular": 509, "HomemadeApple-Regular": 510, "CosmicOcto-Medium": 511}
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/cn.json b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/cn.json
new file mode 100644
index 0000000000000000000000000000000000000000..7a357682d54ff5065c7ca13ca82eb5c542bbf796
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/cn.json
@@ -0,0 +1 @@
+[["cn-HelloFont-FangHuaTi", 0], ["cn-HelloFont-ID-DianFangSong-Bold", 1], ["cn-HelloFont-ID-DianFangSong", 2], ["cn-HelloFont-ID-DianHei-CEJ", 3], ["cn-HelloFont-ID-DianHei-DEJ", 4], ["cn-HelloFont-ID-DianHei-EEJ", 5], ["cn-HelloFont-ID-DianHei-FEJ", 6], ["cn-HelloFont-ID-DianHei-GEJ", 7], ["cn-HelloFont-ID-DianKai-Bold", 8], ["cn-HelloFont-ID-DianKai", 9], ["cn-HelloFont-WenYiHei", 10], ["cn-Hellofont-ID-ChenYanXingKai", 11], ["cn-Hellofont-ID-DaZiBao", 12], ["cn-Hellofont-ID-DaoCaoRen", 13], ["cn-Hellofont-ID-JianSong", 14], ["cn-Hellofont-ID-JiangHuZhaoPaiHei", 15], ["cn-Hellofont-ID-KeSong", 16], ["cn-Hellofont-ID-LeYuanTi", 17], ["cn-Hellofont-ID-Pinocchio", 18], ["cn-Hellofont-ID-QiMiaoTi", 19], ["cn-Hellofont-ID-QingHuaKai", 20], ["cn-Hellofont-ID-QingHuaXingKai", 21], ["cn-Hellofont-ID-ShanShuiXingKai", 22], ["cn-Hellofont-ID-ShouXieQiShu", 23], ["cn-Hellofont-ID-ShouXieTongZhenTi", 24], ["cn-Hellofont-ID-TengLingTi", 25], ["cn-Hellofont-ID-XiaoLiShu", 26], ["cn-Hellofont-ID-XuanZhenSong", 27], ["cn-Hellofont-ID-ZhongLingXingKai", 28], ["cn-HellofontIDJiaoTangTi", 29], ["cn-HellofontIDJiuZhuTi", 30], ["cn-HuXiaoBao-SaoBao", 31], ["cn-HuXiaoBo-NanShen", 32], ["cn-HuXiaoBo-ZhenShuai", 33], ["cn-SourceHanSansSC-Bold", 34], ["cn-SourceHanSansSC-ExtraLight", 35], ["cn-SourceHanSansSC-Heavy", 36], ["cn-SourceHanSansSC-Light", 37], ["cn-SourceHanSansSC-Medium", 38], ["cn-SourceHanSansSC-Normal", 39], ["cn-SourceHanSansSC-Regular", 40], ["cn-SourceHanSerifSC-Bold", 41], ["cn-SourceHanSerifSC-ExtraLight", 42], ["cn-SourceHanSerifSC-Heavy", 43], ["cn-SourceHanSerifSC-Light", 44], ["cn-SourceHanSerifSC-Medium", 45], ["cn-SourceHanSerifSC-Regular", 46], ["cn-SourceHanSerifSC-SemiBold", 47], ["cn-xiaowei", 48], ["cn-AaJianHaoTi", 49], ["cn-AlibabaPuHuiTi-Bold", 50], ["cn-AlibabaPuHuiTi-Heavy", 51], ["cn-AlibabaPuHuiTi-Light", 52], ["cn-AlibabaPuHuiTi-Medium", 53], ["cn-AlibabaPuHuiTi-Regular", 54], ["cn-CanvaAcidBoldSC", 55], ["cn-CanvaBreezeCN", 56], 
["cn-CanvaBumperCropSC", 57], ["cn-CanvaCakeShopCN", 58], ["cn-CanvaEndeavorBlackSC", 59], ["cn-CanvaJoyHeiCN", 60], ["cn-CanvaLiCN", 61], ["cn-CanvaOrientalBrushCN", 62], ["cn-CanvaPoster", 63], ["cn-CanvaQinfuCalligraphyCN", 64], ["cn-CanvaSweetHeartCN", 65], ["cn-CanvaSwordLikeDreamCN", 66], ["cn-CanvaTangyuanHandwritingCN", 67], ["cn-CanvaWanderWorldCN", 68], ["cn-CanvaWenCN", 69], ["cn-DianZiChunYi", 70], ["cn-GenSekiGothicTW-H", 71], ["cn-GenWanMinTW-L", 72], ["cn-GenYoMinTW-B", 73], ["cn-GenYoMinTW-EL", 74], ["cn-GenYoMinTW-H", 75], ["cn-GenYoMinTW-M", 76], ["cn-GenYoMinTW-R", 77], ["cn-GenYoMinTW-SB", 78], ["cn-HYQiHei-AZEJ", 79], ["cn-HYQiHei-EES", 80], ["cn-HanaMinA", 81], ["cn-HappyZcool-2016", 82], ["cn-HelloFont ZJ KeKouKeAiTi", 83], ["cn-HelloFont-ID-BoBoTi", 84], ["cn-HelloFont-ID-FuGuHei-25", 85], ["cn-HelloFont-ID-FuGuHei-35", 86], ["cn-HelloFont-ID-FuGuHei-45", 87], ["cn-HelloFont-ID-FuGuHei-55", 88], ["cn-HelloFont-ID-FuGuHei-65", 89], ["cn-HelloFont-ID-FuGuHei-75", 90], ["cn-HelloFont-ID-FuGuHei-85", 91], ["cn-HelloFont-ID-HeiKa", 92], ["cn-HelloFont-ID-HeiTang", 93], ["cn-HelloFont-ID-JianSong-95", 94], ["cn-HelloFont-ID-JueJiangHei-50", 95], ["cn-HelloFont-ID-JueJiangHei-55", 96], ["cn-HelloFont-ID-JueJiangHei-60", 97], ["cn-HelloFont-ID-JueJiangHei-65", 98], ["cn-HelloFont-ID-JueJiangHei-70", 99], ["cn-HelloFont-ID-JueJiangHei-75", 100], ["cn-HelloFont-ID-JueJiangHei-80", 101], ["cn-HelloFont-ID-KuHeiTi", 102], ["cn-HelloFont-ID-LingDongTi", 103], ["cn-HelloFont-ID-LingLiTi", 104], ["cn-HelloFont-ID-MuFengTi", 105], ["cn-HelloFont-ID-NaiNaiJiangTi", 106], ["cn-HelloFont-ID-PangDu", 107], ["cn-HelloFont-ID-ReLieTi", 108], ["cn-HelloFont-ID-RouRun", 109], ["cn-HelloFont-ID-SaShuangShouXieTi", 110], ["cn-HelloFont-ID-WangZheFengFan", 111], ["cn-HelloFont-ID-YouQiTi", 112], ["cn-Hellofont-ID-XiaLeTi", 113], ["cn-Hellofont-ID-XianXiaTi", 114], ["cn-HuXiaoBoKuHei", 115], ["cn-IDDanMoXingKai", 116], ["cn-IDJueJiangHei", 117], ["cn-IDMeiLingTi", 
118], ["cn-IDQQSugar", 119], ["cn-LiuJianMaoCao-Regular", 120], ["cn-LongCang-Regular", 121], ["cn-MaShanZheng-Regular", 122], ["cn-PangMenZhengDao-3", 123], ["cn-PangMenZhengDao-Cu", 124], ["cn-PangMenZhengDao", 125], ["cn-SentyCaramel", 126], ["cn-SourceHanSerifSC", 127], ["cn-WenCang-Regular", 128], ["cn-WenQuanYiMicroHei", 129], ["cn-XianErTi", 130], ["cn-YRDZSTJF", 131], ["cn-YS-HelloFont-BangBangTi", 132], ["cn-ZCOOLKuaiLe-Regular", 133], ["cn-ZCOOLQingKeHuangYou-Regular", 134], ["cn-ZCOOLXiaoWei-Regular", 135], ["cn-ZCOOL_KuHei", 136], ["cn-ZhiMangXing-Regular", 137], ["cn-baotuxiaobaiti", 138], ["cn-jiangxizhuokai-Regular", 139], ["cn-zcool-gdh", 140], ["cn-zcoolqingkehuangyouti-Regular", 141], ["cn-zcoolwenyiti", 142]]
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/de.json b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/de.json
new file mode 100644
index 0000000000000000000000000000000000000000..f231d0d5b2f6b95e4455303cec0fc5bf9f1b3d3f
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/de.json
@@ -0,0 +1 @@
+[["en-Montserrat-Regular", 0], ["en-Poppins-Italic", 1], ["en-GlacialIndifference-Regular", 2], ["en-OpenSans-ExtraBoldItalic", 3], ["en-Montserrat-Bold", 4], ["en-Now-Regular", 5], ["en-Garet-Regular", 6], ["en-LeagueSpartan-Bold", 7], ["en-DMSans-Regular", 8], ["en-OpenSauceOne-Regular", 9], ["en-OpenSans-ExtraBold", 10], ["en-KGPrimaryPenmanship", 11], ["en-Anton-Regular", 12], ["en-Aileron-BlackItalic", 13], ["en-Quicksand-Light", 14], ["en-Roboto-BoldItalic", 15], ["en-TheSeasons-It", 16], ["en-Kollektif", 17], ["en-Inter-BoldItalic", 18], ["en-Poppins-Medium", 19], ["en-Poppins-Light", 20], ["en-RoxboroughCF-RegularItalic", 21], ["en-PlayfairDisplay-SemiBold", 22], ["en-Agrandir-Italic", 23], ["en-Lato-Regular", 24], ["en-MoreSugarRegular", 25], ["en-CanvaSans-RegularItalic", 26], ["en-PublicSans-Italic", 27], ["en-CodePro-NormalLC", 28], ["en-Belleza-Regular", 29], ["en-JosefinSans-Bold", 30], ["en-HKGrotesk-Bold", 31], ["en-Telegraf-Medium", 32], ["en-BrittanySignatureRegular", 33], ["en-Raleway-ExtraBoldItalic", 34], ["en-Mont-RegularItalic", 35], ["en-Arimo-BoldItalic", 36], ["en-Lora-Italic", 37], ["en-ArchivoBlack-Regular", 38], ["en-Poppins", 39], ["en-Barlow-Black", 40], ["en-CormorantGaramond-Bold", 41], ["en-LibreBaskerville-Regular", 42], ["en-LazydogRegular", 45], ["en-FredokaOne-Regular", 46], ["en-Horizon-Bold", 47], ["en-Nourd-Regular", 48], ["en-Hatton-Regular", 49], ["en-Nunito-ExtraBoldItalic", 50], ["en-CerebriSans-Regular", 51], ["en-Montserrat-Light", 52], ["en-TenorSans", 53], ["en-ClearSans-Bold", 55], ["en-Cardo-Regular", 56], ["en-Alice-Regular", 57], ["en-Oswald-Regular", 58], ["en-Muli-Black", 60], ["en-TAN-PEARL-Regular", 61], ["en-CooperHewitt-Book", 62], ["en-Agrandir-Grand", 63], ["en-BlackMango-Thin", 64], ["en-DMSerifDisplay-Regular", 65], ["en-Antonio-Bold", 66], ["en-Sniglet-Regular", 67], ["en-BeVietnam-Regular", 68], ["en-NunitoSans10pt-BlackItalic", 69], ["en-AbhayaLibre-ExtraBold", 70], ["en-Rubik-Regular", 71], 
["en-PPNeueMachina-Regular", 72], ["en-TAN - MON CHERI-Regular", 73], ["en-SourceSansPro-BoldItalic", 76], ["en-MoonTime-Regular", 77], ["en-Eczar-ExtraBold", 78], ["en-Gatwick-Regular", 79], ["en-MonumentExtended-Regular", 80], ["en-BarlowSemiCondensed-Regular", 81], ["en-BarlowCondensed-Regular", 82], ["en-Alegreya-Regular", 83], ["en-DreamAvenue", 84], ["en-RobotoCondensed-Italic", 85], ["en-BobbyJones-Regular", 86], ["en-Garet-ExtraBold", 87], ["en-YesevaOne-Regular", 88], ["en-Dosis-ExtraBold", 89], ["en-LeagueGothic-Regular", 90], ["en-OpenSans-Italic", 91], ["en-TANAEGEAN-Regular", 92], ["en-Maharlika-Regular", 93], ["en-Agrandir-Wide", 96], ["en-Chewy-Regular", 97], ["en-BodoniFLF-BoldItalic", 98], ["en-Nunito-BlackItalic", 99], ["en-LilitaOne", 100], ["en-HandyCasualCondensed-Regular", 101], ["en-Ovo", 102], ["en-Livvic-Regular", 103], ["en-Agrandir-Narrow", 104], ["en-CrimsonPro-Italic", 105], ["en-AnonymousPro-Bold", 106], ["en-NF-OneLittleFont-Bold", 107], ["en-RedHatDisplay-BoldItalic", 108], ["en-CodecPro-Regular", 109], ["en-HalimunRegular", 110], ["en-LibreFranklin-Black", 111], ["en-TeXGyreTermes-BoldItalic", 112], ["en-Shrikhand-Regular", 113], ["en-TTNormsPro-Italic", 114], ["en-OpenSans-Bold", 116], ["en-GreatVibes-Regular", 117], ["en-Breathing", 118], ["en-HeroLight-Regular", 119], ["en-KGPrimaryDots", 120], ["en-Quicksand-Bold", 121], ["en-Brice-ExtraLightSemiExpanded", 122], ["en-Lato-BoldItalic", 123], ["en-Fraunces9pt-Italic", 124], ["en-AbrilFatface-Regular", 125], ["en-BerkshireSwash-Regular", 126], ["en-Atma-Bold", 127], ["en-HolidayRegular", 128], ["en-Gistesy", 131], ["en-BDScript-Regular", 132], ["en-Prompt-Black", 134], ["en-TAN MERINGUE", 135], ["en-GentySans-Regular", 137], ["en-NeueEinstellung-Normal", 138], ["en-Garet-Bold", 139], ["en-FiraSans-Black", 140], ["en-BantayogLight", 141], ["en-NotoSerifDisplay-Black", 142], ["en-TTChocolates-Regular", 143], ["en-Ubuntu-Regular", 144], ["en-Assistant-Bold", 145], 
["en-ABeeZee-Regular", 146], ["en-LexendDeca-Regular", 147], ["en-KingredSerif", 148], ["en-Radley-Regular", 149], ["en-BrownSugar", 150], ["en-MigraItalic-ExtraboldItalic", 151], ["en-ChildosArabic-Regular", 152], ["en-PeaceSans", 153], ["en-LondrinaSolid-Black", 154], ["en-SpaceMono-BoldItalic", 155], ["en-RobotoMono-Light", 156], ["en-CourierPrime-Regular", 157], ["en-Alata-Regular", 158], ["en-Amsterdam-One", 159], ["en-IreneFlorentina-Regular", 160], ["en-CatchyMager", 161], ["en-Alta_regular", 162], ["en-ArticulatCF-Regular", 163], ["en-Raleway-Regular", 164], ["en-BrasikaDisplay", 165], ["en-TANAngleton-Italic", 166], ["en-NotoSerifDisplay-ExtraCondensedItalic", 167], ["en-Bryndan Write", 168], ["en-TTCommonsPro-It", 169], ["en-AlexBrush-Regular", 170], ["en-Antic-Regular", 171], ["en-TTHoves-Bold", 172], ["en-DroidSerif", 173], ["en-Marcellus-Regular", 175], ["en-Sanchez-Italic", 176], ["en-JosefinSans", 177], ["en-Afrah-Regular", 178], ["en-PinyonScript", 179], ["en-TTInterphases-BoldItalic", 180], ["en-Yellowtail-Regular", 181], ["en-Gliker-Regular", 182], ["en-BobbyJonesSoft-Regular", 183], ["en-IBMPlexSans", 184], ["en-Amsterdam-Three", 185], ["en-Amsterdam-FourSlant", 186], ["en-TTFors-Regular", 187], ["en-Quattrocento", 188], ["en-Sifonn-Basic", 189], ["en-AlegreyaSans-Black", 190], ["en-Daydream", 191], ["en-AristotelicaProTx-Rg", 192], ["en-NotoSerif", 193], ["en-EBGaramond-Italic", 194], ["en-HammersmithOne-Regular", 195], ["en-RobotoSlab-Regular", 196], ["en-KGPrimaryDotsLined", 198], ["en-Blinker-Regular", 199], ["en-TAN NIMBUS", 200], ["en-Blueberry-Regular", 201], ["en-Rosario-Regular", 202], ["en-Forum", 203], ["en-MistrullyRegular", 204], ["en-SourceSerifPro-Regular", 205], ["en-Bugaki-Regular", 206], ["en-CMUSerif-Roman", 207], ["en-GulfsDisplay-NormalItalic", 208], ["en-PTSans-Bold", 209], ["en-Sensei-Medium", 210], ["en-SquadaOne-Regular", 211], ["en-Arapey-Italic", 212], ["en-Parisienne-Regular", 213], ["en-Aleo-Italic", 214], 
["en-QuicheDisplay-Italic", 215], ["en-RocaOne-It", 216], ["en-Funtastic-Regular", 217], ["en-PTSerif-BoldItalic", 218], ["en-Muller-RegularItalic", 219], ["en-ArgentCF-Regular", 220], ["en-Brightwall-Italic", 221], ["en-Knewave-Regular", 222], ["en-TYSerif-D", 223], ["en-Agrandir-Tight", 224], ["en-AlfaSlabOne-Regular", 225], ["en-TANTangkiwood-Display", 226], ["en-Kief-Montaser-Regular", 227], ["en-Gotham-Book", 228], ["en-CocoGothic-Italic", 230], ["en-SairaCondensed-Regular", 231], ["en-DellaRespira-Regular", 232], ["en-Questrial-Regular", 233], ["en-BukhariScript-Regular", 234], ["en-HelveticaWorld-Bold", 235], ["en-TANKINDRED-Display", 236], ["en-Vidaloka-Regular", 238], ["en-AlegreyaSansSC-Black", 239], ["en-FeelingPassionate-Regular", 240], ["en-QuincyCF-Regular", 241], ["en-FiraCode-Regular", 242], ["en-Genty-Regular", 243], ["en-Nickainley-Normal", 244], ["en-RubikOne-Regular", 245], ["en-Gidole-Regular", 246], ["en-Gordita-RegularItalic", 248], ["en-Scripter-Regular", 249], ["en-Buffalo-Regular", 250], ["en-KleinText-Regular", 251], ["en-Arvo-Bold", 253], ["en-GabrielSans-NormalItalic", 254], ["en-Heebo-Black", 255], ["en-LexendExa-Regular", 256], ["en-BrixtonSansTC-Regular", 257], ["en-GildaDisplay-Regular", 258], ["en-Amaranth-BoldItalic", 260], ["en-BubbleboddyNeue-Regular", 261], ["en-MavenPro-Bold", 262], ["en-TTDrugs-Italic", 263], ["en-CyGrotesk-KeyRegular", 264], ["en-VarelaRound-Regular", 265], ["en-Ruda-Black", 266], ["en-SafiraMarch", 267], ["en-BloggerSans", 268], ["en-TANHEADLINE-Regular", 269], ["en-SloopScriptPro-Regular", 270], ["en-NeueMontreal-Regular", 271], ["en-Schoolbell-Regular", 272], ["en-InriaSerif-Regular", 274], ["en-JetBrainsMono-Regular", 275], ["en-MADEEvolveSans", 276], ["en-Dekko", 277], ["en-Handyman-Regular", 278], ["en-Aileron-BoldItalic", 279], ["en-Solway-Regular", 281], ["en-Higuen-Regular", 282], ["en-WedgesItalic", 283], ["en-TANASHFORD-BOLD", 284], ["en-IBMPlexMono", 285], ["en-RacingSansOne-Regular", 286], 
["en-RegularBrush", 287], ["en-OpenSans-LightItalic", 288], ["en-SpecialElite-Regular", 289], ["en-FuturaLTPro-Medium", 290], ["en-MaragsaDisplay", 291], ["en-BigShouldersDisplay-Regular", 292], ["en-BDSans-Regular", 293], ["en-RasputinRegular", 294], ["en-Yvesyvesdrawing-BoldItalic", 295], ["en-Bitter-Regular", 296], ["en-TTFirsNeue-Italic", 299], ["en-Sunday-Regular", 300], ["en-HKGothic-MediumItalic", 301], ["en-CaveatBrush-Regular", 302], ["en-ArchitectsDaughter-Regular", 304], ["en-Angelina", 305], ["en-Calistoga-Regular", 306], ["en-ArchivoNarrow-Regular", 307], ["en-ObjectSans-MediumSlanted", 308], ["en-Nexa-RegularItalic", 310], ["en-Lustria-Regular", 311], ["en-Amsterdam-TwoSlant", 312], ["en-Virtual-Regular", 313], ["en-NF-Lepetitcochon-Regular", 315], ["en-LeJour-Serif", 317], ["en-Prata-Regular", 318], ["en-PPWoodland-Regular", 319], ["en-PlayfairDisplay-BoldItalic", 320], ["en-AmaticSC-Regular", 321], ["en-Cabin-Regular", 322], ["en-MrDafoe-Regular", 324], ["en-TTRamillas-Italic", 325], ["en-Luckybones-Bold", 326], ["en-DarkerGrotesque-Light", 327], ["en-CormorantSC-Bold", 329], ["en-GochiHand-Regular", 330], ["en-Atteron", 331], ["en-RocaTwo-Lt", 332], ["en-TANSONGBIRD", 334], ["en-HeadingNow-74Regular", 335], ["en-Luthier-BoldItalic", 336], ["en-Oregano-Regular", 337], ["en-AyrTropikaIsland-Int", 338], ["en-Mali-Regular", 339], ["en-DidactGothic-Regular", 340], ["en-Lovelace-Regular", 341], ["en-BakerieSmooth-Regular", 342], ["en-CarterOne", 343], ["en-HussarBd", 344], ["en-OldStandard-Italic", 345], ["en-TAN-ASTORIA-Display", 346], ["en-rugratssans-Regular", 347], ["en-BetterSaturday", 349], ["en-AdigianaToybox", 350], ["en-Sailors", 351], ["en-PlayfairDisplaySC-Italic", 352], ["en-Etna-Regular", 353], ["en-Revive80Signature", 354], ["en-CAGenerated", 355], ["en-Poppins-Regular", 356], ["en-Jonathan-Regular", 357], ["en-Pacifico-Regular", 358], ["en-Saira-Black", 359], ["en-Loubag-Regular", 360], ["en-Decalotype-Black", 361], ["en-Mansalva-Regular", 
362], ["en-Allura-Regular", 363], ["en-ProximaNova-Bold", 364], ["en-TANMIGNON-DISPLAY", 365], ["en-ArsenicaAntiqua-Regular", 366], ["en-BreulGroteskA-RegularItalic", 367], ["en-HKModular-Bold", 368], ["en-TANNightingale-Regular", 369], ["en-AristotelicaProCndTxt-Rg", 370], ["en-Aprila-Regular", 371], ["en-Tomorrow-Regular", 372], ["en-AngellaWhite", 373], ["en-KaushanScript-Regular", 374], ["en-NotoSans", 375], ["en-LeJour-Script", 376], ["en-BrixtonTC-Regular", 377], ["en-OleoScript-Regular", 378], ["en-Cakerolli-Regular", 379], ["en-Lobster-Regular", 380], ["en-FrunchySerif-Regular", 381], ["en-PorcelainRegular", 382], ["en-AlojaExtended", 383], ["en-SergioTrendy-Italic", 384], ["en-LovelaceText-Bold", 385], ["en-Anaktoria", 386], ["en-JimmyScript-Light", 387], ["en-IBMPlexSerif", 388], ["en-Marta", 389], ["en-Mango-Regular", 390], ["en-Overpass-Italic", 391], ["en-Hagrid-Regular", 392], ["en-ElikaGorica", 393], ["en-Amiko-Regular", 394], ["en-EFCOBrookshire-Regular", 395], ["en-Caladea-Regular", 396], ["en-Staatliches-Regular", 398], ["en-Helios-Bold", 399], ["en-Satisfy-Regular", 400], ["en-NexaScript-Regular", 401], ["en-Trocchi-Regular", 402], ["en-March", 403], ["en-IbarraRealNova-Regular", 404], ["en-Nectarine-Regular", 405], ["en-Overpass-Light", 406], ["en-TruetypewriterPolyglOTT", 407], ["en-Bangers-Regular", 408], ["en-Lazord-BoldExpandedItalic", 409], ["en-Chloe-Regular", 410], ["en-BaskervilleDisplayPT-Regular", 411], ["en-Bright-Regular", 412], ["en-Vollkorn-Regular", 413], ["en-Harmattan", 414], ["en-SortsMillGoudy-Regular", 415], ["en-Biryani-Bold", 416], ["en-SugoProDisplay-Italic", 417], ["en-Lazord-BoldItalic", 418], ["en-Alike-Regular", 419], ["en-Sacramento-Regular", 421], ["en-HKGroteskPro-Italic", 422], ["en-Aleo-BoldItalic", 423], ["en-TANGARLAND-Regular", 425], ["en-Twister", 426], ["en-Arsenal-Italic", 427], ["en-Bogart-Italic", 428], ["en-BethEllen-Regular", 429], ["en-Caveat-Regular", 430], ["en-BalsamiqSans-Bold", 431], 
["en-BreeSerif-Regular", 432], ["en-CodecPro-ExtraBold", 433], ["en-Pierson-Light", 434], ["en-CyGrotesk-WideRegular", 435], ["en-Lumios-Marker", 436], ["en-Comfortaa-Bold", 437], ["en-RTL-AdamScript-Regular", 439], ["en-EastmanGrotesque-Italic", 440], ["en-Kalam-Bold", 441], ["en-ChauPhilomeneOne-Regular", 442], ["en-Coiny-Regular", 443], ["en-Lovera", 444], ["en-Gellatio", 445], ["en-TitilliumWeb-Bold", 446], ["en-OilvareBase-Italic", 447], ["en-Catamaran-Black", 448], ["en-Anteb-Italic", 449], ["en-SueEllenFrancisco", 450], ["en-SweetApricot", 451], ["en-BrightSunshine", 452], ["en-IM_FELL_Double_Pica_Italic", 453], ["en-Granaina-limpia", 454], ["en-TANPARFAIT", 455], ["en-AcherusGrotesque-Regular", 456], ["en-AwesomeLathusca-Italic", 457], ["en-Signika-Bold", 458], ["en-Andasia", 459], ["en-DO-AllCaps-Slanted", 460], ["en-Zenaida-Regular", 461], ["en-Fahkwang-Regular", 462], ["en-Play-Regular", 463], ["en-PlumaThin-Regular", 465], ["en-SportsWorld", 466], ["en-Garet-Black", 467], ["en-CarolloPlayscript-BlackItalic", 468], ["en-SEGO", 470], ["en-BobbyJones-Condensed", 471], ["en-NexaSlab-RegularItalic", 472], ["en-DancingScript-Regular", 473], ["en-Magnolia-Script", 475], ["en-OpunMai-400It", 476], ["en-MadelynFill-Regular", 477], ["en-FingerPaint-Regular", 479], ["en-BostonAngel-Light", 480], ["en-Gliker-RegularExpanded", 481], ["en-Ahsing", 482], ["en-Engagement-Regular", 483], ["en-EyesomeScript", 484], ["en-LibraSerifModern-Regular", 485], ["en-London-Regular", 486], ["en-AtkinsonHyperlegible-Regular", 487], ["en-StadioNow-TextItalic", 488], ["en-Aniyah", 489], ["en-ITCAvantGardePro-Bold", 490], ["en-Comica-Regular", 491], ["en-Coustard-Regular", 492], ["en-Brice-BoldCondensed", 493], ["en-TANNEWYORK-Bold", 494], ["en-TANBUSTER-Bold", 495], ["en-Alatsi-Regular", 496], ["en-TYSerif-Book", 497], ["en-Jingleberry", 498], ["en-Rajdhani-Bold", 499], ["en-LobsterTwo-BoldItalic", 500], ["en-BestLight-Medium", 501], ["en-Hitchcut-Regular", 502], 
["en-GermaniaOne-Regular", 503], ["en-Emitha-Script", 504], ["en-LemonTuesday", 505], ["en-MonterchiSerif-Regular", 507], ["en-AllertaStencil-Regular", 508], ["en-RTL-Sondos-Regular", 509], ["en-HomemadeApple-Regular", 510], ["en-CosmicOcto-Medium", 511]]
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/en.json b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/en.json
new file mode 100644
index 0000000000000000000000000000000000000000..41f2a1fb4fee619fc7202de274525e1aeff0f45b
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/en.json
@@ -0,0 +1 @@
+[["en-Montserrat-Regular", 0], ["en-Poppins-Italic", 1], ["en-GlacialIndifference-Regular", 2], ["en-OpenSans-ExtraBoldItalic", 3], ["en-Montserrat-Bold", 4], ["en-Now-Regular", 5], ["en-Garet-Regular", 6], ["en-LeagueSpartan-Bold", 7], ["en-DMSans-Regular", 8], ["en-OpenSauceOne-Regular", 9], ["en-OpenSans-ExtraBold", 10], ["en-KGPrimaryPenmanship", 11], ["en-Anton-Regular", 12], ["en-Aileron-BlackItalic", 13], ["en-Quicksand-Light", 14], ["en-Roboto-BoldItalic", 15], ["en-TheSeasons-It", 16], ["en-Kollektif", 17], ["en-Inter-BoldItalic", 18], ["en-Poppins-Medium", 19], ["en-Poppins-Light", 20], ["en-RoxboroughCF-RegularItalic", 21], ["en-PlayfairDisplay-SemiBold", 22], ["en-Agrandir-Italic", 23], ["en-Lato-Regular", 24], ["en-MoreSugarRegular", 25], ["en-CanvaSans-RegularItalic", 26], ["en-PublicSans-Italic", 27], ["en-CodePro-NormalLC", 28], ["en-Belleza-Regular", 29], ["en-JosefinSans-Bold", 30], ["en-HKGrotesk-Bold", 31], ["en-Telegraf-Medium", 32], ["en-BrittanySignatureRegular", 33], ["en-Raleway-ExtraBoldItalic", 34], ["en-Mont-RegularItalic", 35], ["en-Arimo-BoldItalic", 36], ["en-Lora-Italic", 37], ["en-ArchivoBlack-Regular", 38], ["en-Poppins", 39], ["en-Barlow-Black", 40], ["en-CormorantGaramond-Bold", 41], ["en-LibreBaskerville-Regular", 42], ["en-CanvaSchoolFontRegular", 43], ["en-BebasNeueBold", 44], ["en-LazydogRegular", 45], ["en-FredokaOne-Regular", 46], ["en-Horizon-Bold", 47], ["en-Nourd-Regular", 48], ["en-Hatton-Regular", 49], ["en-Nunito-ExtraBoldItalic", 50], ["en-CerebriSans-Regular", 51], ["en-Montserrat-Light", 52], ["en-TenorSans", 53], ["en-Norwester-Regular", 54], ["en-ClearSans-Bold", 55], ["en-Cardo-Regular", 56], ["en-Alice-Regular", 57], ["en-Oswald-Regular", 58], ["en-Gaegu-Bold", 59], ["en-Muli-Black", 60], ["en-TAN-PEARL-Regular", 61], ["en-CooperHewitt-Book", 62], ["en-Agrandir-Grand", 63], ["en-BlackMango-Thin", 64], ["en-DMSerifDisplay-Regular", 65], ["en-Antonio-Bold", 66], ["en-Sniglet-Regular", 67], 
["en-BeVietnam-Regular", 68], ["en-NunitoSans10pt-BlackItalic", 69], ["en-AbhayaLibre-ExtraBold", 70], ["en-Rubik-Regular", 71], ["en-PPNeueMachina-Regular", 72], ["en-TAN - MON CHERI-Regular", 73], ["en-Jua-Regular", 74], ["en-Playlist-Script", 75], ["en-SourceSansPro-BoldItalic", 76], ["en-MoonTime-Regular", 77], ["en-Eczar-ExtraBold", 78], ["en-Gatwick-Regular", 79], ["en-MonumentExtended-Regular", 80], ["en-BarlowSemiCondensed-Regular", 81], ["en-BarlowCondensed-Regular", 82], ["en-Alegreya-Regular", 83], ["en-DreamAvenue", 84], ["en-RobotoCondensed-Italic", 85], ["en-BobbyJones-Regular", 86], ["en-Garet-ExtraBold", 87], ["en-YesevaOne-Regular", 88], ["en-Dosis-ExtraBold", 89], ["en-LeagueGothic-Regular", 90], ["en-OpenSans-Italic", 91], ["en-TANAEGEAN-Regular", 92], ["en-Maharlika-Regular", 93], ["en-MarykateRegular", 94], ["en-Cinzel-Regular", 95], ["en-Agrandir-Wide", 96], ["en-Chewy-Regular", 97], ["en-BodoniFLF-BoldItalic", 98], ["en-Nunito-BlackItalic", 99], ["en-LilitaOne", 100], ["en-HandyCasualCondensed-Regular", 101], ["en-Ovo", 102], ["en-Livvic-Regular", 103], ["en-Agrandir-Narrow", 104], ["en-CrimsonPro-Italic", 105], ["en-AnonymousPro-Bold", 106], ["en-NF-OneLittleFont-Bold", 107], ["en-RedHatDisplay-BoldItalic", 108], ["en-CodecPro-Regular", 109], ["en-HalimunRegular", 110], ["en-LibreFranklin-Black", 111], ["en-TeXGyreTermes-BoldItalic", 112], ["en-Shrikhand-Regular", 113], ["en-TTNormsPro-Italic", 114], ["en-Gagalin-Regular", 115], ["en-OpenSans-Bold", 116], ["en-GreatVibes-Regular", 117], ["en-Breathing", 118], ["en-HeroLight-Regular", 119], ["en-KGPrimaryDots", 120], ["en-Quicksand-Bold", 121], ["en-Brice-ExtraLightSemiExpanded", 122], ["en-Lato-BoldItalic", 123], ["en-Fraunces9pt-Italic", 124], ["en-AbrilFatface-Regular", 125], ["en-BerkshireSwash-Regular", 126], ["en-Atma-Bold", 127], ["en-HolidayRegular", 128], ["en-BebasNeueCyrillic", 129], ["en-IntroRust-Base", 130], ["en-Gistesy", 131], ["en-BDScript-Regular", 132], 
["en-ApricotsRegular", 133], ["en-Prompt-Black", 134], ["en-TAN MERINGUE", 135], ["en-Sukar Regular", 136], ["en-GentySans-Regular", 137], ["en-NeueEinstellung-Normal", 138], ["en-Garet-Bold", 139], ["en-FiraSans-Black", 140], ["en-BantayogLight", 141], ["en-NotoSerifDisplay-Black", 142], ["en-TTChocolates-Regular", 143], ["en-Ubuntu-Regular", 144], ["en-Assistant-Bold", 145], ["en-ABeeZee-Regular", 146], ["en-LexendDeca-Regular", 147], ["en-KingredSerif", 148], ["en-Radley-Regular", 149], ["en-BrownSugar", 150], ["en-MigraItalic-ExtraboldItalic", 151], ["en-ChildosArabic-Regular", 152], ["en-PeaceSans", 153], ["en-LondrinaSolid-Black", 154], ["en-SpaceMono-BoldItalic", 155], ["en-RobotoMono-Light", 156], ["en-CourierPrime-Regular", 157], ["en-Alata-Regular", 158], ["en-Amsterdam-One", 159], ["en-IreneFlorentina-Regular", 160], ["en-CatchyMager", 161], ["en-Alta_regular", 162], ["en-ArticulatCF-Regular", 163], ["en-Raleway-Regular", 164], ["en-BrasikaDisplay", 165], ["en-TANAngleton-Italic", 166], ["en-NotoSerifDisplay-ExtraCondensedItalic", 167], ["en-Bryndan Write", 168], ["en-TTCommonsPro-It", 169], ["en-AlexBrush-Regular", 170], ["en-Antic-Regular", 171], ["en-TTHoves-Bold", 172], ["en-DroidSerif", 173], ["en-AblationRegular", 174], ["en-Marcellus-Regular", 175], ["en-Sanchez-Italic", 176], ["en-JosefinSans", 177], ["en-Afrah-Regular", 178], ["en-PinyonScript", 179], ["en-TTInterphases-BoldItalic", 180], ["en-Yellowtail-Regular", 181], ["en-Gliker-Regular", 182], ["en-BobbyJonesSoft-Regular", 183], ["en-IBMPlexSans", 184], ["en-Amsterdam-Three", 185], ["en-Amsterdam-FourSlant", 186], ["en-TTFors-Regular", 187], ["en-Quattrocento", 188], ["en-Sifonn-Basic", 189], ["en-AlegreyaSans-Black", 190], ["en-Daydream", 191], ["en-AristotelicaProTx-Rg", 192], ["en-NotoSerif", 193], ["en-EBGaramond-Italic", 194], ["en-HammersmithOne-Regular", 195], ["en-RobotoSlab-Regular", 196], ["en-DO-Sans-Regular", 197], ["en-KGPrimaryDotsLined", 198], ["en-Blinker-Regular", 199], 
["en-TAN NIMBUS", 200], ["en-Blueberry-Regular", 201], ["en-Rosario-Regular", 202], ["en-Forum", 203], ["en-MistrullyRegular", 204], ["en-SourceSerifPro-Regular", 205], ["en-Bugaki-Regular", 206], ["en-CMUSerif-Roman", 207], ["en-GulfsDisplay-NormalItalic", 208], ["en-PTSans-Bold", 209], ["en-Sensei-Medium", 210], ["en-SquadaOne-Regular", 211], ["en-Arapey-Italic", 212], ["en-Parisienne-Regular", 213], ["en-Aleo-Italic", 214], ["en-QuicheDisplay-Italic", 215], ["en-RocaOne-It", 216], ["en-Funtastic-Regular", 217], ["en-PTSerif-BoldItalic", 218], ["en-Muller-RegularItalic", 219], ["en-ArgentCF-Regular", 220], ["en-Brightwall-Italic", 221], ["en-Knewave-Regular", 222], ["en-TYSerif-D", 223], ["en-Agrandir-Tight", 224], ["en-AlfaSlabOne-Regular", 225], ["en-TANTangkiwood-Display", 226], ["en-Kief-Montaser-Regular", 227], ["en-Gotham-Book", 228], ["en-JuliusSansOne-Regular", 229], ["en-CocoGothic-Italic", 230], ["en-SairaCondensed-Regular", 231], ["en-DellaRespira-Regular", 232], ["en-Questrial-Regular", 233], ["en-BukhariScript-Regular", 234], ["en-HelveticaWorld-Bold", 235], ["en-TANKINDRED-Display", 236], ["en-CinzelDecorative-Regular", 237], ["en-Vidaloka-Regular", 238], ["en-AlegreyaSansSC-Black", 239], ["en-FeelingPassionate-Regular", 240], ["en-QuincyCF-Regular", 241], ["en-FiraCode-Regular", 242], ["en-Genty-Regular", 243], ["en-Nickainley-Normal", 244], ["en-RubikOne-Regular", 245], ["en-Gidole-Regular", 246], ["en-Borsok", 247], ["en-Gordita-RegularItalic", 248], ["en-Scripter-Regular", 249], ["en-Buffalo-Regular", 250], ["en-KleinText-Regular", 251], ["en-Creepster-Regular", 252], ["en-Arvo-Bold", 253], ["en-GabrielSans-NormalItalic", 254], ["en-Heebo-Black", 255], ["en-LexendExa-Regular", 256], ["en-BrixtonSansTC-Regular", 257], ["en-GildaDisplay-Regular", 258], ["en-ChunkFive-Roman", 259], ["en-Amaranth-BoldItalic", 260], ["en-BubbleboddyNeue-Regular", 261], ["en-MavenPro-Bold", 262], ["en-TTDrugs-Italic", 263], ["en-CyGrotesk-KeyRegular", 264], 
["en-VarelaRound-Regular", 265], ["en-Ruda-Black", 266], ["en-SafiraMarch", 267], ["en-BloggerSans", 268], ["en-TANHEADLINE-Regular", 269], ["en-SloopScriptPro-Regular", 270], ["en-NeueMontreal-Regular", 271], ["en-Schoolbell-Regular", 272], ["en-SigherRegular", 273], ["en-InriaSerif-Regular", 274], ["en-JetBrainsMono-Regular", 275], ["en-MADEEvolveSans", 276], ["en-Dekko", 277], ["en-Handyman-Regular", 278], ["en-Aileron-BoldItalic", 279], ["en-Bright-Italic", 280], ["en-Solway-Regular", 281], ["en-Higuen-Regular", 282], ["en-WedgesItalic", 283], ["en-TANASHFORD-BOLD", 284], ["en-IBMPlexMono", 285], ["en-RacingSansOne-Regular", 286], ["en-RegularBrush", 287], ["en-OpenSans-LightItalic", 288], ["en-SpecialElite-Regular", 289], ["en-FuturaLTPro-Medium", 290], ["en-MaragsaDisplay", 291], ["en-BigShouldersDisplay-Regular", 292], ["en-BDSans-Regular", 293], ["en-RasputinRegular", 294], ["en-Yvesyvesdrawing-BoldItalic", 295], ["en-Bitter-Regular", 296], ["en-LuckiestGuy-Regular", 297], ["en-CanvaSchoolFontDotted", 298], ["en-TTFirsNeue-Italic", 299], ["en-Sunday-Regular", 300], ["en-HKGothic-MediumItalic", 301], ["en-CaveatBrush-Regular", 302], ["en-HeliosExt", 303], ["en-ArchitectsDaughter-Regular", 304], ["en-Angelina", 305], ["en-Calistoga-Regular", 306], ["en-ArchivoNarrow-Regular", 307], ["en-ObjectSans-MediumSlanted", 308], ["en-AyrLucidityCondensed-Regular", 309], ["en-Nexa-RegularItalic", 310], ["en-Lustria-Regular", 311], ["en-Amsterdam-TwoSlant", 312], ["en-Virtual-Regular", 313], ["en-Brusher-Regular", 314], ["en-NF-Lepetitcochon-Regular", 315], ["en-TANTWINKLE", 316], ["en-LeJour-Serif", 317], ["en-Prata-Regular", 318], ["en-PPWoodland-Regular", 319], ["en-PlayfairDisplay-BoldItalic", 320], ["en-AmaticSC-Regular", 321], ["en-Cabin-Regular", 322], ["en-Manjari-Bold", 323], ["en-MrDafoe-Regular", 324], ["en-TTRamillas-Italic", 325], ["en-Luckybones-Bold", 326], ["en-DarkerGrotesque-Light", 327], ["en-BellabooRegular", 328], ["en-CormorantSC-Bold", 329], 
["en-GochiHand-Regular", 330], ["en-Atteron", 331], ["en-RocaTwo-Lt", 332], ["en-ZCOOLXiaoWei-Regular", 333], ["en-TANSONGBIRD", 334], ["en-HeadingNow-74Regular", 335], ["en-Luthier-BoldItalic", 336], ["en-Oregano-Regular", 337], ["en-AyrTropikaIsland-Int", 338], ["en-Mali-Regular", 339], ["en-DidactGothic-Regular", 340], ["en-Lovelace-Regular", 341], ["en-BakerieSmooth-Regular", 342], ["en-CarterOne", 343], ["en-HussarBd", 344], ["en-OldStandard-Italic", 345], ["en-TAN-ASTORIA-Display", 346], ["en-rugratssans-Regular", 347], ["en-BMHANNA", 348], ["en-BetterSaturday", 349], ["en-AdigianaToybox", 350], ["en-Sailors", 351], ["en-PlayfairDisplaySC-Italic", 352], ["en-Etna-Regular", 353], ["en-Revive80Signature", 354], ["en-CAGenerated", 355], ["en-Poppins-Regular", 356], ["en-Jonathan-Regular", 357], ["en-Pacifico-Regular", 358], ["en-Saira-Black", 359], ["en-Loubag-Regular", 360], ["en-Decalotype-Black", 361], ["en-Mansalva-Regular", 362], ["en-Allura-Regular", 363], ["en-ProximaNova-Bold", 364], ["en-TANMIGNON-DISPLAY", 365], ["en-ArsenicaAntiqua-Regular", 366], ["en-BreulGroteskA-RegularItalic", 367], ["en-HKModular-Bold", 368], ["en-TANNightingale-Regular", 369], ["en-AristotelicaProCndTxt-Rg", 370], ["en-Aprila-Regular", 371], ["en-Tomorrow-Regular", 372], ["en-AngellaWhite", 373], ["en-KaushanScript-Regular", 374], ["en-NotoSans", 375], ["en-LeJour-Script", 376], ["en-BrixtonTC-Regular", 377], ["en-OleoScript-Regular", 378], ["en-Cakerolli-Regular", 379], ["en-Lobster-Regular", 380], ["en-FrunchySerif-Regular", 381], ["en-PorcelainRegular", 382], ["en-AlojaExtended", 383], ["en-SergioTrendy-Italic", 384], ["en-LovelaceText-Bold", 385], ["en-Anaktoria", 386], ["en-JimmyScript-Light", 387], ["en-IBMPlexSerif", 388], ["en-Marta", 389], ["en-Mango-Regular", 390], ["en-Overpass-Italic", 391], ["en-Hagrid-Regular", 392], ["en-ElikaGorica", 393], ["en-Amiko-Regular", 394], ["en-EFCOBrookshire-Regular", 395], ["en-Caladea-Regular", 396], ["en-MoonlightBold", 397], 
["en-Staatliches-Regular", 398], ["en-Helios-Bold", 399], ["en-Satisfy-Regular", 400], ["en-NexaScript-Regular", 401], ["en-Trocchi-Regular", 402], ["en-March", 403], ["en-IbarraRealNova-Regular", 404], ["en-Nectarine-Regular", 405], ["en-Overpass-Light", 406], ["en-TruetypewriterPolyglOTT", 407], ["en-Bangers-Regular", 408], ["en-Lazord-BoldExpandedItalic", 409], ["en-Chloe-Regular", 410], ["en-BaskervilleDisplayPT-Regular", 411], ["en-Bright-Regular", 412], ["en-Vollkorn-Regular", 413], ["en-Harmattan", 414], ["en-SortsMillGoudy-Regular", 415], ["en-Biryani-Bold", 416], ["en-SugoProDisplay-Italic", 417], ["en-Lazord-BoldItalic", 418], ["en-Alike-Regular", 419], ["en-PermanentMarker-Regular", 420], ["en-Sacramento-Regular", 421], ["en-HKGroteskPro-Italic", 422], ["en-Aleo-BoldItalic", 423], ["en-Noot", 424], ["en-TANGARLAND-Regular", 425], ["en-Twister", 426], ["en-Arsenal-Italic", 427], ["en-Bogart-Italic", 428], ["en-BethEllen-Regular", 429], ["en-Caveat-Regular", 430], ["en-BalsamiqSans-Bold", 431], ["en-BreeSerif-Regular", 432], ["en-CodecPro-ExtraBold", 433], ["en-Pierson-Light", 434], ["en-CyGrotesk-WideRegular", 435], ["en-Lumios-Marker", 436], ["en-Comfortaa-Bold", 437], ["en-TraceFontRegular", 438], ["en-RTL-AdamScript-Regular", 439], ["en-EastmanGrotesque-Italic", 440], ["en-Kalam-Bold", 441], ["en-ChauPhilomeneOne-Regular", 442], ["en-Coiny-Regular", 443], ["en-Lovera", 444], ["en-Gellatio", 445], ["en-TitilliumWeb-Bold", 446], ["en-OilvareBase-Italic", 447], ["en-Catamaran-Black", 448], ["en-Anteb-Italic", 449], ["en-SueEllenFrancisco", 450], ["en-SweetApricot", 451], ["en-BrightSunshine", 452], ["en-IM_FELL_Double_Pica_Italic", 453], ["en-Granaina-limpia", 454], ["en-TANPARFAIT", 455], ["en-AcherusGrotesque-Regular", 456], ["en-AwesomeLathusca-Italic", 457], ["en-Signika-Bold", 458], ["en-Andasia", 459], ["en-DO-AllCaps-Slanted", 460], ["en-Zenaida-Regular", 461], ["en-Fahkwang-Regular", 462], ["en-Play-Regular", 463], ["en-BERNIERRegular-Regular", 
464], ["en-PlumaThin-Regular", 465], ["en-SportsWorld", 466], ["en-Garet-Black", 467], ["en-CarolloPlayscript-BlackItalic", 468], ["en-Cheque-Regular", 469], ["en-SEGO", 470], ["en-BobbyJones-Condensed", 471], ["en-NexaSlab-RegularItalic", 472], ["en-DancingScript-Regular", 473], ["en-PaalalabasDisplayWideBETA", 474], ["en-Magnolia-Script", 475], ["en-OpunMai-400It", 476], ["en-MadelynFill-Regular", 477], ["en-ZingRust-Base", 478], ["en-FingerPaint-Regular", 479], ["en-BostonAngel-Light", 480], ["en-Gliker-RegularExpanded", 481], ["en-Ahsing", 482], ["en-Engagement-Regular", 483], ["en-EyesomeScript", 484], ["en-LibraSerifModern-Regular", 485], ["en-London-Regular", 486], ["en-AtkinsonHyperlegible-Regular", 487], ["en-StadioNow-TextItalic", 488], ["en-Aniyah", 489], ["en-ITCAvantGardePro-Bold", 490], ["en-Comica-Regular", 491], ["en-Coustard-Regular", 492], ["en-Brice-BoldCondensed", 493], ["en-TANNEWYORK-Bold", 494], ["en-TANBUSTER-Bold", 495], ["en-Alatsi-Regular", 496], ["en-TYSerif-Book", 497], ["en-Jingleberry", 498], ["en-Rajdhani-Bold", 499], ["en-LobsterTwo-BoldItalic", 500], ["en-BestLight-Medium", 501], ["en-Hitchcut-Regular", 502], ["en-GermaniaOne-Regular", 503], ["en-Emitha-Script", 504], ["en-LemonTuesday", 505], ["en-Cubao_Free_Regular", 506], ["en-MonterchiSerif-Regular", 507], ["en-AllertaStencil-Regular", 508], ["en-RTL-Sondos-Regular", 509], ["en-HomemadeApple-Regular", 510], ["en-CosmicOcto-Medium", 511]]
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/es.json b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/es.json
new file mode 100644
index 0000000000000000000000000000000000000000..499c6177bf09171b4cfa9e0d47a7bd404e627ae1
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/es.json
@@ -0,0 +1 @@
+[["en-Montserrat-Regular", 0], ["en-Poppins-Italic", 1], ["en-OpenSans-ExtraBoldItalic", 3], ["en-Montserrat-Bold", 4], ["en-Now-Regular", 5], ["en-Garet-Regular", 6], ["en-LeagueSpartan-Bold", 7], ["en-DMSans-Regular", 8], ["en-OpenSauceOne-Regular", 9], ["en-OpenSans-ExtraBold", 10], ["en-KGPrimaryPenmanship", 11], ["en-Anton-Regular", 12], ["en-Aileron-BlackItalic", 13], ["en-Quicksand-Light", 14], ["en-Roboto-BoldItalic", 15], ["en-TheSeasons-It", 16], ["en-Kollektif", 17], ["en-Inter-BoldItalic", 18], ["en-Poppins-Medium", 19], ["en-Poppins-Light", 20], ["en-RoxboroughCF-RegularItalic", 21], ["en-PlayfairDisplay-SemiBold", 22], ["en-Agrandir-Italic", 23], ["en-Lato-Regular", 24], ["en-CanvaSans-RegularItalic", 26], ["en-PublicSans-Italic", 27], ["en-CodePro-NormalLC", 28], ["en-Belleza-Regular", 29], ["en-JosefinSans-Bold", 30], ["en-HKGrotesk-Bold", 31], ["en-Telegraf-Medium", 32], ["en-BrittanySignatureRegular", 33], ["en-Raleway-ExtraBoldItalic", 34], ["en-Mont-RegularItalic", 35], ["en-Arimo-BoldItalic", 36], ["en-Lora-Italic", 37], ["en-ArchivoBlack-Regular", 38], ["en-Poppins", 39], ["en-Barlow-Black", 40], ["en-CormorantGaramond-Bold", 41], ["en-LibreBaskerville-Regular", 42], ["en-BebasNeueBold", 44], ["en-FredokaOne-Regular", 46], ["en-Horizon-Bold", 47], ["en-Nourd-Regular", 48], ["en-Hatton-Regular", 49], ["en-Nunito-ExtraBoldItalic", 50], ["en-CerebriSans-Regular", 51], ["en-Montserrat-Light", 52], ["en-TenorSans", 53], ["en-ClearSans-Bold", 55], ["en-Cardo-Regular", 56], ["en-Alice-Regular", 57], ["en-Oswald-Regular", 58], ["en-Muli-Black", 60], ["en-TAN-PEARL-Regular", 61], ["en-CooperHewitt-Book", 62], ["en-Agrandir-Grand", 63], ["en-BlackMango-Thin", 64], ["en-DMSerifDisplay-Regular", 65], ["en-Antonio-Bold", 66], ["en-Sniglet-Regular", 67], ["en-BeVietnam-Regular", 68], ["en-NunitoSans10pt-BlackItalic", 69], ["en-AbhayaLibre-ExtraBold", 70], ["en-Rubik-Regular", 71], ["en-PPNeueMachina-Regular", 72], ["en-TAN - MON CHERI-Regular", 73], 
["en-SourceSansPro-BoldItalic", 76], ["en-MoonTime-Regular", 77], ["en-Eczar-ExtraBold", 78], ["en-Gatwick-Regular", 79], ["en-MonumentExtended-Regular", 80], ["en-BarlowSemiCondensed-Regular", 81], ["en-BarlowCondensed-Regular", 82], ["en-Alegreya-Regular", 83], ["en-DreamAvenue", 84], ["en-RobotoCondensed-Italic", 85], ["en-Garet-ExtraBold", 87], ["en-YesevaOne-Regular", 88], ["en-Dosis-ExtraBold", 89], ["en-LeagueGothic-Regular", 90], ["en-OpenSans-Italic", 91], ["en-TANAEGEAN-Regular", 92], ["en-Maharlika-Regular", 93], ["en-Cinzel-Regular", 95], ["en-Agrandir-Wide", 96], ["en-Chewy-Regular", 97], ["en-BodoniFLF-BoldItalic", 98], ["en-Nunito-BlackItalic", 99], ["en-LilitaOne", 100], ["en-HandyCasualCondensed-Regular", 101], ["en-Ovo", 102], ["en-Livvic-Regular", 103], ["en-Agrandir-Narrow", 104], ["en-CrimsonPro-Italic", 105], ["en-AnonymousPro-Bold", 106], ["en-NF-OneLittleFont-Bold", 107], ["en-RedHatDisplay-BoldItalic", 108], ["en-CodecPro-Regular", 109], ["en-HalimunRegular", 110], ["en-LibreFranklin-Black", 111], ["en-TeXGyreTermes-BoldItalic", 112], ["en-Shrikhand-Regular", 113], ["en-TTNormsPro-Italic", 114], ["en-Gagalin-Regular", 115], ["en-OpenSans-Bold", 116], ["en-GreatVibes-Regular", 117], ["en-Breathing", 118], ["en-HeroLight-Regular", 119], ["en-KGPrimaryDots", 120], ["en-Quicksand-Bold", 121], ["en-Brice-ExtraLightSemiExpanded", 122], ["en-Lato-BoldItalic", 123], ["en-Fraunces9pt-Italic", 124], ["en-AbrilFatface-Regular", 125], ["en-BerkshireSwash-Regular", 126], ["en-Atma-Bold", 127], ["en-HolidayRegular", 128], ["en-BebasNeueCyrillic", 129], ["en-IntroRust-Base", 130], ["en-Gistesy", 131], ["en-BDScript-Regular", 132], ["en-ApricotsRegular", 133], ["en-Prompt-Black", 134], ["en-TAN MERINGUE", 135], ["en-GentySans-Regular", 137], ["en-NeueEinstellung-Normal", 138], ["en-Garet-Bold", 139], ["en-FiraSans-Black", 140], ["en-BantayogLight", 141], ["en-NotoSerifDisplay-Black", 142], ["en-TTChocolates-Regular", 143], ["en-Ubuntu-Regular", 144], 
["en-Assistant-Bold", 145], ["en-ABeeZee-Regular", 146], ["en-LexendDeca-Regular", 147], ["en-KingredSerif", 148], ["en-Radley-Regular", 149], ["en-BrownSugar", 150], ["en-MigraItalic-ExtraboldItalic", 151], ["en-ChildosArabic-Regular", 152], ["en-PeaceSans", 153], ["en-LondrinaSolid-Black", 154], ["en-SpaceMono-BoldItalic", 155], ["en-RobotoMono-Light", 156], ["en-CourierPrime-Regular", 157], ["en-Alata-Regular", 158], ["en-Amsterdam-One", 159], ["en-CatchyMager", 161], ["en-Alta_regular", 162], ["en-ArticulatCF-Regular", 163], ["en-Raleway-Regular", 164], ["en-BrasikaDisplay", 165], ["en-TANAngleton-Italic", 166], ["en-NotoSerifDisplay-ExtraCondensedItalic", 167], ["en-Bryndan Write", 168], ["en-TTCommonsPro-It", 169], ["en-AlexBrush-Regular", 170], ["en-Antic-Regular", 171], ["en-TTHoves-Bold", 172], ["en-DroidSerif", 173], ["en-AblationRegular", 174], ["en-Marcellus-Regular", 175], ["en-Sanchez-Italic", 176], ["en-JosefinSans", 177], ["en-Afrah-Regular", 178], ["en-PinyonScript", 179], ["en-TTInterphases-BoldItalic", 180], ["en-Yellowtail-Regular", 181], ["en-Gliker-Regular", 182], ["en-BobbyJonesSoft-Regular", 183], ["en-IBMPlexSans", 184], ["en-Amsterdam-Three", 185], ["en-Amsterdam-FourSlant", 186], ["en-TTFors-Regular", 187], ["en-Quattrocento", 188], ["en-Sifonn-Basic", 189], ["en-AlegreyaSans-Black", 190], ["en-Daydream", 191], ["en-AristotelicaProTx-Rg", 192], ["en-NotoSerif", 193], ["en-EBGaramond-Italic", 194], ["en-HammersmithOne-Regular", 195], ["en-RobotoSlab-Regular", 196], ["en-DO-Sans-Regular", 197], ["en-KGPrimaryDotsLined", 198], ["en-Blinker-Regular", 199], ["en-TAN NIMBUS", 200], ["en-Rosario-Regular", 202], ["en-Forum", 203], ["en-MistrullyRegular", 204], ["en-SourceSerifPro-Regular", 205], ["en-Bugaki-Regular", 206], ["en-CMUSerif-Roman", 207], ["en-GulfsDisplay-NormalItalic", 208], ["en-PTSans-Bold", 209], ["en-Sensei-Medium", 210], ["en-SquadaOne-Regular", 211], ["en-Arapey-Italic", 212], ["en-Parisienne-Regular", 213], ["en-Aleo-Italic", 
214], ["en-QuicheDisplay-Italic", 215], ["en-RocaOne-It", 216], ["en-Funtastic-Regular", 217], ["en-PTSerif-BoldItalic", 218], ["en-Muller-RegularItalic", 219], ["en-ArgentCF-Regular", 220], ["en-Brightwall-Italic", 221], ["en-Knewave-Regular", 222], ["en-Agrandir-Tight", 224], ["en-AlfaSlabOne-Regular", 225], ["en-TANTangkiwood-Display", 226], ["en-Kief-Montaser-Regular", 227], ["en-Gotham-Book", 228], ["en-JuliusSansOne-Regular", 229], ["en-CocoGothic-Italic", 230], ["en-SairaCondensed-Regular", 231], ["en-DellaRespira-Regular", 232], ["en-Questrial-Regular", 233], ["en-BukhariScript-Regular", 234], ["en-HelveticaWorld-Bold", 235], ["en-TANKINDRED-Display", 236], ["en-CinzelDecorative-Regular", 237], ["en-Vidaloka-Regular", 238], ["en-AlegreyaSansSC-Black", 239], ["en-FeelingPassionate-Regular", 240], ["en-QuincyCF-Regular", 241], ["en-FiraCode-Regular", 242], ["en-Genty-Regular", 243], ["en-Nickainley-Normal", 244], ["en-RubikOne-Regular", 245], ["en-Gidole-Regular", 246], ["en-Borsok", 247], ["en-Gordita-RegularItalic", 248], ["en-Scripter-Regular", 249], ["en-Buffalo-Regular", 250], ["en-KleinText-Regular", 251], ["en-Creepster-Regular", 252], ["en-Arvo-Bold", 253], ["en-GabrielSans-NormalItalic", 254], ["en-Heebo-Black", 255], ["en-LexendExa-Regular", 256], ["en-BrixtonSansTC-Regular", 257], ["en-GildaDisplay-Regular", 258], ["en-Amaranth-BoldItalic", 260], ["en-BubbleboddyNeue-Regular", 261], ["en-MavenPro-Bold", 262], ["en-TTDrugs-Italic", 263], ["en-CyGrotesk-KeyRegular", 264], ["en-VarelaRound-Regular", 265], ["en-Ruda-Black", 266], ["en-SafiraMarch", 267], ["en-BloggerSans", 268], ["en-TANHEADLINE-Regular", 269], ["en-SloopScriptPro-Regular", 270], ["en-NeueMontreal-Regular", 271], ["en-Schoolbell-Regular", 272], ["en-InriaSerif-Regular", 274], ["en-JetBrainsMono-Regular", 275], ["en-MADEEvolveSans", 276], ["en-Handyman-Regular", 278], ["en-Aileron-BoldItalic", 279], ["en-Solway-Regular", 281], ["en-Higuen-Regular", 282], ["en-WedgesItalic", 283], 
["en-TANASHFORD-BOLD", 284], ["en-IBMPlexMono", 285], ["en-RacingSansOne-Regular", 286], ["en-RegularBrush", 287], ["en-OpenSans-LightItalic", 288], ["en-SpecialElite-Regular", 289], ["en-FuturaLTPro-Medium", 290], ["en-MaragsaDisplay", 291], ["en-BigShouldersDisplay-Regular", 292], ["en-BDSans-Regular", 293], ["en-RasputinRegular", 294], ["en-Yvesyvesdrawing-BoldItalic", 295], ["en-Bitter-Regular", 296], ["en-LuckiestGuy-Regular", 297], ["en-TTFirsNeue-Italic", 299], ["en-Sunday-Regular", 300], ["en-HKGothic-MediumItalic", 301], ["en-CaveatBrush-Regular", 302], ["en-ArchitectsDaughter-Regular", 304], ["en-Calistoga-Regular", 306], ["en-ArchivoNarrow-Regular", 307], ["en-ObjectSans-MediumSlanted", 308], ["en-AyrLucidityCondensed-Regular", 309], ["en-Nexa-RegularItalic", 310], ["en-Lustria-Regular", 311], ["en-Amsterdam-TwoSlant", 312], ["en-Virtual-Regular", 313], ["en-NF-Lepetitcochon-Regular", 315], ["en-TANTWINKLE", 316], ["en-LeJour-Serif", 317], ["en-Prata-Regular", 318], ["en-PPWoodland-Regular", 319], ["en-PlayfairDisplay-BoldItalic", 320], ["en-AmaticSC-Regular", 321], ["en-Cabin-Regular", 322], ["en-MrDafoe-Regular", 324], ["en-TTRamillas-Italic", 325], ["en-Luckybones-Bold", 326], ["en-DarkerGrotesque-Light", 327], ["en-BellabooRegular", 328], ["en-CormorantSC-Bold", 329], ["en-GochiHand-Regular", 330], ["en-Atteron", 331], ["en-RocaTwo-Lt", 332], ["en-TANSONGBIRD", 334], ["en-HeadingNow-74Regular", 335], ["en-Luthier-BoldItalic", 336], ["en-Oregano-Regular", 337], ["en-AyrTropikaIsland-Int", 338], ["en-Mali-Regular", 339], ["en-DidactGothic-Regular", 340], ["en-Lovelace-Regular", 341], ["en-BakerieSmooth-Regular", 342], ["en-CarterOne", 343], ["en-HussarBd", 344], ["en-OldStandard-Italic", 345], ["en-TAN-ASTORIA-Display", 346], ["en-rugratssans-Regular", 347], ["en-BetterSaturday", 349], ["en-AdigianaToybox", 350], ["en-Sailors", 351], ["en-PlayfairDisplaySC-Italic", 352], ["en-CAGenerated", 355], ["en-Poppins-Regular", 356], ["en-Jonathan-Regular", 
357], ["en-Pacifico-Regular", 358], ["en-Saira-Black", 359], ["en-Loubag-Regular", 360], ["en-Decalotype-Black", 361], ["en-Mansalva-Regular", 362], ["en-Allura-Regular", 363], ["en-ProximaNova-Bold", 364], ["en-TANMIGNON-DISPLAY", 365], ["en-ArsenicaAntiqua-Regular", 366], ["en-BreulGroteskA-RegularItalic", 367], ["en-HKModular-Bold", 368], ["en-TANNightingale-Regular", 369], ["en-AristotelicaProCndTxt-Rg", 370], ["en-Aprila-Regular", 371], ["en-Tomorrow-Regular", 372], ["en-AngellaWhite", 373], ["en-KaushanScript-Regular", 374], ["en-NotoSans", 375], ["en-BrixtonTC-Regular", 377], ["en-OleoScript-Regular", 378], ["en-Cakerolli-Regular", 379], ["en-Lobster-Regular", 380], ["en-PorcelainRegular", 382], ["en-AlojaExtended", 383], ["en-SergioTrendy-Italic", 384], ["en-LovelaceText-Bold", 385], ["en-Anaktoria", 386], ["en-IBMPlexSerif", 388], ["en-Marta", 389], ["en-Mango-Regular", 390], ["en-Overpass-Italic", 391], ["en-Hagrid-Regular", 392], ["en-ElikaGorica", 393], ["en-Amiko-Regular", 394], ["en-EFCOBrookshire-Regular", 395], ["en-Caladea-Regular", 396], ["en-Staatliches-Regular", 398], ["en-Helios-Bold", 399], ["en-Satisfy-Regular", 400], ["en-NexaScript-Regular", 401], ["en-Trocchi-Regular", 402], ["en-March", 403], ["en-IbarraRealNova-Regular", 404], ["en-Nectarine-Regular", 405], ["en-Overpass-Light", 406], ["en-TruetypewriterPolyglOTT", 407], ["en-Bangers-Regular", 408], ["en-Lazord-BoldExpandedItalic", 409], ["en-Chloe-Regular", 410], ["en-BaskervilleDisplayPT-Regular", 411], ["en-Bright-Regular", 412], ["en-Vollkorn-Regular", 413], ["en-Harmattan", 414], ["en-SortsMillGoudy-Regular", 415], ["en-Biryani-Bold", 416], ["en-SugoProDisplay-Italic", 417], ["en-Lazord-BoldItalic", 418], ["en-Alike-Regular", 419], ["en-PermanentMarker-Regular", 420], ["en-Sacramento-Regular", 421], ["en-HKGroteskPro-Italic", 422], ["en-Aleo-BoldItalic", 423], ["en-TANGARLAND-Regular", 425], ["en-Twister", 426], ["en-Arsenal-Italic", 427], ["en-Bogart-Italic", 428], 
["en-BethEllen-Regular", 429], ["en-Caveat-Regular", 430], ["en-BalsamiqSans-Bold", 431], ["en-BreeSerif-Regular", 432], ["en-CodecPro-ExtraBold", 433], ["en-Pierson-Light", 434], ["en-CyGrotesk-WideRegular", 435], ["en-Lumios-Marker", 436], ["en-Comfortaa-Bold", 437], ["en-TraceFontRegular", 438], ["en-RTL-AdamScript-Regular", 439], ["en-EastmanGrotesque-Italic", 440], ["en-Kalam-Bold", 441], ["en-ChauPhilomeneOne-Regular", 442], ["en-Coiny-Regular", 443], ["en-Lovera", 444], ["en-Gellatio", 445], ["en-TitilliumWeb-Bold", 446], ["en-OilvareBase-Italic", 447], ["en-Catamaran-Black", 448], ["en-Anteb-Italic", 449], ["en-SueEllenFrancisco", 450], ["en-SweetApricot", 451], ["en-BrightSunshine", 452], ["en-IM_FELL_Double_Pica_Italic", 453], ["en-Granaina-limpia", 454], ["en-TANPARFAIT", 455], ["en-AcherusGrotesque-Regular", 456], ["en-AwesomeLathusca-Italic", 457], ["en-Signika-Bold", 458], ["en-DO-AllCaps-Slanted", 460], ["en-Zenaida-Regular", 461], ["en-Fahkwang-Regular", 462], ["en-Play-Regular", 463], ["en-BERNIERRegular-Regular", 464], ["en-PlumaThin-Regular", 465], ["en-SportsWorld", 466], ["en-Garet-Black", 467], ["en-CarolloPlayscript-BlackItalic", 468], ["en-SEGO", 470], ["en-BobbyJones-Condensed", 471], ["en-NexaSlab-RegularItalic", 472], ["en-DancingScript-Regular", 473], ["en-Magnolia-Script", 475], ["en-OpunMai-400It", 476], ["en-MadelynFill-Regular", 477], ["en-ZingRust-Base", 478], ["en-FingerPaint-Regular", 479], ["en-BostonAngel-Light", 480], ["en-Gliker-RegularExpanded", 481], ["en-Ahsing", 482], ["en-Engagement-Regular", 483], ["en-EyesomeScript", 484], ["en-LibraSerifModern-Regular", 485], ["en-London-Regular", 486], ["en-AtkinsonHyperlegible-Regular", 487], ["en-StadioNow-TextItalic", 488], ["en-ITCAvantGardePro-Bold", 490], ["en-Comica-Regular", 491], ["en-Coustard-Regular", 492], ["en-Brice-BoldCondensed", 493], ["en-TANNEWYORK-Bold", 494], ["en-TANBUSTER-Bold", 495], ["en-Alatsi-Regular", 496], ["en-Jingleberry", 498], ["en-Rajdhani-Bold", 499], 
["en-LobsterTwo-BoldItalic", 500], ["en-Hitchcut-Regular", 502], ["en-GermaniaOne-Regular", 503], ["en-Emitha-Script", 504], ["en-LemonTuesday", 505], ["en-MonterchiSerif-Regular", 507], ["en-AllertaStencil-Regular", 508], ["en-RTL-Sondos-Regular", 509], ["en-HomemadeApple-Regular", 510], ["en-CosmicOcto-Medium", 511]]
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/fr.json b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/fr.json
new file mode 100644
index 0000000000000000000000000000000000000000..57deb3329189aa8bcc4e44fa0fcf30cb542fa514
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/fr.json
@@ -0,0 +1 @@
+[["en-Montserrat-Regular", 0], ["en-Poppins-Italic", 1], ["en-GlacialIndifference-Regular", 2], ["en-OpenSans-ExtraBoldItalic", 3], ["en-Montserrat-Bold", 4], ["en-Now-Regular", 5], ["en-Garet-Regular", 6], ["en-LeagueSpartan-Bold", 7], ["en-DMSans-Regular", 8], ["en-OpenSauceOne-Regular", 9], ["en-OpenSans-ExtraBold", 10], ["en-KGPrimaryPenmanship", 11], ["en-Anton-Regular", 12], ["en-Aileron-BlackItalic", 13], ["en-Quicksand-Light", 14], ["en-Roboto-BoldItalic", 15], ["en-TheSeasons-It", 16], ["en-Kollektif", 17], ["en-Inter-BoldItalic", 18], ["en-Poppins-Medium", 19], ["en-Poppins-Light", 20], ["en-RoxboroughCF-RegularItalic", 21], ["en-PlayfairDisplay-SemiBold", 22], ["en-Agrandir-Italic", 23], ["en-Lato-Regular", 24], ["en-MoreSugarRegular", 25], ["en-CanvaSans-RegularItalic", 26], ["en-PublicSans-Italic", 27], ["en-CodePro-NormalLC", 28], ["en-Belleza-Regular", 29], ["en-JosefinSans-Bold", 30], ["en-HKGrotesk-Bold", 31], ["en-Telegraf-Medium", 32], ["en-Raleway-ExtraBoldItalic", 34], ["en-Mont-RegularItalic", 35], ["en-Arimo-BoldItalic", 36], ["en-Lora-Italic", 37], ["en-ArchivoBlack-Regular", 38], ["en-Poppins", 39], ["en-Barlow-Black", 40], ["en-CormorantGaramond-Bold", 41], ["en-LibreBaskerville-Regular", 42], ["en-BebasNeueBold", 44], ["en-LazydogRegular", 45], ["en-FredokaOne-Regular", 46], ["en-Horizon-Bold", 47], ["en-Nourd-Regular", 48], ["en-Hatton-Regular", 49], ["en-Nunito-ExtraBoldItalic", 50], ["en-CerebriSans-Regular", 51], ["en-Montserrat-Light", 52], ["en-TenorSans", 53], ["en-ClearSans-Bold", 55], ["en-Cardo-Regular", 56], ["en-Alice-Regular", 57], ["en-Oswald-Regular", 58], ["en-Muli-Black", 60], ["en-TAN-PEARL-Regular", 61], ["en-CooperHewitt-Book", 62], ["en-Agrandir-Grand", 63], ["en-BlackMango-Thin", 64], ["en-DMSerifDisplay-Regular", 65], ["en-Antonio-Bold", 66], ["en-Sniglet-Regular", 67], ["en-BeVietnam-Regular", 68], ["en-NunitoSans10pt-BlackItalic", 69], ["en-AbhayaLibre-ExtraBold", 70], ["en-Rubik-Regular", 71], 
["en-PPNeueMachina-Regular", 72], ["en-TAN - MON CHERI-Regular", 73], ["en-Playlist-Script", 75], ["en-SourceSansPro-BoldItalic", 76], ["en-MoonTime-Regular", 77], ["en-Eczar-ExtraBold", 78], ["en-Gatwick-Regular", 79], ["en-MonumentExtended-Regular", 80], ["en-BarlowSemiCondensed-Regular", 81], ["en-BarlowCondensed-Regular", 82], ["en-Alegreya-Regular", 83], ["en-DreamAvenue", 84], ["en-RobotoCondensed-Italic", 85], ["en-BobbyJones-Regular", 86], ["en-Garet-ExtraBold", 87], ["en-YesevaOne-Regular", 88], ["en-Dosis-ExtraBold", 89], ["en-LeagueGothic-Regular", 90], ["en-OpenSans-Italic", 91], ["en-TANAEGEAN-Regular", 92], ["en-Maharlika-Regular", 93], ["en-MarykateRegular", 94], ["en-Cinzel-Regular", 95], ["en-Agrandir-Wide", 96], ["en-Chewy-Regular", 97], ["en-BodoniFLF-BoldItalic", 98], ["en-Nunito-BlackItalic", 99], ["en-LilitaOne", 100], ["en-HandyCasualCondensed-Regular", 101], ["en-Ovo", 102], ["en-Livvic-Regular", 103], ["en-Agrandir-Narrow", 104], ["en-CrimsonPro-Italic", 105], ["en-AnonymousPro-Bold", 106], ["en-NF-OneLittleFont-Bold", 107], ["en-RedHatDisplay-BoldItalic", 108], ["en-CodecPro-Regular", 109], ["en-HalimunRegular", 110], ["en-LibreFranklin-Black", 111], ["en-TeXGyreTermes-BoldItalic", 112], ["en-Shrikhand-Regular", 113], ["en-TTNormsPro-Italic", 114], ["en-Gagalin-Regular", 115], ["en-OpenSans-Bold", 116], ["en-GreatVibes-Regular", 117], ["en-Breathing", 118], ["en-HeroLight-Regular", 119], ["en-KGPrimaryDots", 120], ["en-Quicksand-Bold", 121], ["en-Brice-ExtraLightSemiExpanded", 122], ["en-Lato-BoldItalic", 123], ["en-Fraunces9pt-Italic", 124], ["en-AbrilFatface-Regular", 125], ["en-BerkshireSwash-Regular", 126], ["en-Atma-Bold", 127], ["en-HolidayRegular", 128], ["en-BebasNeueCyrillic", 129], ["en-IntroRust-Base", 130], ["en-Gistesy", 131], ["en-BDScript-Regular", 132], ["en-ApricotsRegular", 133], ["en-Prompt-Black", 134], ["en-TAN MERINGUE", 135], ["en-GentySans-Regular", 137], ["en-NeueEinstellung-Normal", 138], ["en-Garet-Bold", 139], 
["en-FiraSans-Black", 140], ["en-BantayogLight", 141], ["en-NotoSerifDisplay-Black", 142], ["en-TTChocolates-Regular", 143], ["en-Ubuntu-Regular", 144], ["en-Assistant-Bold", 145], ["en-ABeeZee-Regular", 146], ["en-LexendDeca-Regular", 147], ["en-KingredSerif", 148], ["en-Radley-Regular", 149], ["en-BrownSugar", 150], ["en-MigraItalic-ExtraboldItalic", 151], ["en-ChildosArabic-Regular", 152], ["en-PeaceSans", 153], ["en-LondrinaSolid-Black", 154], ["en-SpaceMono-BoldItalic", 155], ["en-RobotoMono-Light", 156], ["en-CourierPrime-Regular", 157], ["en-Alata-Regular", 158], ["en-Amsterdam-One", 159], ["en-IreneFlorentina-Regular", 160], ["en-CatchyMager", 161], ["en-Alta_regular", 162], ["en-ArticulatCF-Regular", 163], ["en-Raleway-Regular", 164], ["en-BrasikaDisplay", 165], ["en-TANAngleton-Italic", 166], ["en-NotoSerifDisplay-ExtraCondensedItalic", 167], ["en-Bryndan Write", 168], ["en-TTCommonsPro-It", 169], ["en-AlexBrush-Regular", 170], ["en-Antic-Regular", 171], ["en-TTHoves-Bold", 172], ["en-DroidSerif", 173], ["en-AblationRegular", 174], ["en-Marcellus-Regular", 175], ["en-Sanchez-Italic", 176], ["en-JosefinSans", 177], ["en-Afrah-Regular", 178], ["en-PinyonScript", 179], ["en-TTInterphases-BoldItalic", 180], ["en-Yellowtail-Regular", 181], ["en-Gliker-Regular", 182], ["en-BobbyJonesSoft-Regular", 183], ["en-IBMPlexSans", 184], ["en-Amsterdam-Three", 185], ["en-Amsterdam-FourSlant", 186], ["en-TTFors-Regular", 187], ["en-Quattrocento", 188], ["en-Sifonn-Basic", 189], ["en-AlegreyaSans-Black", 190], ["en-Daydream", 191], ["en-AristotelicaProTx-Rg", 192], ["en-NotoSerif", 193], ["en-EBGaramond-Italic", 194], ["en-HammersmithOne-Regular", 195], ["en-RobotoSlab-Regular", 196], ["en-DO-Sans-Regular", 197], ["en-KGPrimaryDotsLined", 198], ["en-Blinker-Regular", 199], ["en-TAN NIMBUS", 200], ["en-Blueberry-Regular", 201], ["en-Rosario-Regular", 202], ["en-Forum", 203], ["en-MistrullyRegular", 204], ["en-SourceSerifPro-Regular", 205], ["en-Bugaki-Regular", 206], 
["en-CMUSerif-Roman", 207], ["en-GulfsDisplay-NormalItalic", 208], ["en-PTSans-Bold", 209], ["en-Sensei-Medium", 210], ["en-SquadaOne-Regular", 211], ["en-Arapey-Italic", 212], ["en-Parisienne-Regular", 213], ["en-Aleo-Italic", 214], ["en-QuicheDisplay-Italic", 215], ["en-RocaOne-It", 216], ["en-Funtastic-Regular", 217], ["en-PTSerif-BoldItalic", 218], ["en-Muller-RegularItalic", 219], ["en-ArgentCF-Regular", 220], ["en-Brightwall-Italic", 221], ["en-Knewave-Regular", 222], ["en-TYSerif-D", 223], ["en-Agrandir-Tight", 224], ["en-AlfaSlabOne-Regular", 225], ["en-TANTangkiwood-Display", 226], ["en-Kief-Montaser-Regular", 227], ["en-Gotham-Book", 228], ["en-JuliusSansOne-Regular", 229], ["en-CocoGothic-Italic", 230], ["en-SairaCondensed-Regular", 231], ["en-DellaRespira-Regular", 232], ["en-Questrial-Regular", 233], ["en-BukhariScript-Regular", 234], ["en-HelveticaWorld-Bold", 235], ["en-TANKINDRED-Display", 236], ["en-CinzelDecorative-Regular", 237], ["en-Vidaloka-Regular", 238], ["en-AlegreyaSansSC-Black", 239], ["en-FeelingPassionate-Regular", 240], ["en-QuincyCF-Regular", 241], ["en-FiraCode-Regular", 242], ["en-Genty-Regular", 243], ["en-Nickainley-Normal", 244], ["en-RubikOne-Regular", 245], ["en-Gidole-Regular", 246], ["en-Borsok", 247], ["en-Gordita-RegularItalic", 248], ["en-Scripter-Regular", 249], ["en-Buffalo-Regular", 250], ["en-KleinText-Regular", 251], ["en-Creepster-Regular", 252], ["en-Arvo-Bold", 253], ["en-GabrielSans-NormalItalic", 254], ["en-Heebo-Black", 255], ["en-LexendExa-Regular", 256], ["en-BrixtonSansTC-Regular", 257], ["en-GildaDisplay-Regular", 258], ["en-Amaranth-BoldItalic", 260], ["en-BubbleboddyNeue-Regular", 261], ["en-MavenPro-Bold", 262], ["en-TTDrugs-Italic", 263], ["en-CyGrotesk-KeyRegular", 264], ["en-VarelaRound-Regular", 265], ["en-Ruda-Black", 266], ["en-SafiraMarch", 267], ["en-BloggerSans", 268], ["en-TANHEADLINE-Regular", 269], ["en-SloopScriptPro-Regular", 270], ["en-NeueMontreal-Regular", 271], ["en-Schoolbell-Regular", 
272], ["en-SigherRegular", 273], ["en-InriaSerif-Regular", 274], ["en-JetBrainsMono-Regular", 275], ["en-MADEEvolveSans", 276], ["en-Dekko", 277], ["en-Handyman-Regular", 278], ["en-Aileron-BoldItalic", 279], ["en-Bright-Italic", 280], ["en-Solway-Regular", 281], ["en-Higuen-Regular", 282], ["en-WedgesItalic", 283], ["en-TANASHFORD-BOLD", 284], ["en-IBMPlexMono", 285], ["en-RacingSansOne-Regular", 286], ["en-RegularBrush", 287], ["en-OpenSans-LightItalic", 288], ["en-SpecialElite-Regular", 289], ["en-FuturaLTPro-Medium", 290], ["en-MaragsaDisplay", 291], ["en-BigShouldersDisplay-Regular", 292], ["en-BDSans-Regular", 293], ["en-RasputinRegular", 294], ["en-Yvesyvesdrawing-BoldItalic", 295], ["en-Bitter-Regular", 296], ["en-LuckiestGuy-Regular", 297], ["en-TTFirsNeue-Italic", 299], ["en-Sunday-Regular", 300], ["en-HKGothic-MediumItalic", 301], ["en-CaveatBrush-Regular", 302], ["en-ArchitectsDaughter-Regular", 304], ["en-Angelina", 305], ["en-Calistoga-Regular", 306], ["en-ArchivoNarrow-Regular", 307], ["en-ObjectSans-MediumSlanted", 308], ["en-AyrLucidityCondensed-Regular", 309], ["en-Nexa-RegularItalic", 310], ["en-Lustria-Regular", 311], ["en-Amsterdam-TwoSlant", 312], ["en-Virtual-Regular", 313], ["en-NF-Lepetitcochon-Regular", 315], ["en-TANTWINKLE", 316], ["en-LeJour-Serif", 317], ["en-Prata-Regular", 318], ["en-PPWoodland-Regular", 319], ["en-PlayfairDisplay-BoldItalic", 320], ["en-AmaticSC-Regular", 321], ["en-Cabin-Regular", 322], ["en-Manjari-Bold", 323], ["en-MrDafoe-Regular", 324], ["en-TTRamillas-Italic", 325], ["en-Luckybones-Bold", 326], ["en-DarkerGrotesque-Light", 327], ["en-BellabooRegular", 328], ["en-CormorantSC-Bold", 329], ["en-GochiHand-Regular", 330], ["en-Atteron", 331], ["en-RocaTwo-Lt", 332], ["en-TANSONGBIRD", 334], ["en-HeadingNow-74Regular", 335], ["en-Luthier-BoldItalic", 336], ["en-Oregano-Regular", 337], ["en-AyrTropikaIsland-Int", 338], ["en-Mali-Regular", 339], ["en-DidactGothic-Regular", 340], ["en-Lovelace-Regular", 341], 
["en-BakerieSmooth-Regular", 342], ["en-CarterOne", 343], ["en-HussarBd", 344], ["en-OldStandard-Italic", 345], ["en-TAN-ASTORIA-Display", 346], ["en-rugratssans-Regular", 347], ["en-BetterSaturday", 349], ["en-AdigianaToybox", 350], ["en-Sailors", 351], ["en-PlayfairDisplaySC-Italic", 352], ["en-Etna-Regular", 353], ["en-Revive80Signature", 354], ["en-CAGenerated", 355], ["en-Poppins-Regular", 356], ["en-Jonathan-Regular", 357], ["en-Pacifico-Regular", 358], ["en-Saira-Black", 359], ["en-Loubag-Regular", 360], ["en-Decalotype-Black", 361], ["en-Mansalva-Regular", 362], ["en-ProximaNova-Bold", 364], ["en-TANMIGNON-DISPLAY", 365], ["en-ArsenicaAntiqua-Regular", 366], ["en-BreulGroteskA-RegularItalic", 367], ["en-HKModular-Bold", 368], ["en-TANNightingale-Regular", 369], ["en-AristotelicaProCndTxt-Rg", 370], ["en-Aprila-Regular", 371], ["en-Tomorrow-Regular", 372], ["en-AngellaWhite", 373], ["en-KaushanScript-Regular", 374], ["en-NotoSans", 375], ["en-LeJour-Script", 376], ["en-BrixtonTC-Regular", 377], ["en-OleoScript-Regular", 378], ["en-Cakerolli-Regular", 379], ["en-Lobster-Regular", 380], ["en-FrunchySerif-Regular", 381], ["en-PorcelainRegular", 382], ["en-AlojaExtended", 383], ["en-SergioTrendy-Italic", 384], ["en-LovelaceText-Bold", 385], ["en-Anaktoria", 386], ["en-JimmyScript-Light", 387], ["en-IBMPlexSerif", 388], ["en-Marta", 389], ["en-Mango-Regular", 390], ["en-Overpass-Italic", 391], ["en-Hagrid-Regular", 392], ["en-ElikaGorica", 393], ["en-Amiko-Regular", 394], ["en-EFCOBrookshire-Regular", 395], ["en-Caladea-Regular", 396], ["en-MoonlightBold", 397], ["en-Staatliches-Regular", 398], ["en-Helios-Bold", 399], ["en-Satisfy-Regular", 400], ["en-NexaScript-Regular", 401], ["en-Trocchi-Regular", 402], ["en-March", 403], ["en-IbarraRealNova-Regular", 404], ["en-Nectarine-Regular", 405], ["en-Overpass-Light", 406], ["en-TruetypewriterPolyglOTT", 407], ["en-Bangers-Regular", 408], ["en-Lazord-BoldExpandedItalic", 409], ["en-Chloe-Regular", 410], 
["en-BaskervilleDisplayPT-Regular", 411], ["en-Bright-Regular", 412], ["en-Vollkorn-Regular", 413], ["en-Harmattan", 414], ["en-SortsMillGoudy-Regular", 415], ["en-Biryani-Bold", 416], ["en-SugoProDisplay-Italic", 417], ["en-Lazord-BoldItalic", 418], ["en-Alike-Regular", 419], ["en-PermanentMarker-Regular", 420], ["en-Sacramento-Regular", 421], ["en-HKGroteskPro-Italic", 422], ["en-Aleo-BoldItalic", 423], ["en-TANGARLAND-Regular", 425], ["en-Twister", 426], ["en-Arsenal-Italic", 427], ["en-Bogart-Italic", 428], ["en-BethEllen-Regular", 429], ["en-Caveat-Regular", 430], ["en-BalsamiqSans-Bold", 431], ["en-BreeSerif-Regular", 432], ["en-CodecPro-ExtraBold", 433], ["en-Pierson-Light", 434], ["en-CyGrotesk-WideRegular", 435], ["en-Lumios-Marker", 436], ["en-Comfortaa-Bold", 437], ["en-TraceFontRegular", 438], ["en-RTL-AdamScript-Regular", 439], ["en-EastmanGrotesque-Italic", 440], ["en-Kalam-Bold", 441], ["en-ChauPhilomeneOne-Regular", 442], ["en-Coiny-Regular", 443], ["en-Lovera", 444], ["en-Gellatio", 445], ["en-TitilliumWeb-Bold", 446], ["en-OilvareBase-Italic", 447], ["en-Catamaran-Black", 448], ["en-Anteb-Italic", 449], ["en-SueEllenFrancisco", 450], ["en-SweetApricot", 451], ["en-BrightSunshine", 452], ["en-IM_FELL_Double_Pica_Italic", 453], ["en-Granaina-limpia", 454], ["en-TANPARFAIT", 455], ["en-AcherusGrotesque-Regular", 456], ["en-AwesomeLathusca-Italic", 457], ["en-Signika-Bold", 458], ["en-Andasia", 459], ["en-DO-AllCaps-Slanted", 460], ["en-Zenaida-Regular", 461], ["en-Fahkwang-Regular", 462], ["en-Play-Regular", 463], ["en-BERNIERRegular-Regular", 464], ["en-PlumaThin-Regular", 465], ["en-SportsWorld", 466], ["en-Garet-Black", 467], ["en-CarolloPlayscript-BlackItalic", 468], ["en-Cheque-Regular", 469], ["en-SEGO", 470], ["en-BobbyJones-Condensed", 471], ["en-NexaSlab-RegularItalic", 472], ["en-DancingScript-Regular", 473], ["en-Magnolia-Script", 475], ["en-OpunMai-400It", 476], ["en-MadelynFill-Regular", 477], ["en-ZingRust-Base", 478], 
["en-FingerPaint-Regular", 479], ["en-BostonAngel-Light", 480], ["en-Gliker-RegularExpanded", 481], ["en-Ahsing", 482], ["en-Engagement-Regular", 483], ["en-EyesomeScript", 484], ["en-LibraSerifModern-Regular", 485], ["en-London-Regular", 486], ["en-AtkinsonHyperlegible-Regular", 487], ["en-StadioNow-TextItalic", 488], ["en-Aniyah", 489], ["en-ITCAvantGardePro-Bold", 490], ["en-Comica-Regular", 491], ["en-Coustard-Regular", 492], ["en-Brice-BoldCondensed", 493], ["en-TANNEWYORK-Bold", 494], ["en-TANBUSTER-Bold", 495], ["en-Alatsi-Regular", 496], ["en-TYSerif-Book", 497], ["en-Jingleberry", 498], ["en-Rajdhani-Bold", 499], ["en-LobsterTwo-BoldItalic", 500], ["en-Hitchcut-Regular", 502], ["en-GermaniaOne-Regular", 503], ["en-Emitha-Script", 504], ["en-LemonTuesday", 505], ["en-MonterchiSerif-Regular", 507], ["en-AllertaStencil-Regular", 508], ["en-RTL-Sondos-Regular", 509], ["en-HomemadeApple-Regular", 510], ["en-CosmicOcto-Medium", 511]]
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/it.json b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/it.json
new file mode 100644
index 0000000000000000000000000000000000000000..43f27d592b19a0667964f97dc19ddec42a6256e3
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/it.json
@@ -0,0 +1 @@
+[["en-Montserrat-Regular", 0], ["en-Poppins-Italic", 1], ["en-GlacialIndifference-Regular", 2], ["en-OpenSans-ExtraBoldItalic", 3], ["en-Montserrat-Bold", 4], ["en-Now-Regular", 5], ["en-Garet-Regular", 6], ["en-LeagueSpartan-Bold", 7], ["en-DMSans-Regular", 8], ["en-OpenSauceOne-Regular", 9], ["en-OpenSans-ExtraBold", 10], ["en-KGPrimaryPenmanship", 11], ["en-Anton-Regular", 12], ["en-Aileron-BlackItalic", 13], ["en-Quicksand-Light", 14], ["en-Roboto-BoldItalic", 15], ["en-TheSeasons-It", 16], ["en-Kollektif", 17], ["en-Inter-BoldItalic", 18], ["en-Poppins-Medium", 19], ["en-Poppins-Light", 20], ["en-RoxboroughCF-RegularItalic", 21], ["en-PlayfairDisplay-SemiBold", 22], ["en-Agrandir-Italic", 23], ["en-Lato-Regular", 24], ["en-MoreSugarRegular", 25], ["en-CanvaSans-RegularItalic", 26], ["en-PublicSans-Italic", 27], ["en-CodePro-NormalLC", 28], ["en-Belleza-Regular", 29], ["en-JosefinSans-Bold", 30], ["en-HKGrotesk-Bold", 31], ["en-Telegraf-Medium", 32], ["en-BrittanySignatureRegular", 33], ["en-Raleway-ExtraBoldItalic", 34], ["en-Mont-RegularItalic", 35], ["en-Arimo-BoldItalic", 36], ["en-Lora-Italic", 37], ["en-ArchivoBlack-Regular", 38], ["en-Poppins", 39], ["en-Barlow-Black", 40], ["en-CormorantGaramond-Bold", 41], ["en-LibreBaskerville-Regular", 42], ["en-BebasNeueBold", 44], ["en-LazydogRegular", 45], ["en-FredokaOne-Regular", 46], ["en-Horizon-Bold", 47], ["en-Nourd-Regular", 48], ["en-Hatton-Regular", 49], ["en-Nunito-ExtraBoldItalic", 50], ["en-CerebriSans-Regular", 51], ["en-Montserrat-Light", 52], ["en-TenorSans", 53], ["en-ClearSans-Bold", 55], ["en-Cardo-Regular", 56], ["en-Alice-Regular", 57], ["en-Oswald-Regular", 58], ["en-Muli-Black", 60], ["en-TAN-PEARL-Regular", 61], ["en-CooperHewitt-Book", 62], ["en-Agrandir-Grand", 63], ["en-BlackMango-Thin", 64], ["en-DMSerifDisplay-Regular", 65], ["en-Antonio-Bold", 66], ["en-Sniglet-Regular", 67], ["en-BeVietnam-Regular", 68], ["en-NunitoSans10pt-BlackItalic", 69], ["en-AbhayaLibre-ExtraBold", 70], 
["en-Rubik-Regular", 71], ["en-PPNeueMachina-Regular", 72], ["en-TAN - MON CHERI-Regular", 73], ["en-Playlist-Script", 75], ["en-SourceSansPro-BoldItalic", 76], ["en-MoonTime-Regular", 77], ["en-Eczar-ExtraBold", 78], ["en-Gatwick-Regular", 79], ["en-BarlowSemiCondensed-Regular", 81], ["en-BarlowCondensed-Regular", 82], ["en-Alegreya-Regular", 83], ["en-DreamAvenue", 84], ["en-RobotoCondensed-Italic", 85], ["en-BobbyJones-Regular", 86], ["en-Garet-ExtraBold", 87], ["en-YesevaOne-Regular", 88], ["en-Dosis-ExtraBold", 89], ["en-LeagueGothic-Regular", 90], ["en-OpenSans-Italic", 91], ["en-TANAEGEAN-Regular", 92], ["en-Maharlika-Regular", 93], ["en-MarykateRegular", 94], ["en-Cinzel-Regular", 95], ["en-Agrandir-Wide", 96], ["en-Chewy-Regular", 97], ["en-BodoniFLF-BoldItalic", 98], ["en-Nunito-BlackItalic", 99], ["en-LilitaOne", 100], ["en-HandyCasualCondensed-Regular", 101], ["en-Ovo", 102], ["en-Livvic-Regular", 103], ["en-Agrandir-Narrow", 104], ["en-CrimsonPro-Italic", 105], ["en-AnonymousPro-Bold", 106], ["en-NF-OneLittleFont-Bold", 107], ["en-RedHatDisplay-BoldItalic", 108], ["en-CodecPro-Regular", 109], ["en-HalimunRegular", 110], ["en-LibreFranklin-Black", 111], ["en-TeXGyreTermes-BoldItalic", 112], ["en-Shrikhand-Regular", 113], ["en-TTNormsPro-Italic", 114], ["en-Gagalin-Regular", 115], ["en-OpenSans-Bold", 116], ["en-GreatVibes-Regular", 117], ["en-Breathing", 118], ["en-HeroLight-Regular", 119], ["en-KGPrimaryDots", 120], ["en-Quicksand-Bold", 121], ["en-Brice-ExtraLightSemiExpanded", 122], ["en-Lato-BoldItalic", 123], ["en-Fraunces9pt-Italic", 124], ["en-AbrilFatface-Regular", 125], ["en-BerkshireSwash-Regular", 126], ["en-Atma-Bold", 127], ["en-HolidayRegular", 128], ["en-BebasNeueCyrillic", 129], ["en-IntroRust-Base", 130], ["en-Gistesy", 131], ["en-BDScript-Regular", 132], ["en-ApricotsRegular", 133], ["en-Prompt-Black", 134], ["en-TAN MERINGUE", 135], ["en-GentySans-Regular", 137], ["en-NeueEinstellung-Normal", 138], ["en-Garet-Bold", 139], 
["en-FiraSans-Black", 140], ["en-BantayogLight", 141], ["en-NotoSerifDisplay-Black", 142], ["en-TTChocolates-Regular", 143], ["en-Ubuntu-Regular", 144], ["en-Assistant-Bold", 145], ["en-ABeeZee-Regular", 146], ["en-LexendDeca-Regular", 147], ["en-KingredSerif", 148], ["en-Radley-Regular", 149], ["en-BrownSugar", 150], ["en-MigraItalic-ExtraboldItalic", 151], ["en-ChildosArabic-Regular", 152], ["en-PeaceSans", 153], ["en-LondrinaSolid-Black", 154], ["en-SpaceMono-BoldItalic", 155], ["en-RobotoMono-Light", 156], ["en-CourierPrime-Regular", 157], ["en-Alata-Regular", 158], ["en-Amsterdam-One", 159], ["en-IreneFlorentina-Regular", 160], ["en-CatchyMager", 161], ["en-Alta_regular", 162], ["en-ArticulatCF-Regular", 163], ["en-Raleway-Regular", 164], ["en-BrasikaDisplay", 165], ["en-TANAngleton-Italic", 166], ["en-NotoSerifDisplay-ExtraCondensedItalic", 167], ["en-Bryndan Write", 168], ["en-TTCommonsPro-It", 169], ["en-AlexBrush-Regular", 170], ["en-Antic-Regular", 171], ["en-TTHoves-Bold", 172], ["en-DroidSerif", 173], ["en-AblationRegular", 174], ["en-Marcellus-Regular", 175], ["en-Sanchez-Italic", 176], ["en-JosefinSans", 177], ["en-Afrah-Regular", 178], ["en-PinyonScript", 179], ["en-TTInterphases-BoldItalic", 180], ["en-Yellowtail-Regular", 181], ["en-Gliker-Regular", 182], ["en-BobbyJonesSoft-Regular", 183], ["en-IBMPlexSans", 184], ["en-Amsterdam-Three", 185], ["en-Amsterdam-FourSlant", 186], ["en-TTFors-Regular", 187], ["en-Quattrocento", 188], ["en-Sifonn-Basic", 189], ["en-AlegreyaSans-Black", 190], ["en-Daydream", 191], ["en-AristotelicaProTx-Rg", 192], ["en-NotoSerif", 193], ["en-EBGaramond-Italic", 194], ["en-HammersmithOne-Regular", 195], ["en-RobotoSlab-Regular", 196], ["en-DO-Sans-Regular", 197], ["en-KGPrimaryDotsLined", 198], ["en-Blinker-Regular", 199], ["en-TAN NIMBUS", 200], ["en-Blueberry-Regular", 201], ["en-Rosario-Regular", 202], ["en-Forum", 203], ["en-MistrullyRegular", 204], ["en-SourceSerifPro-Regular", 205], ["en-Bugaki-Regular", 206], 
["en-CMUSerif-Roman", 207], ["en-GulfsDisplay-NormalItalic", 208], ["en-PTSans-Bold", 209], ["en-Sensei-Medium", 210], ["en-SquadaOne-Regular", 211], ["en-Arapey-Italic", 212], ["en-Parisienne-Regular", 213], ["en-Aleo-Italic", 214], ["en-QuicheDisplay-Italic", 215], ["en-RocaOne-It", 216], ["en-Funtastic-Regular", 217], ["en-PTSerif-BoldItalic", 218], ["en-Muller-RegularItalic", 219], ["en-ArgentCF-Regular", 220], ["en-Brightwall-Italic", 221], ["en-Knewave-Regular", 222], ["en-TYSerif-D", 223], ["en-Agrandir-Tight", 224], ["en-AlfaSlabOne-Regular", 225], ["en-TANTangkiwood-Display", 226], ["en-Kief-Montaser-Regular", 227], ["en-Gotham-Book", 228], ["en-JuliusSansOne-Regular", 229], ["en-CocoGothic-Italic", 230], ["en-SairaCondensed-Regular", 231], ["en-DellaRespira-Regular", 232], ["en-Questrial-Regular", 233], ["en-BukhariScript-Regular", 234], ["en-HelveticaWorld-Bold", 235], ["en-TANKINDRED-Display", 236], ["en-CinzelDecorative-Regular", 237], ["en-Vidaloka-Regular", 238], ["en-AlegreyaSansSC-Black", 239], ["en-FeelingPassionate-Regular", 240], ["en-QuincyCF-Regular", 241], ["en-FiraCode-Regular", 242], ["en-Genty-Regular", 243], ["en-Nickainley-Normal", 244], ["en-RubikOne-Regular", 245], ["en-Gidole-Regular", 246], ["en-Borsok", 247], ["en-Gordita-RegularItalic", 248], ["en-Scripter-Regular", 249], ["en-Buffalo-Regular", 250], ["en-KleinText-Regular", 251], ["en-Creepster-Regular", 252], ["en-Arvo-Bold", 253], ["en-GabrielSans-NormalItalic", 254], ["en-Heebo-Black", 255], ["en-LexendExa-Regular", 256], ["en-BrixtonSansTC-Regular", 257], ["en-GildaDisplay-Regular", 258], ["en-ChunkFive-Roman", 259], ["en-Amaranth-BoldItalic", 260], ["en-BubbleboddyNeue-Regular", 261], ["en-MavenPro-Bold", 262], ["en-TTDrugs-Italic", 263], ["en-CyGrotesk-KeyRegular", 264], ["en-VarelaRound-Regular", 265], ["en-Ruda-Black", 266], ["en-SafiraMarch", 267], ["en-BloggerSans", 268], ["en-TANHEADLINE-Regular", 269], ["en-SloopScriptPro-Regular", 270], ["en-NeueMontreal-Regular", 
271], ["en-Schoolbell-Regular", 272], ["en-SigherRegular", 273], ["en-InriaSerif-Regular", 274], ["en-JetBrainsMono-Regular", 275], ["en-MADEEvolveSans", 276], ["en-Dekko", 277], ["en-Handyman-Regular", 278], ["en-Aileron-BoldItalic", 279], ["en-Bright-Italic", 280], ["en-Solway-Regular", 281], ["en-Higuen-Regular", 282], ["en-WedgesItalic", 283], ["en-TANASHFORD-BOLD", 284], ["en-IBMPlexMono", 285], ["en-RacingSansOne-Regular", 286], ["en-RegularBrush", 287], ["en-OpenSans-LightItalic", 288], ["en-SpecialElite-Regular", 289], ["en-FuturaLTPro-Medium", 290], ["en-MaragsaDisplay", 291], ["en-BigShouldersDisplay-Regular", 292], ["en-BDSans-Regular", 293], ["en-RasputinRegular", 294], ["en-Yvesyvesdrawing-BoldItalic", 295], ["en-Bitter-Regular", 296], ["en-LuckiestGuy-Regular", 297], ["en-TTFirsNeue-Italic", 299], ["en-Sunday-Regular", 300], ["en-HKGothic-MediumItalic", 301], ["en-CaveatBrush-Regular", 302], ["en-ArchitectsDaughter-Regular", 304], ["en-Angelina", 305], ["en-Calistoga-Regular", 306], ["en-ArchivoNarrow-Regular", 307], ["en-ObjectSans-MediumSlanted", 308], ["en-AyrLucidityCondensed-Regular", 309], ["en-Nexa-RegularItalic", 310], ["en-Lustria-Regular", 311], ["en-Amsterdam-TwoSlant", 312], ["en-Virtual-Regular", 313], ["en-NF-Lepetitcochon-Regular", 315], ["en-TANTWINKLE", 316], ["en-LeJour-Serif", 317], ["en-Prata-Regular", 318], ["en-PPWoodland-Regular", 319], ["en-PlayfairDisplay-BoldItalic", 320], ["en-AmaticSC-Regular", 321], ["en-Cabin-Regular", 322], ["en-Manjari-Bold", 323], ["en-MrDafoe-Regular", 324], ["en-TTRamillas-Italic", 325], ["en-Luckybones-Bold", 326], ["en-DarkerGrotesque-Light", 327], ["en-BellabooRegular", 328], ["en-CormorantSC-Bold", 329], ["en-GochiHand-Regular", 330], ["en-Atteron", 331], ["en-RocaTwo-Lt", 332], ["en-TANSONGBIRD", 334], ["en-HeadingNow-74Regular", 335], ["en-Luthier-BoldItalic", 336], ["en-Oregano-Regular", 337], ["en-AyrTropikaIsland-Int", 338], ["en-Mali-Regular", 339], ["en-DidactGothic-Regular", 340], 
["en-Lovelace-Regular", 341], ["en-BakerieSmooth-Regular", 342], ["en-CarterOne", 343], ["en-HussarBd", 344], ["en-OldStandard-Italic", 345], ["en-TAN-ASTORIA-Display", 346], ["en-rugratssans-Regular", 347], ["en-BetterSaturday", 349], ["en-AdigianaToybox", 350], ["en-Sailors", 351], ["en-PlayfairDisplaySC-Italic", 352], ["en-Etna-Regular", 353], ["en-Revive80Signature", 354], ["en-CAGenerated", 355], ["en-Poppins-Regular", 356], ["en-Jonathan-Regular", 357], ["en-Pacifico-Regular", 358], ["en-Saira-Black", 359], ["en-Loubag-Regular", 360], ["en-Decalotype-Black", 361], ["en-Mansalva-Regular", 362], ["en-Allura-Regular", 363], ["en-ProximaNova-Bold", 364], ["en-TANMIGNON-DISPLAY", 365], ["en-ArsenicaAntiqua-Regular", 366], ["en-BreulGroteskA-RegularItalic", 367], ["en-HKModular-Bold", 368], ["en-TANNightingale-Regular", 369], ["en-AristotelicaProCndTxt-Rg", 370], ["en-Aprila-Regular", 371], ["en-Tomorrow-Regular", 372], ["en-AngellaWhite", 373], ["en-KaushanScript-Regular", 374], ["en-NotoSans", 375], ["en-LeJour-Script", 376], ["en-BrixtonTC-Regular", 377], ["en-OleoScript-Regular", 378], ["en-Cakerolli-Regular", 379], ["en-Lobster-Regular", 380], ["en-FrunchySerif-Regular", 381], ["en-PorcelainRegular", 382], ["en-AlojaExtended", 383], ["en-SergioTrendy-Italic", 384], ["en-LovelaceText-Bold", 385], ["en-Anaktoria", 386], ["en-JimmyScript-Light", 387], ["en-IBMPlexSerif", 388], ["en-Marta", 389], ["en-Mango-Regular", 390], ["en-Overpass-Italic", 391], ["en-Hagrid-Regular", 392], ["en-ElikaGorica", 393], ["en-Amiko-Regular", 394], ["en-EFCOBrookshire-Regular", 395], ["en-Caladea-Regular", 396], ["en-MoonlightBold", 397], ["en-Staatliches-Regular", 398], ["en-Helios-Bold", 399], ["en-Satisfy-Regular", 400], ["en-NexaScript-Regular", 401], ["en-Trocchi-Regular", 402], ["en-March", 403], ["en-IbarraRealNova-Regular", 404], ["en-Nectarine-Regular", 405], ["en-Overpass-Light", 406], ["en-TruetypewriterPolyglOTT", 407], ["en-Bangers-Regular", 408], 
["en-Lazord-BoldExpandedItalic", 409], ["en-Chloe-Regular", 410], ["en-BaskervilleDisplayPT-Regular", 411], ["en-Bright-Regular", 412], ["en-Vollkorn-Regular", 413], ["en-Harmattan", 414], ["en-SortsMillGoudy-Regular", 415], ["en-Biryani-Bold", 416], ["en-SugoProDisplay-Italic", 417], ["en-Lazord-BoldItalic", 418], ["en-Alike-Regular", 419], ["en-PermanentMarker-Regular", 420], ["en-Sacramento-Regular", 421], ["en-HKGroteskPro-Italic", 422], ["en-Aleo-BoldItalic", 423], ["en-TANGARLAND-Regular", 425], ["en-Twister", 426], ["en-Arsenal-Italic", 427], ["en-Bogart-Italic", 428], ["en-BethEllen-Regular", 429], ["en-Caveat-Regular", 430], ["en-BalsamiqSans-Bold", 431], ["en-BreeSerif-Regular", 432], ["en-CodecPro-ExtraBold", 433], ["en-Pierson-Light", 434], ["en-CyGrotesk-WideRegular", 435], ["en-Lumios-Marker", 436], ["en-Comfortaa-Bold", 437], ["en-TraceFontRegular", 438], ["en-RTL-AdamScript-Regular", 439], ["en-EastmanGrotesque-Italic", 440], ["en-Kalam-Bold", 441], ["en-ChauPhilomeneOne-Regular", 442], ["en-Coiny-Regular", 443], ["en-Lovera", 444], ["en-Gellatio", 445], ["en-TitilliumWeb-Bold", 446], ["en-OilvareBase-Italic", 447], ["en-Catamaran-Black", 448], ["en-Anteb-Italic", 449], ["en-SueEllenFrancisco", 450], ["en-SweetApricot", 451], ["en-BrightSunshine", 452], ["en-IM_FELL_Double_Pica_Italic", 453], ["en-Granaina-limpia", 454], ["en-TANPARFAIT", 455], ["en-AcherusGrotesque-Regular", 456], ["en-AwesomeLathusca-Italic", 457], ["en-Signika-Bold", 458], ["en-Andasia", 459], ["en-DO-AllCaps-Slanted", 460], ["en-Zenaida-Regular", 461], ["en-Fahkwang-Regular", 462], ["en-Play-Regular", 463], ["en-BERNIERRegular-Regular", 464], ["en-PlumaThin-Regular", 465], ["en-SportsWorld", 466], ["en-Garet-Black", 467], ["en-CarolloPlayscript-BlackItalic", 468], ["en-Cheque-Regular", 469], ["en-SEGO", 470], ["en-BobbyJones-Condensed", 471], ["en-NexaSlab-RegularItalic", 472], ["en-DancingScript-Regular", 473], ["en-Magnolia-Script", 475], ["en-OpunMai-400It", 476], 
["en-MadelynFill-Regular", 477], ["en-ZingRust-Base", 478], ["en-FingerPaint-Regular", 479], ["en-BostonAngel-Light", 480], ["en-Gliker-RegularExpanded", 481], ["en-Ahsing", 482], ["en-Engagement-Regular", 483], ["en-EyesomeScript", 484], ["en-LibraSerifModern-Regular", 485], ["en-London-Regular", 486], ["en-AtkinsonHyperlegible-Regular", 487], ["en-StadioNow-TextItalic", 488], ["en-Aniyah", 489], ["en-ITCAvantGardePro-Bold", 490], ["en-Comica-Regular", 491], ["en-Coustard-Regular", 492], ["en-Brice-BoldCondensed", 493], ["en-TANNEWYORK-Bold", 494], ["en-TANBUSTER-Bold", 495], ["en-Alatsi-Regular", 496], ["en-TYSerif-Book", 497], ["en-Jingleberry", 498], ["en-Rajdhani-Bold", 499], ["en-LobsterTwo-BoldItalic", 500], ["en-BestLight-Medium", 501], ["en-Hitchcut-Regular", 502], ["en-GermaniaOne-Regular", 503], ["en-Emitha-Script", 504], ["en-LemonTuesday", 505], ["en-MonterchiSerif-Regular", 507], ["en-AllertaStencil-Regular", 508], ["en-RTL-Sondos-Regular", 509], ["en-HomemadeApple-Regular", 510], ["en-CosmicOcto-Medium", 511]]
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/jp.json b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/jp.json
new file mode 100644
index 0000000000000000000000000000000000000000..7a94ec5373069c0e5ef2e8b1820092ece251703e
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/jp.json
@@ -0,0 +1 @@
+[["jp-04KanjyukuGothic", 0], ["jp-07LightNovelPOP", 1], ["jp-07NikumaruFont", 2], ["jp-07YasashisaAntique", 3], ["jp-07YasashisaGothic", 4], ["jp-BokutachinoGothic2Bold", 5], ["jp-BokutachinoGothic2Regular", 6], ["jp-CHI_SpeedyRight_full_211128-Regular", 7], ["jp-CHI_SpeedyRight_italic_full_211127-Regular", 8], ["jp-CP-Font", 9], ["jp-Canva_CezanneProN-B", 10], ["jp-Canva_CezanneProN-M", 11], ["jp-Canva_ChiaroStd-B", 12], ["jp-Canva_CometStd-B", 13], ["jp-Canva_DotMincho16Std-M", 14], ["jp-Canva_GrecoStd-B", 15], ["jp-Canva_GrecoStd-M", 16], ["jp-Canva_LyraStd-DB", 17], ["jp-Canva_MatisseHatsuhiPro-B", 18], ["jp-Canva_MatisseHatsuhiPro-M", 19], ["jp-Canva_ModeMinAStd-B", 20], ["jp-Canva_NewCezanneProN-B", 21], ["jp-Canva_NewCezanneProN-M", 22], ["jp-Canva_PearlStd-L", 23], ["jp-Canva_RaglanStd-UB", 24], ["jp-Canva_RailwayStd-B", 25], ["jp-Canva_ReggaeStd-B", 26], ["jp-Canva_RocknRollStd-DB", 27], ["jp-Canva_RodinCattleyaPro-B", 28], ["jp-Canva_RodinCattleyaPro-M", 29], ["jp-Canva_RodinCattleyaPro-UB", 30], ["jp-Canva_RodinHimawariPro-B", 31], ["jp-Canva_RodinHimawariPro-M", 32], ["jp-Canva_RodinMariaPro-B", 33], ["jp-Canva_RodinMariaPro-DB", 34], ["jp-Canva_RodinProN-M", 35], ["jp-Canva_ShadowTLStd-B", 36], ["jp-Canva_StickStd-B", 37], ["jp-Canva_TsukuAOldMinPr6N-B", 38], ["jp-Canva_TsukuAOldMinPr6N-R", 39], ["jp-Canva_UtrilloPro-DB", 40], ["jp-Canva_UtrilloPro-M", 41], ["jp-Canva_YurukaStd-UB", 42], ["jp-FGUIGEN", 43], ["jp-GlowSansJ-Condensed-Heavy", 44], ["jp-GlowSansJ-Condensed-Light", 45], ["jp-GlowSansJ-Normal-Bold", 46], ["jp-GlowSansJ-Normal-Light", 47], ["jp-HannariMincho", 48], ["jp-HarenosoraMincho", 49], ["jp-Jiyucho", 50], ["jp-Kaiso-Makina-B", 51], ["jp-Kaisotai-Next-UP-B", 52], ["jp-KokoroMinchoutai", 53], ["jp-Mamelon-3-Hi-Regular", 54], ["jp-MotoyaAnemoneStd-W1", 55], ["jp-MotoyaAnemoneStd-W5", 56], ["jp-MotoyaAnticPro-W3", 57], ["jp-MotoyaCedarStd-W3", 58], ["jp-MotoyaCedarStd-W5", 59], ["jp-MotoyaGochikaStd-W4", 60], ["jp-MotoyaGochikaStd-W8", 
61], ["jp-MotoyaGothicMiyabiStd-W6", 62], ["jp-MotoyaGothicStd-W3", 63], ["jp-MotoyaGothicStd-W5", 64], ["jp-MotoyaKoinStd-W3", 65], ["jp-MotoyaKyotaiStd-W2", 66], ["jp-MotoyaKyotaiStd-W4", 67], ["jp-MotoyaMaruStd-W3", 68], ["jp-MotoyaMaruStd-W5", 69], ["jp-MotoyaMinchoMiyabiStd-W4", 70], ["jp-MotoyaMinchoMiyabiStd-W6", 71], ["jp-MotoyaMinchoModernStd-W4", 72], ["jp-MotoyaMinchoModernStd-W6", 73], ["jp-MotoyaMinchoStd-W3", 74], ["jp-MotoyaMinchoStd-W5", 75], ["jp-MotoyaReisyoStd-W2", 76], ["jp-MotoyaReisyoStd-W6", 77], ["jp-MotoyaTohitsuStd-W4", 78], ["jp-MotoyaTohitsuStd-W6", 79], ["jp-MtySousyokuEmBcJis-W6", 80], ["jp-MtySousyokuLiBcJis-W6", 81], ["jp-Mushin", 82], ["jp-NotoSansJP-Bold", 83], ["jp-NotoSansJP-Regular", 84], ["jp-NudMotoyaAporoStd-W3", 85], ["jp-NudMotoyaAporoStd-W5", 86], ["jp-NudMotoyaCedarStd-W3", 87], ["jp-NudMotoyaCedarStd-W5", 88], ["jp-NudMotoyaMaruStd-W3", 89], ["jp-NudMotoyaMaruStd-W5", 90], ["jp-NudMotoyaMinchoStd-W5", 91], ["jp-Ounen-mouhitsu", 92], ["jp-Ronde-B-Square", 93], ["jp-SMotoyaGyosyoStd-W5", 94], ["jp-SMotoyaSinkaiStd-W3", 95], ["jp-SMotoyaSinkaiStd-W5", 96], ["jp-SourceHanSansJP-Bold", 97], ["jp-SourceHanSansJP-Regular", 98], ["jp-SourceHanSerifJP-Bold", 99], ["jp-SourceHanSerifJP-Regular", 100], ["jp-TazuganeGothicStdN-Bold", 101], ["jp-TazuganeGothicStdN-Regular", 102], ["jp-TelopMinProN-B", 103], ["jp-Togalite-Bold", 104], ["jp-Togalite-Regular", 105], ["jp-TsukuMinPr6N-E", 106], ["jp-TsukuMinPr6N-M", 107], ["jp-mikachan_o", 108], ["jp-nagayama_kai", 109], ["jp-07LogoTypeGothic7", 110], ["jp-07TetsubinGothic", 111], ["jp-851CHIKARA-DZUYOKU-KANA-A", 112], ["jp-ARMinchoJIS-Light", 113], ["jp-ARMinchoJIS-Ultra", 114], ["jp-ARPCrystalMinchoJIS-Medium", 115], ["jp-ARPCrystalRGothicJIS-Medium", 116], ["jp-ARShounanShinpitsuGyosyoJIS-Medium", 117], ["jp-AozoraMincho-bold", 118], ["jp-AozoraMinchoRegular", 119], ["jp-ArialUnicodeMS-Bold", 120], ["jp-ArialUnicodeMS", 121], ["jp-CanvaBreezeJP", 122], ["jp-CanvaLiCN", 123], 
["jp-CanvaLiJP", 124], ["jp-CanvaOrientalBrushCN", 125], ["jp-CanvaQinfuCalligraphyJP", 126], ["jp-CanvaSweetHeartJP", 127], ["jp-CanvaWenJP", 128], ["jp-Corporate-Logo-Bold", 129], ["jp-DelaGothicOne-Regular", 130], ["jp-GN-Kin-iro_SansSerif", 131], ["jp-GN-Koharuiro_Sunray", 132], ["jp-GenEiGothicM-B", 133], ["jp-GenEiGothicM-R", 134], ["jp-GenJyuuGothic-Bold", 135], ["jp-GenRyuMinTW-B", 136], ["jp-GenRyuMinTW-R", 137], ["jp-GenSekiGothicTW-B", 138], ["jp-GenSekiGothicTW-R", 139], ["jp-GenSenRoundedTW-B", 140], ["jp-GenSenRoundedTW-R", 141], ["jp-GenShinGothic-Bold", 142], ["jp-GenShinGothic-Normal", 143], ["jp-GenWanMinTW-L", 144], ["jp-GenYoGothicTW-B", 145], ["jp-GenYoGothicTW-R", 146], ["jp-GenYoMinTW-B", 147], ["jp-GenYoMinTW-R", 148], ["jp-HGBouquet", 149], ["jp-HanaMinA", 150], ["jp-HanazomeFont", 151], ["jp-HinaMincho-Regular", 152], ["jp-Honoka-Antique-Maru", 153], ["jp-Honoka-Mincho", 154], ["jp-HuiFontP", 155], ["jp-IPAexMincho", 156], ["jp-JK-Gothic-L", 157], ["jp-JK-Gothic-M", 158], ["jp-JackeyFont", 159], ["jp-KaiseiTokumin-Bold", 160], ["jp-KaiseiTokumin-Regular", 161], ["jp-Keifont", 162], ["jp-KiwiMaru-Regular", 163], ["jp-Koku-Mincho-Regular", 164], ["jp-MotoyaLMaru-W3-90ms-RKSJ-H", 165], ["jp-NewTegomin-Regular", 166], ["jp-NicoKaku", 167], ["jp-NicoMoji+", 168], ["jp-Otsutome_font-Bold", 169], ["jp-PottaOne-Regular", 170], ["jp-RampartOne-Regular", 171], ["jp-Senobi-Gothic-Bold", 172], ["jp-Senobi-Gothic-Regular", 173], ["jp-SmartFontUI-Proportional", 174], ["jp-SoukouMincho", 175], ["jp-TEST_Klee-DB", 176], ["jp-TEST_Klee-M", 177], ["jp-TEST_UDMincho-B", 178], ["jp-TEST_UDMincho-L", 179], ["jp-TT_Akakane-EB", 180], ["jp-Tanuki-Permanent-Marker", 181], ["jp-TrainOne-Regular", 182], ["jp-TsunagiGothic-Black", 183], ["jp-Ume-Hy-Gothic", 184], ["jp-Ume-P-Mincho", 185], ["jp-WenQuanYiMicroHei", 186], ["jp-XANO-mincho-U32", 187], ["jp-YOzFontM90-Regular", 188], ["jp-Yomogi-Regular", 189], ["jp-YujiBoku-Regular", 190], ["jp-YujiSyuku-Regular", 191], 
["jp-ZenKakuGothicNew-Bold", 192], ["jp-ZenKakuGothicNew-Regular", 193], ["jp-ZenKurenaido-Regular", 194], ["jp-ZenMaruGothic-Bold", 195], ["jp-ZenMaruGothic-Regular", 196], ["jp-darts-font", 197], ["jp-irohakakuC-Bold", 198], ["jp-irohakakuC-Medium", 199], ["jp-irohakakuC-Regular", 200], ["jp-katyou", 201], ["jp-mplus-1m-bold", 202], ["jp-mplus-1m-regular", 203], ["jp-mplus-1p-bold", 204], ["jp-mplus-1p-regular", 205], ["jp-rounded-mplus-1p-bold", 206], ["jp-rounded-mplus-1p-regular", 207], ["jp-timemachine-wa", 208], ["jp-ttf-GenEiLateMin-Medium", 209], ["jp-uzura_font", 210]]
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/kr.json b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/kr.json
new file mode 100644
index 0000000000000000000000000000000000000000..778c949347801d04958d600f7334f60f01bd181f
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/kr.json
@@ -0,0 +1 @@
+[["kr-Arita-buri-Bold_OTF", 0], ["kr-Arita-buri-HairLine_OTF", 1], ["kr-Arita-buri-Light_OTF", 2], ["kr-Arita-buri-Medium_OTF", 3], ["kr-Arita-buri-SemiBold_OTF", 4], ["kr-Canva_YDSunshineL", 5], ["kr-Canva_YDSunshineM", 6], ["kr-Canva_YoonGulimPro710", 7], ["kr-Canva_YoonGulimPro730", 8], ["kr-Canva_YoonGulimPro740", 9], ["kr-Canva_YoonGulimPro760", 10], ["kr-Canva_YoonGulimPro770", 11], ["kr-Canva_YoonGulimPro790", 12], ["kr-CreHappB", 13], ["kr-CreHappL", 14], ["kr-CreHappM", 15], ["kr-CreHappS", 16], ["kr-OTAuroraB", 17], ["kr-OTAuroraL", 18], ["kr-OTAuroraR", 19], ["kr-OTDoldamgilB", 20], ["kr-OTDoldamgilL", 21], ["kr-OTDoldamgilR", 22], ["kr-OTHamsterB", 23], ["kr-OTHamsterL", 24], ["kr-OTHamsterR", 25], ["kr-OTHapchangdanB", 26], ["kr-OTHapchangdanL", 27], ["kr-OTHapchangdanR", 28], ["kr-OTSupersizeBkBOX", 29], ["kr-SourceHanSansKR-Bold", 30], ["kr-SourceHanSansKR-ExtraLight", 31], ["kr-SourceHanSansKR-Heavy", 32], ["kr-SourceHanSansKR-Light", 33], ["kr-SourceHanSansKR-Medium", 34], ["kr-SourceHanSansKR-Normal", 35], ["kr-SourceHanSansKR-Regular", 36], ["kr-SourceHanSansSC-Bold", 37], ["kr-SourceHanSansSC-ExtraLight", 38], ["kr-SourceHanSansSC-Heavy", 39], ["kr-SourceHanSansSC-Light", 40], ["kr-SourceHanSansSC-Medium", 41], ["kr-SourceHanSansSC-Normal", 42], ["kr-SourceHanSansSC-Regular", 43], ["kr-SourceHanSerifSC-Bold", 44], ["kr-SourceHanSerifSC-SemiBold", 45], ["kr-TDTDBubbleBubbleOTF", 46], ["kr-TDTDConfusionOTF", 47], ["kr-TDTDCuteAndCuteOTF", 48], ["kr-TDTDEggTakOTF", 49], ["kr-TDTDEmotionalLetterOTF", 50], ["kr-TDTDGalapagosOTF", 51], ["kr-TDTDHappyHourOTF", 52], ["kr-TDTDLatteOTF", 53], ["kr-TDTDMoonLightOTF", 54], ["kr-TDTDParkForestOTF", 55], ["kr-TDTDPencilOTF", 56], ["kr-TDTDSmileOTF", 57], ["kr-TDTDSproutOTF", 58], ["kr-TDTDSunshineOTF", 59], ["kr-TDTDWaferOTF", 60], ["kr-777Chyaochyureu", 61], ["kr-ArialUnicodeMS-Bold", 62], ["kr-ArialUnicodeMS", 63], ["kr-BMHANNA", 64], ["kr-Baekmuk-Dotum", 65], ["kr-BagelFatOne-Regular", 66], 
["kr-CoreBandi", 67], ["kr-CoreBandiFace", 68], ["kr-CoreBori", 69], ["kr-DoHyeon-Regular", 70], ["kr-Dokdo-Regular", 71], ["kr-Gaegu-Bold", 72], ["kr-Gaegu-Light", 73], ["kr-Gaegu-Regular", 74], ["kr-GamjaFlower-Regular", 75], ["kr-GasoekOne-Regular", 76], ["kr-GothicA1-Black", 77], ["kr-GothicA1-Bold", 78], ["kr-GothicA1-ExtraBold", 79], ["kr-GothicA1-ExtraLight", 80], ["kr-GothicA1-Light", 81], ["kr-GothicA1-Medium", 82], ["kr-GothicA1-Regular", 83], ["kr-GothicA1-SemiBold", 84], ["kr-GothicA1-Thin", 85], ["kr-Gugi-Regular", 86], ["kr-HiMelody-Regular", 87], ["kr-Jua-Regular", 88], ["kr-KirangHaerang-Regular", 89], ["kr-NanumBrush", 90], ["kr-NanumPen", 91], ["kr-NanumSquareRoundB", 92], ["kr-NanumSquareRoundEB", 93], ["kr-NanumSquareRoundL", 94], ["kr-NanumSquareRoundR", 95], ["kr-SeH-CB", 96], ["kr-SeH-CBL", 97], ["kr-SeH-CEB", 98], ["kr-SeH-CL", 99], ["kr-SeH-CM", 100], ["kr-SeN-CB", 101], ["kr-SeN-CBL", 102], ["kr-SeN-CEB", 103], ["kr-SeN-CL", 104], ["kr-SeN-CM", 105], ["kr-Sunflower-Bold", 106], ["kr-Sunflower-Light", 107], ["kr-Sunflower-Medium", 108], ["kr-TTClaytoyR", 109], ["kr-TTDalpangiR", 110], ["kr-TTMamablockR", 111], ["kr-TTNauidongmuR", 112], ["kr-TTOktapbangR", 113], ["kr-UhBeeMiMi", 114], ["kr-UhBeeMiMiBold", 115], ["kr-UhBeeSe_hyun", 116], ["kr-UhBeeSe_hyunBold", 117], ["kr-UhBeenamsoyoung", 118], ["kr-UhBeenamsoyoungBold", 119], ["kr-WenQuanYiMicroHei", 120], ["kr-YeonSung-Regular", 121]]
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/pt.json b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/pt.json
new file mode 100644
index 0000000000000000000000000000000000000000..00962ab5bbcf087c9ae8d691a99b21f5797275b7
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/pt.json
@@ -0,0 +1 @@
+[["en-Montserrat-Regular", 0], ["en-Poppins-Italic", 1], ["en-GlacialIndifference-Regular", 2], ["en-OpenSans-ExtraBoldItalic", 3], ["en-Montserrat-Bold", 4], ["en-Now-Regular", 5], ["en-Garet-Regular", 6], ["en-LeagueSpartan-Bold", 7], ["en-DMSans-Regular", 8], ["en-OpenSauceOne-Regular", 9], ["en-OpenSans-ExtraBold", 10], ["en-KGPrimaryPenmanship", 11], ["en-Anton-Regular", 12], ["en-Aileron-BlackItalic", 13], ["en-Quicksand-Light", 14], ["en-Roboto-BoldItalic", 15], ["en-TheSeasons-It", 16], ["en-Kollektif", 17], ["en-Inter-BoldItalic", 18], ["en-Poppins-Medium", 19], ["en-Poppins-Light", 20], ["en-RoxboroughCF-RegularItalic", 21], ["en-PlayfairDisplay-SemiBold", 22], ["en-Agrandir-Italic", 23], ["en-Lato-Regular", 24], ["en-MoreSugarRegular", 25], ["en-CanvaSans-RegularItalic", 26], ["en-PublicSans-Italic", 27], ["en-CodePro-NormalLC", 28], ["en-Belleza-Regular", 29], ["en-JosefinSans-Bold", 30], ["en-HKGrotesk-Bold", 31], ["en-Telegraf-Medium", 32], ["en-BrittanySignatureRegular", 33], ["en-Raleway-ExtraBoldItalic", 34], ["en-Mont-RegularItalic", 35], ["en-Arimo-BoldItalic", 36], ["en-Lora-Italic", 37], ["en-ArchivoBlack-Regular", 38], ["en-Poppins", 39], ["en-Barlow-Black", 40], ["en-CormorantGaramond-Bold", 41], ["en-LibreBaskerville-Regular", 42], ["en-BebasNeueBold", 44], ["en-LazydogRegular", 45], ["en-FredokaOne-Regular", 46], ["en-Horizon-Bold", 47], ["en-Nourd-Regular", 48], ["en-Hatton-Regular", 49], ["en-Nunito-ExtraBoldItalic", 50], ["en-CerebriSans-Regular", 51], ["en-Montserrat-Light", 52], ["en-TenorSans", 53], ["en-ClearSans-Bold", 55], ["en-Cardo-Regular", 56], ["en-Alice-Regular", 57], ["en-Oswald-Regular", 58], ["en-Muli-Black", 60], ["en-TAN-PEARL-Regular", 61], ["en-CooperHewitt-Book", 62], ["en-Agrandir-Grand", 63], ["en-BlackMango-Thin", 64], ["en-DMSerifDisplay-Regular", 65], ["en-Antonio-Bold", 66], ["en-Sniglet-Regular", 67], ["en-BeVietnam-Regular", 68], ["en-NunitoSans10pt-BlackItalic", 69], ["en-AbhayaLibre-ExtraBold", 70], 
["en-Rubik-Regular", 71], ["en-PPNeueMachina-Regular", 72], ["en-TAN - MON CHERI-Regular", 73], ["en-Playlist-Script", 75], ["en-SourceSansPro-BoldItalic", 76], ["en-MoonTime-Regular", 77], ["en-Eczar-ExtraBold", 78], ["en-Gatwick-Regular", 79], ["en-MonumentExtended-Regular", 80], ["en-BarlowSemiCondensed-Regular", 81], ["en-BarlowCondensed-Regular", 82], ["en-Alegreya-Regular", 83], ["en-DreamAvenue", 84], ["en-RobotoCondensed-Italic", 85], ["en-BobbyJones-Regular", 86], ["en-Garet-ExtraBold", 87], ["en-YesevaOne-Regular", 88], ["en-Dosis-ExtraBold", 89], ["en-LeagueGothic-Regular", 90], ["en-OpenSans-Italic", 91], ["en-TANAEGEAN-Regular", 92], ["en-Maharlika-Regular", 93], ["en-MarykateRegular", 94], ["en-Cinzel-Regular", 95], ["en-Agrandir-Wide", 96], ["en-Chewy-Regular", 97], ["en-BodoniFLF-BoldItalic", 98], ["en-Nunito-BlackItalic", 99], ["en-LilitaOne", 100], ["en-HandyCasualCondensed-Regular", 101], ["en-Ovo", 102], ["en-Livvic-Regular", 103], ["en-Agrandir-Narrow", 104], ["en-CrimsonPro-Italic", 105], ["en-AnonymousPro-Bold", 106], ["en-NF-OneLittleFont-Bold", 107], ["en-RedHatDisplay-BoldItalic", 108], ["en-CodecPro-Regular", 109], ["en-HalimunRegular", 110], ["en-LibreFranklin-Black", 111], ["en-TeXGyreTermes-BoldItalic", 112], ["en-Shrikhand-Regular", 113], ["en-TTNormsPro-Italic", 114], ["en-Gagalin-Regular", 115], ["en-OpenSans-Bold", 116], ["en-GreatVibes-Regular", 117], ["en-Breathing", 118], ["en-HeroLight-Regular", 119], ["en-KGPrimaryDots", 120], ["en-Quicksand-Bold", 121], ["en-Brice-ExtraLightSemiExpanded", 122], ["en-Lato-BoldItalic", 123], ["en-Fraunces9pt-Italic", 124], ["en-AbrilFatface-Regular", 125], ["en-BerkshireSwash-Regular", 126], ["en-Atma-Bold", 127], ["en-HolidayRegular", 128], ["en-BebasNeueCyrillic", 129], ["en-IntroRust-Base", 130], ["en-Gistesy", 131], ["en-BDScript-Regular", 132], ["en-ApricotsRegular", 133], ["en-Prompt-Black", 134], ["en-TAN MERINGUE", 135], ["en-GentySans-Regular", 137], ["en-NeueEinstellung-Normal", 138], 
["en-Garet-Bold", 139], ["en-FiraSans-Black", 140], ["en-BantayogLight", 141], ["en-NotoSerifDisplay-Black", 142], ["en-TTChocolates-Regular", 143], ["en-Ubuntu-Regular", 144], ["en-Assistant-Bold", 145], ["en-ABeeZee-Regular", 146], ["en-LexendDeca-Regular", 147], ["en-KingredSerif", 148], ["en-Radley-Regular", 149], ["en-BrownSugar", 150], ["en-MigraItalic-ExtraboldItalic", 151], ["en-ChildosArabic-Regular", 152], ["en-PeaceSans", 153], ["en-LondrinaSolid-Black", 154], ["en-SpaceMono-BoldItalic", 155], ["en-RobotoMono-Light", 156], ["en-CourierPrime-Regular", 157], ["en-Alata-Regular", 158], ["en-Amsterdam-One", 159], ["en-IreneFlorentina-Regular", 160], ["en-CatchyMager", 161], ["en-Alta_regular", 162], ["en-ArticulatCF-Regular", 163], ["en-Raleway-Regular", 164], ["en-BrasikaDisplay", 165], ["en-TANAngleton-Italic", 166], ["en-NotoSerifDisplay-ExtraCondensedItalic", 167], ["en-Bryndan Write", 168], ["en-TTCommonsPro-It", 169], ["en-AlexBrush-Regular", 170], ["en-Antic-Regular", 171], ["en-TTHoves-Bold", 172], ["en-DroidSerif", 173], ["en-AblationRegular", 174], ["en-Marcellus-Regular", 175], ["en-Sanchez-Italic", 176], ["en-JosefinSans", 177], ["en-Afrah-Regular", 178], ["en-PinyonScript", 179], ["en-TTInterphases-BoldItalic", 180], ["en-Yellowtail-Regular", 181], ["en-Gliker-Regular", 182], ["en-BobbyJonesSoft-Regular", 183], ["en-IBMPlexSans", 184], ["en-Amsterdam-Three", 185], ["en-Amsterdam-FourSlant", 186], ["en-TTFors-Regular", 187], ["en-Quattrocento", 188], ["en-Sifonn-Basic", 189], ["en-AlegreyaSans-Black", 190], ["en-Daydream", 191], ["en-AristotelicaProTx-Rg", 192], ["en-NotoSerif", 193], ["en-EBGaramond-Italic", 194], ["en-HammersmithOne-Regular", 195], ["en-RobotoSlab-Regular", 196], ["en-DO-Sans-Regular", 197], ["en-KGPrimaryDotsLined", 198], ["en-Blinker-Regular", 199], ["en-TAN NIMBUS", 200], ["en-Blueberry-Regular", 201], ["en-Rosario-Regular", 202], ["en-Forum", 203], ["en-MistrullyRegular", 204], ["en-SourceSerifPro-Regular", 205], 
["en-Bugaki-Regular", 206], ["en-CMUSerif-Roman", 207], ["en-GulfsDisplay-NormalItalic", 208], ["en-PTSans-Bold", 209], ["en-SquadaOne-Regular", 211], ["en-Arapey-Italic", 212], ["en-Parisienne-Regular", 213], ["en-Aleo-Italic", 214], ["en-QuicheDisplay-Italic", 215], ["en-RocaOne-It", 216], ["en-Funtastic-Regular", 217], ["en-PTSerif-BoldItalic", 218], ["en-Muller-RegularItalic", 219], ["en-ArgentCF-Regular", 220], ["en-Brightwall-Italic", 221], ["en-Knewave-Regular", 222], ["en-TYSerif-D", 223], ["en-Agrandir-Tight", 224], ["en-AlfaSlabOne-Regular", 225], ["en-TANTangkiwood-Display", 226], ["en-Kief-Montaser-Regular", 227], ["en-Gotham-Book", 228], ["en-JuliusSansOne-Regular", 229], ["en-CocoGothic-Italic", 230], ["en-SairaCondensed-Regular", 231], ["en-DellaRespira-Regular", 232], ["en-Questrial-Regular", 233], ["en-BukhariScript-Regular", 234], ["en-HelveticaWorld-Bold", 235], ["en-TANKINDRED-Display", 236], ["en-CinzelDecorative-Regular", 237], ["en-Vidaloka-Regular", 238], ["en-AlegreyaSansSC-Black", 239], ["en-FeelingPassionate-Regular", 240], ["en-QuincyCF-Regular", 241], ["en-FiraCode-Regular", 242], ["en-Genty-Regular", 243], ["en-Nickainley-Normal", 244], ["en-RubikOne-Regular", 245], ["en-Gidole-Regular", 246], ["en-Borsok", 247], ["en-Gordita-RegularItalic", 248], ["en-Scripter-Regular", 249], ["en-Buffalo-Regular", 250], ["en-KleinText-Regular", 251], ["en-Creepster-Regular", 252], ["en-Arvo-Bold", 253], ["en-GabrielSans-NormalItalic", 254], ["en-Heebo-Black", 255], ["en-LexendExa-Regular", 256], ["en-BrixtonSansTC-Regular", 257], ["en-GildaDisplay-Regular", 258], ["en-ChunkFive-Roman", 259], ["en-Amaranth-BoldItalic", 260], ["en-BubbleboddyNeue-Regular", 261], ["en-MavenPro-Bold", 262], ["en-TTDrugs-Italic", 263], ["en-CyGrotesk-KeyRegular", 264], ["en-VarelaRound-Regular", 265], ["en-Ruda-Black", 266], ["en-SafiraMarch", 267], ["en-BloggerSans", 268], ["en-TANHEADLINE-Regular", 269], ["en-SloopScriptPro-Regular", 270], ["en-NeueMontreal-Regular", 
271], ["en-Schoolbell-Regular", 272], ["en-SigherRegular", 273], ["en-InriaSerif-Regular", 274], ["en-JetBrainsMono-Regular", 275], ["en-MADEEvolveSans", 276], ["en-Dekko", 277], ["en-Handyman-Regular", 278], ["en-Aileron-BoldItalic", 279], ["en-Bright-Italic", 280], ["en-Solway-Regular", 281], ["en-Higuen-Regular", 282], ["en-WedgesItalic", 283], ["en-TANASHFORD-BOLD", 284], ["en-IBMPlexMono", 285], ["en-RacingSansOne-Regular", 286], ["en-RegularBrush", 287], ["en-OpenSans-LightItalic", 288], ["en-SpecialElite-Regular", 289], ["en-FuturaLTPro-Medium", 290], ["en-MaragsaDisplay", 291], ["en-BigShouldersDisplay-Regular", 292], ["en-BDSans-Regular", 293], ["en-RasputinRegular", 294], ["en-Yvesyvesdrawing-BoldItalic", 295], ["en-Bitter-Regular", 296], ["en-LuckiestGuy-Regular", 297], ["en-TTFirsNeue-Italic", 299], ["en-Sunday-Regular", 300], ["en-HKGothic-MediumItalic", 301], ["en-CaveatBrush-Regular", 302], ["en-ArchitectsDaughter-Regular", 304], ["en-Angelina", 305], ["en-Calistoga-Regular", 306], ["en-ArchivoNarrow-Regular", 307], ["en-ObjectSans-MediumSlanted", 308], ["en-AyrLucidityCondensed-Regular", 309], ["en-Nexa-RegularItalic", 310], ["en-Lustria-Regular", 311], ["en-Amsterdam-TwoSlant", 312], ["en-Virtual-Regular", 313], ["en-NF-Lepetitcochon-Regular", 315], ["en-TANTWINKLE", 316], ["en-LeJour-Serif", 317], ["en-Prata-Regular", 318], ["en-PPWoodland-Regular", 319], ["en-PlayfairDisplay-BoldItalic", 320], ["en-AmaticSC-Regular", 321], ["en-Cabin-Regular", 322], ["en-Manjari-Bold", 323], ["en-MrDafoe-Regular", 324], ["en-TTRamillas-Italic", 325], ["en-Luckybones-Bold", 326], ["en-DarkerGrotesque-Light", 327], ["en-BellabooRegular", 328], ["en-CormorantSC-Bold", 329], ["en-GochiHand-Regular", 330], ["en-Atteron", 331], ["en-RocaTwo-Lt", 332], ["en-TANSONGBIRD", 334], ["en-HeadingNow-74Regular", 335], ["en-Luthier-BoldItalic", 336], ["en-Oregano-Regular", 337], ["en-AyrTropikaIsland-Int", 338], ["en-Mali-Regular", 339], ["en-DidactGothic-Regular", 340], 
["en-Lovelace-Regular", 341], ["en-BakerieSmooth-Regular", 342], ["en-CarterOne", 343], ["en-HussarBd", 344], ["en-OldStandard-Italic", 345], ["en-TAN-ASTORIA-Display", 346], ["en-rugratssans-Regular", 347], ["en-BetterSaturday", 349], ["en-AdigianaToybox", 350], ["en-Sailors", 351], ["en-PlayfairDisplaySC-Italic", 352], ["en-Etna-Regular", 353], ["en-Revive80Signature", 354], ["en-CAGenerated", 355], ["en-Poppins-Regular", 356], ["en-Jonathan-Regular", 357], ["en-Pacifico-Regular", 358], ["en-Saira-Black", 359], ["en-Loubag-Regular", 360], ["en-Decalotype-Black", 361], ["en-Mansalva-Regular", 362], ["en-Allura-Regular", 363], ["en-ProximaNova-Bold", 364], ["en-TANMIGNON-DISPLAY", 365], ["en-ArsenicaAntiqua-Regular", 366], ["en-BreulGroteskA-RegularItalic", 367], ["en-HKModular-Bold", 368], ["en-TANNightingale-Regular", 369], ["en-AristotelicaProCndTxt-Rg", 370], ["en-Aprila-Regular", 371], ["en-Tomorrow-Regular", 372], ["en-AngellaWhite", 373], ["en-KaushanScript-Regular", 374], ["en-NotoSans", 375], ["en-LeJour-Script", 376], ["en-BrixtonTC-Regular", 377], ["en-OleoScript-Regular", 378], ["en-Cakerolli-Regular", 379], ["en-Lobster-Regular", 380], ["en-FrunchySerif-Regular", 381], ["en-PorcelainRegular", 382], ["en-AlojaExtended", 383], ["en-SergioTrendy-Italic", 384], ["en-LovelaceText-Bold", 385], ["en-Anaktoria", 386], ["en-JimmyScript-Light", 387], ["en-IBMPlexSerif", 388], ["en-Marta", 389], ["en-Mango-Regular", 390], ["en-Overpass-Italic", 391], ["en-Hagrid-Regular", 392], ["en-ElikaGorica", 393], ["en-Amiko-Regular", 394], ["en-EFCOBrookshire-Regular", 395], ["en-Caladea-Regular", 396], ["en-MoonlightBold", 397], ["en-Staatliches-Regular", 398], ["en-Helios-Bold", 399], ["en-Satisfy-Regular", 400], ["en-NexaScript-Regular", 401], ["en-Trocchi-Regular", 402], ["en-March", 403], ["en-IbarraRealNova-Regular", 404], ["en-Nectarine-Regular", 405], ["en-Overpass-Light", 406], ["en-TruetypewriterPolyglOTT", 407], ["en-Bangers-Regular", 408], 
["en-Lazord-BoldExpandedItalic", 409], ["en-Chloe-Regular", 410], ["en-BaskervilleDisplayPT-Regular", 411], ["en-Bright-Regular", 412], ["en-Vollkorn-Regular", 413], ["en-Harmattan", 414], ["en-SortsMillGoudy-Regular", 415], ["en-Biryani-Bold", 416], ["en-SugoProDisplay-Italic", 417], ["en-Lazord-BoldItalic", 418], ["en-Alike-Regular", 419], ["en-PermanentMarker-Regular", 420], ["en-Sacramento-Regular", 421], ["en-HKGroteskPro-Italic", 422], ["en-Aleo-BoldItalic", 423], ["en-TANGARLAND-Regular", 425], ["en-Twister", 426], ["en-Arsenal-Italic", 427], ["en-Bogart-Italic", 428], ["en-BethEllen-Regular", 429], ["en-Caveat-Regular", 430], ["en-BalsamiqSans-Bold", 431], ["en-BreeSerif-Regular", 432], ["en-CodecPro-ExtraBold", 433], ["en-Pierson-Light", 434], ["en-CyGrotesk-WideRegular", 435], ["en-Lumios-Marker", 436], ["en-Comfortaa-Bold", 437], ["en-TraceFontRegular", 438], ["en-RTL-AdamScript-Regular", 439], ["en-EastmanGrotesque-Italic", 440], ["en-Kalam-Bold", 441], ["en-ChauPhilomeneOne-Regular", 442], ["en-Coiny-Regular", 443], ["en-Lovera", 444], ["en-Gellatio", 445], ["en-TitilliumWeb-Bold", 446], ["en-OilvareBase-Italic", 447], ["en-Catamaran-Black", 448], ["en-Anteb-Italic", 449], ["en-SueEllenFrancisco", 450], ["en-SweetApricot", 451], ["en-BrightSunshine", 452], ["en-IM_FELL_Double_Pica_Italic", 453], ["en-Granaina-limpia", 454], ["en-TANPARFAIT", 455], ["en-AcherusGrotesque-Regular", 456], ["en-AwesomeLathusca-Italic", 457], ["en-Signika-Bold", 458], ["en-Andasia", 459], ["en-DO-AllCaps-Slanted", 460], ["en-Zenaida-Regular", 461], ["en-Fahkwang-Regular", 462], ["en-Play-Regular", 463], ["en-BERNIERRegular-Regular", 464], ["en-PlumaThin-Regular", 465], ["en-SportsWorld", 466], ["en-Garet-Black", 467], ["en-CarolloPlayscript-BlackItalic", 468], ["en-Cheque-Regular", 469], ["en-SEGO", 470], ["en-BobbyJones-Condensed", 471], ["en-NexaSlab-RegularItalic", 472], ["en-DancingScript-Regular", 473], ["en-Magnolia-Script", 475], ["en-OpunMai-400It", 476], 
["en-MadelynFill-Regular", 477], ["en-ZingRust-Base", 478], ["en-FingerPaint-Regular", 479], ["en-BostonAngel-Light", 480], ["en-Gliker-RegularExpanded", 481], ["en-Ahsing", 482], ["en-Engagement-Regular", 483], ["en-EyesomeScript", 484], ["en-LibraSerifModern-Regular", 485], ["en-London-Regular", 486], ["en-AtkinsonHyperlegible-Regular", 487], ["en-StadioNow-TextItalic", 488], ["en-Aniyah", 489], ["en-ITCAvantGardePro-Bold", 490], ["en-Comica-Regular", 491], ["en-Coustard-Regular", 492], ["en-Brice-BoldCondensed", 493], ["en-TANNEWYORK-Bold", 494], ["en-TANBUSTER-Bold", 495], ["en-Alatsi-Regular", 496], ["en-TYSerif-Book", 497], ["en-Jingleberry", 498], ["en-Rajdhani-Bold", 499], ["en-LobsterTwo-BoldItalic", 500], ["en-Hitchcut-Regular", 502], ["en-GermaniaOne-Regular", 503], ["en-Emitha-Script", 504], ["en-LemonTuesday", 505], ["en-MonterchiSerif-Regular", 507], ["en-AllertaStencil-Regular", 508], ["en-RTL-Sondos-Regular", 509], ["en-HomemadeApple-Regular", 510], ["en-CosmicOcto-Medium", 511]]
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/ru.json b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/ru.json
new file mode 100644
index 0000000000000000000000000000000000000000..49aec4f46efc8d51ab933e9d516891afc7de7e6c
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/assets/multi_fonts/ru.json
@@ -0,0 +1 @@
+[["en-OpenSans-ExtraBold", 10], ["en-AnonymousPro-Bold", 106], ["en-CodecPro-Regular", 109], ["en-TTNormsPro-Italic", 114], ["en-Gagalin-Regular", 115], ["en-OpenSans-Bold", 116], ["en-HeroLight-Regular", 119], ["en-Lato-BoldItalic", 123], ["en-BebasNeueCyrillic", 129], ["en-IntroRust-Base", 130], ["en-Garet-Bold", 139], ["en-FiraSans-Black", 140], ["en-NotoSerifDisplay-Black", 142], ["en-TTChocolates-Regular", 143], ["en-Ubuntu-Regular", 144], ["en-Roboto-BoldItalic", 15], ["en-PeaceSans", 153], ["en-RobotoMono-Light", 156], ["en-ArticulatCF-Regular", 163], ["en-Raleway-Regular", 164], ["en-NotoSerifDisplay-ExtraCondensedItalic", 167], ["en-Bryndan Write", 168], ["en-TTCommonsPro-It", 169], ["en-TTHoves-Bold", 172], ["en-DroidSerif", 173], ["en-AblationRegular", 174], ["en-Inter-BoldItalic", 18], ["en-TTInterphases-BoldItalic", 180], ["en-IBMPlexSans", 184], ["en-TTFors-Regular", 187], ["en-AlegreyaSans-Black", 190], ["en-AristotelicaProTx-Rg", 192], ["en-NotoSerif", 193], ["en-EBGaramond-Italic", 194], ["en-RobotoSlab-Regular", 196], ["en-DO-Sans-Regular", 197], ["en-Forum", 203], ["en-CMUSerif-Roman", 207], ["en-PTSans-Bold", 209], ["en-Sensei-Medium", 210], ["en-RocaOne-It", 216], ["en-Funtastic-Regular", 217], ["en-PTSerif-BoldItalic", 218], ["en-Muller-RegularItalic", 219], ["en-PlayfairDisplay-SemiBold", 22], ["en-ArgentCF-Regular", 220], ["en-TYSerif-D", 223], ["en-Gotham-Book", 228], ["en-CocoGothic-Italic", 230], ["en-BukhariScript-Regular", 234], ["en-HelveticaWorld-Bold", 235], ["en-AlegreyaSansSC-Black", 239], ["en-Lato-Regular", 24], ["en-QuincyCF-Regular", 241], ["en-FiraCode-Regular", 242], ["en-Nickainley-Normal", 244], ["en-RubikOne-Regular", 245], ["en-Borsok", 247], ["en-Gordita-RegularItalic", 248], ["en-Scripter-Regular", 249], ["en-KleinText-Regular", 251], ["en-BubbleboddyNeue-Regular", 261], ["en-TTDrugs-Italic", 263], ["en-CyGrotesk-KeyRegular", 264], ["en-BloggerSans", 268], ["en-SloopScriptPro-Regular", 270], 
["en-JetBrainsMono-Regular", 275], ["en-MADEEvolveSans", 276], ["en-Handyman-Regular", 278], ["en-CodePro-NormalLC", 28], ["en-IBMPlexMono", 285], ["en-OpenSans-LightItalic", 288], ["en-TTFirsNeue-Italic", 299], ["en-OpenSans-ExtraBoldItalic", 3], ["en-Sunday-Regular", 300], ["en-HeliosExt", 303], ["en-ObjectSans-MediumSlanted", 308], ["en-HKGrotesk-Bold", 31], ["en-Brusher-Regular", 314], ["en-Prata-Regular", 318], ["en-PlayfairDisplay-BoldItalic", 320], ["en-AmaticSC-Regular", 321], ["en-TTRamillas-Italic", 325], ["en-CormorantSC-Bold", 329], ["en-Atteron", 331], ["en-RocaTwo-Lt", 332], ["en-HeadingNow-74Regular", 335], ["en-Raleway-ExtraBoldItalic", 34], ["en-DidactGothic-Regular", 340], ["en-Lovelace-Regular", 341], ["en-HussarBd", 344], ["en-OldStandard-Italic", 345], ["en-Mont-RegularItalic", 35], ["en-PlayfairDisplaySC-Italic", 352], ["en-Etna-Regular", 353], ["en-Arimo-BoldItalic", 36], ["en-ProximaNova-Bold", 364], ["en-ArsenicaAntiqua-Regular", 366], ["en-Lora-Italic", 37], ["en-AristotelicaProCndTxt-Rg", 370], ["en-NotoSans", 375], ["en-Lobster-Regular", 380], ["en-LovelaceText-Bold", 385], ["en-Anaktoria", 386], ["en-IBMPlexSerif", 388], ["en-Marta", 389], ["en-Mango-Regular", 390], ["en-Hagrid-Regular", 392], ["en-Helios-Bold", 399], ["en-NexaScript-Regular", 401], ["en-TruetypewriterPolyglOTT", 407], ["en-CormorantGaramond-Bold", 41], ["en-BaskervilleDisplayPT-Regular", 411], ["en-Vollkorn-Regular", 413], ["en-SugoProDisplay-Italic", 417], ["en-Arsenal-Italic", 427], ["en-Bogart-Italic", 428], ["en-Caveat-Regular", 430], ["en-BalsamiqSans-Bold", 431], ["en-CodecPro-ExtraBold", 433], ["en-CyGrotesk-WideRegular", 435], ["en-Lumios-Marker", 436], ["en-Comfortaa-Bold", 437], ["en-RTL-AdamScript-Regular", 439], ["en-BebasNeueBold", 44], ["en-EastmanGrotesque-Italic", 440], ["en-LazydogRegular", 45], ["en-DO-AllCaps-Slanted", 460], ["en-Play-Regular", 463], ["en-BERNIERRegular-Regular", 464], ["en-SportsWorld", 466], ["en-Garet-Black", 467], 
["en-CarolloPlayscript-BlackItalic", 468], ["en-Cheque-Regular", 469], ["en-Magnolia-Script", 475], ["en-MadelynFill-Regular", 477], ["en-ZingRust-Base", 478], ["en-LibraSerifModern-Regular", 485], ["en-StadioNow-TextItalic", 488], ["en-Comica-Regular", 491], ["en-TYSerif-Book", 497], ["en-Jingleberry", 498], ["en-Nunito-ExtraBoldItalic", 50], ["en-LemonTuesday", 505], ["en-MonterchiSerif-Regular", 507], ["en-Montserrat-Light", 52], ["en-TenorSans", 53], ["en-ClearSans-Bold", 55], ["en-Alice-Regular", 57], ["en-Oswald-Regular", 58], ["en-Garet-Regular", 6], ["en-NunitoSans10pt-BlackItalic", 69], ["en-Rubik-Regular", 71], ["en-PPNeueMachina-Regular", 72], ["en-Alegreya-Regular", 83], ["en-RobotoCondensed-Italic", 85], ["en-Garet-ExtraBold", 87], ["en-YesevaOne-Regular", 88], ["en-OpenSans-Italic", 91], ["en-Nunito-BlackItalic", 99]]
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/assets/multilingual_10-lang_idx.json b/text_encoder/Glyph-SDXL-v2/assets/multilingual_10-lang_idx.json
new file mode 100644
index 0000000000000000000000000000000000000000..ccce7212f98923c4862d3c0c1236cea7bc04f8e2
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/assets/multilingual_10-lang_idx.json
@@ -0,0 +1 @@
+{"en-Montserrat-Regular": 0, "en-Poppins-Italic": 1, "en-GlacialIndifference-Regular": 2, "en-OpenSans-ExtraBoldItalic": 3, "en-Montserrat-Bold": 4, "en-Now-Regular": 5, "en-Garet-Regular": 6, "en-LeagueSpartan-Bold": 7, "en-DMSans-Regular": 8, "en-OpenSauceOne-Regular": 9, "en-OpenSans-ExtraBold": 10, "en-KGPrimaryPenmanship": 11, "en-Anton-Regular": 12, "en-Aileron-BlackItalic": 13, "en-Quicksand-Light": 14, "en-Roboto-BoldItalic": 15, "en-TheSeasons-It": 16, "en-Kollektif": 17, "en-Inter-BoldItalic": 18, "en-Poppins-Medium": 19, "en-Poppins-Light": 20, "en-RoxboroughCF-RegularItalic": 21, "en-PlayfairDisplay-SemiBold": 22, "en-Agrandir-Italic": 23, "en-Lato-Regular": 24, "en-MoreSugarRegular": 25, "en-CanvaSans-RegularItalic": 26, "en-PublicSans-Italic": 27, "en-CodePro-NormalLC": 28, "en-Belleza-Regular": 29, "en-JosefinSans-Bold": 30, "en-HKGrotesk-Bold": 31, "en-Telegraf-Medium": 32, "en-BrittanySignatureRegular": 33, "en-Raleway-ExtraBoldItalic": 34, "en-Mont-RegularItalic": 35, "en-Arimo-BoldItalic": 36, "en-Lora-Italic": 37, "en-ArchivoBlack-Regular": 38, "en-Poppins": 39, "en-Barlow-Black": 40, "en-CormorantGaramond-Bold": 41, "en-LibreBaskerville-Regular": 42, "en-CanvaSchoolFontRegular": 43, "en-BebasNeueBold": 44, "en-LazydogRegular": 45, "en-FredokaOne-Regular": 46, "en-Horizon-Bold": 47, "en-Nourd-Regular": 48, "en-Hatton-Regular": 49, "en-Nunito-ExtraBoldItalic": 50, "en-CerebriSans-Regular": 51, "en-Montserrat-Light": 52, "en-TenorSans": 53, "en-Norwester-Regular": 54, "en-ClearSans-Bold": 55, "en-Cardo-Regular": 56, "en-Alice-Regular": 57, "en-Oswald-Regular": 58, "en-Gaegu-Bold": 59, "en-Muli-Black": 60, "en-TAN-PEARL-Regular": 61, "en-CooperHewitt-Book": 62, "en-Agrandir-Grand": 63, "en-BlackMango-Thin": 64, "en-DMSerifDisplay-Regular": 65, "en-Antonio-Bold": 66, "en-Sniglet-Regular": 67, "en-BeVietnam-Regular": 68, "en-NunitoSans10pt-BlackItalic": 69, "en-AbhayaLibre-ExtraBold": 70, "en-Rubik-Regular": 71, "en-PPNeueMachina-Regular": 72, 
"en-TAN - MON CHERI-Regular": 73, "en-Jua-Regular": 74, "en-Playlist-Script": 75, "en-SourceSansPro-BoldItalic": 76, "en-MoonTime-Regular": 77, "en-Eczar-ExtraBold": 78, "en-Gatwick-Regular": 79, "en-MonumentExtended-Regular": 80, "en-BarlowSemiCondensed-Regular": 81, "en-BarlowCondensed-Regular": 82, "en-Alegreya-Regular": 83, "en-DreamAvenue": 84, "en-RobotoCondensed-Italic": 85, "en-BobbyJones-Regular": 86, "en-Garet-ExtraBold": 87, "en-YesevaOne-Regular": 88, "en-Dosis-ExtraBold": 89, "en-LeagueGothic-Regular": 90, "en-OpenSans-Italic": 91, "en-TANAEGEAN-Regular": 92, "en-Maharlika-Regular": 93, "en-MarykateRegular": 94, "en-Cinzel-Regular": 95, "en-Agrandir-Wide": 96, "en-Chewy-Regular": 97, "en-BodoniFLF-BoldItalic": 98, "en-Nunito-BlackItalic": 99, "en-LilitaOne": 100, "en-HandyCasualCondensed-Regular": 101, "en-Ovo": 102, "en-Livvic-Regular": 103, "en-Agrandir-Narrow": 104, "en-CrimsonPro-Italic": 105, "en-AnonymousPro-Bold": 106, "en-NF-OneLittleFont-Bold": 107, "en-RedHatDisplay-BoldItalic": 108, "en-CodecPro-Regular": 109, "en-HalimunRegular": 110, "en-LibreFranklin-Black": 111, "en-TeXGyreTermes-BoldItalic": 112, "en-Shrikhand-Regular": 113, "en-TTNormsPro-Italic": 114, "en-Gagalin-Regular": 115, "en-OpenSans-Bold": 116, "en-GreatVibes-Regular": 117, "en-Breathing": 118, "en-HeroLight-Regular": 119, "en-KGPrimaryDots": 120, "en-Quicksand-Bold": 121, "en-Brice-ExtraLightSemiExpanded": 122, "en-Lato-BoldItalic": 123, "en-Fraunces9pt-Italic": 124, "en-AbrilFatface-Regular": 125, "en-BerkshireSwash-Regular": 126, "en-Atma-Bold": 127, "en-HolidayRegular": 128, "en-BebasNeueCyrillic": 129, "en-IntroRust-Base": 130, "en-Gistesy": 131, "en-BDScript-Regular": 132, "en-ApricotsRegular": 133, "en-Prompt-Black": 134, "en-TAN MERINGUE": 135, "en-Sukar Regular": 136, "en-GentySans-Regular": 137, "en-NeueEinstellung-Normal": 138, "en-Garet-Bold": 139, "en-FiraSans-Black": 140, "en-BantayogLight": 141, "en-NotoSerifDisplay-Black": 142, "en-TTChocolates-Regular": 143, 
"en-Ubuntu-Regular": 144, "en-Assistant-Bold": 145, "en-ABeeZee-Regular": 146, "en-LexendDeca-Regular": 147, "en-KingredSerif": 148, "en-Radley-Regular": 149, "en-BrownSugar": 150, "en-MigraItalic-ExtraboldItalic": 151, "en-ChildosArabic-Regular": 152, "en-PeaceSans": 153, "en-LondrinaSolid-Black": 154, "en-SpaceMono-BoldItalic": 155, "en-RobotoMono-Light": 156, "en-CourierPrime-Regular": 157, "en-Alata-Regular": 158, "en-Amsterdam-One": 159, "en-IreneFlorentina-Regular": 160, "en-CatchyMager": 161, "en-Alta_regular": 162, "en-ArticulatCF-Regular": 163, "en-Raleway-Regular": 164, "en-BrasikaDisplay": 165, "en-TANAngleton-Italic": 166, "en-NotoSerifDisplay-ExtraCondensedItalic": 167, "en-Bryndan Write": 168, "en-TTCommonsPro-It": 169, "en-AlexBrush-Regular": 170, "en-Antic-Regular": 171, "en-TTHoves-Bold": 172, "en-DroidSerif": 173, "en-AblationRegular": 174, "en-Marcellus-Regular": 175, "en-Sanchez-Italic": 176, "en-JosefinSans": 177, "en-Afrah-Regular": 178, "en-PinyonScript": 179, "en-TTInterphases-BoldItalic": 180, "en-Yellowtail-Regular": 181, "en-Gliker-Regular": 182, "en-BobbyJonesSoft-Regular": 183, "en-IBMPlexSans": 184, "en-Amsterdam-Three": 185, "en-Amsterdam-FourSlant": 186, "en-TTFors-Regular": 187, "en-Quattrocento": 188, "en-Sifonn-Basic": 189, "en-AlegreyaSans-Black": 190, "en-Daydream": 191, "en-AristotelicaProTx-Rg": 192, "en-NotoSerif": 193, "en-EBGaramond-Italic": 194, "en-HammersmithOne-Regular": 195, "en-RobotoSlab-Regular": 196, "en-DO-Sans-Regular": 197, "en-KGPrimaryDotsLined": 198, "en-Blinker-Regular": 199, "en-TAN NIMBUS": 200, "en-Blueberry-Regular": 201, "en-Rosario-Regular": 202, "en-Forum": 203, "en-MistrullyRegular": 204, "en-SourceSerifPro-Regular": 205, "en-Bugaki-Regular": 206, "en-CMUSerif-Roman": 207, "en-GulfsDisplay-NormalItalic": 208, "en-PTSans-Bold": 209, "en-Sensei-Medium": 210, "en-SquadaOne-Regular": 211, "en-Arapey-Italic": 212, "en-Parisienne-Regular": 213, "en-Aleo-Italic": 214, "en-QuicheDisplay-Italic": 215, 
"en-RocaOne-It": 216, "en-Funtastic-Regular": 217, "en-PTSerif-BoldItalic": 218, "en-Muller-RegularItalic": 219, "en-ArgentCF-Regular": 220, "en-Brightwall-Italic": 221, "en-Knewave-Regular": 222, "en-TYSerif-D": 223, "en-Agrandir-Tight": 224, "en-AlfaSlabOne-Regular": 225, "en-TANTangkiwood-Display": 226, "en-Kief-Montaser-Regular": 227, "en-Gotham-Book": 228, "en-JuliusSansOne-Regular": 229, "en-CocoGothic-Italic": 230, "en-SairaCondensed-Regular": 231, "en-DellaRespira-Regular": 232, "en-Questrial-Regular": 233, "en-BukhariScript-Regular": 234, "en-HelveticaWorld-Bold": 235, "en-TANKINDRED-Display": 236, "en-CinzelDecorative-Regular": 237, "en-Vidaloka-Regular": 238, "en-AlegreyaSansSC-Black": 239, "en-FeelingPassionate-Regular": 240, "en-QuincyCF-Regular": 241, "en-FiraCode-Regular": 242, "en-Genty-Regular": 243, "en-Nickainley-Normal": 244, "en-RubikOne-Regular": 245, "en-Gidole-Regular": 246, "en-Borsok": 247, "en-Gordita-RegularItalic": 248, "en-Scripter-Regular": 249, "en-Buffalo-Regular": 250, "en-KleinText-Regular": 251, "en-Creepster-Regular": 252, "en-Arvo-Bold": 253, "en-GabrielSans-NormalItalic": 254, "en-Heebo-Black": 255, "en-LexendExa-Regular": 256, "en-BrixtonSansTC-Regular": 257, "en-GildaDisplay-Regular": 258, "en-ChunkFive-Roman": 259, "en-Amaranth-BoldItalic": 260, "en-BubbleboddyNeue-Regular": 261, "en-MavenPro-Bold": 262, "en-TTDrugs-Italic": 263, "en-CyGrotesk-KeyRegular": 264, "en-VarelaRound-Regular": 265, "en-Ruda-Black": 266, "en-SafiraMarch": 267, "en-BloggerSans": 268, "en-TANHEADLINE-Regular": 269, "en-SloopScriptPro-Regular": 270, "en-NeueMontreal-Regular": 271, "en-Schoolbell-Regular": 272, "en-SigherRegular": 273, "en-InriaSerif-Regular": 274, "en-JetBrainsMono-Regular": 275, "en-MADEEvolveSans": 276, "en-Dekko": 277, "en-Handyman-Regular": 278, "en-Aileron-BoldItalic": 279, "en-Bright-Italic": 280, "en-Solway-Regular": 281, "en-Higuen-Regular": 282, "en-WedgesItalic": 283, "en-TANASHFORD-BOLD": 284, "en-IBMPlexMono": 285, 
"en-RacingSansOne-Regular": 286, "en-RegularBrush": 287, "en-OpenSans-LightItalic": 288, "en-SpecialElite-Regular": 289, "en-FuturaLTPro-Medium": 290, "en-MaragsaDisplay": 291, "en-BigShouldersDisplay-Regular": 292, "en-BDSans-Regular": 293, "en-RasputinRegular": 294, "en-Yvesyvesdrawing-BoldItalic": 295, "en-Bitter-Regular": 296, "en-LuckiestGuy-Regular": 297, "en-CanvaSchoolFontDotted": 298, "en-TTFirsNeue-Italic": 299, "en-Sunday-Regular": 300, "en-HKGothic-MediumItalic": 301, "en-CaveatBrush-Regular": 302, "en-HeliosExt": 303, "en-ArchitectsDaughter-Regular": 304, "en-Angelina": 305, "en-Calistoga-Regular": 306, "en-ArchivoNarrow-Regular": 307, "en-ObjectSans-MediumSlanted": 308, "en-AyrLucidityCondensed-Regular": 309, "en-Nexa-RegularItalic": 310, "en-Lustria-Regular": 311, "en-Amsterdam-TwoSlant": 312, "en-Virtual-Regular": 313, "en-Brusher-Regular": 314, "en-NF-Lepetitcochon-Regular": 315, "en-TANTWINKLE": 316, "en-LeJour-Serif": 317, "en-Prata-Regular": 318, "en-PPWoodland-Regular": 319, "en-PlayfairDisplay-BoldItalic": 320, "en-AmaticSC-Regular": 321, "en-Cabin-Regular": 322, "en-Manjari-Bold": 323, "en-MrDafoe-Regular": 324, "en-TTRamillas-Italic": 325, "en-Luckybones-Bold": 326, "en-DarkerGrotesque-Light": 327, "en-BellabooRegular": 328, "en-CormorantSC-Bold": 329, "en-GochiHand-Regular": 330, "en-Atteron": 331, "en-RocaTwo-Lt": 332, "en-ZCOOLXiaoWei-Regular": 333, "en-TANSONGBIRD": 334, "en-HeadingNow-74Regular": 335, "en-Luthier-BoldItalic": 336, "en-Oregano-Regular": 337, "en-AyrTropikaIsland-Int": 338, "en-Mali-Regular": 339, "en-DidactGothic-Regular": 340, "en-Lovelace-Regular": 341, "en-BakerieSmooth-Regular": 342, "en-CarterOne": 343, "en-HussarBd": 344, "en-OldStandard-Italic": 345, "en-TAN-ASTORIA-Display": 346, "en-rugratssans-Regular": 347, "en-BMHANNA": 348, "en-BetterSaturday": 349, "en-AdigianaToybox": 350, "en-Sailors": 351, "en-PlayfairDisplaySC-Italic": 352, "en-Etna-Regular": 353, "en-Revive80Signature": 354, "en-CAGenerated": 355, 
"en-Poppins-Regular": 356, "en-Jonathan-Regular": 357, "en-Pacifico-Regular": 358, "en-Saira-Black": 359, "en-Loubag-Regular": 360, "en-Decalotype-Black": 361, "en-Mansalva-Regular": 362, "en-Allura-Regular": 363, "en-ProximaNova-Bold": 364, "en-TANMIGNON-DISPLAY": 365, "en-ArsenicaAntiqua-Regular": 366, "en-BreulGroteskA-RegularItalic": 367, "en-HKModular-Bold": 368, "en-TANNightingale-Regular": 369, "en-AristotelicaProCndTxt-Rg": 370, "en-Aprila-Regular": 371, "en-Tomorrow-Regular": 372, "en-AngellaWhite": 373, "en-KaushanScript-Regular": 374, "en-NotoSans": 375, "en-LeJour-Script": 376, "en-BrixtonTC-Regular": 377, "en-OleoScript-Regular": 378, "en-Cakerolli-Regular": 379, "en-Lobster-Regular": 380, "en-FrunchySerif-Regular": 381, "en-PorcelainRegular": 382, "en-AlojaExtended": 383, "en-SergioTrendy-Italic": 384, "en-LovelaceText-Bold": 385, "en-Anaktoria": 386, "en-JimmyScript-Light": 387, "en-IBMPlexSerif": 388, "en-Marta": 389, "en-Mango-Regular": 390, "en-Overpass-Italic": 391, "en-Hagrid-Regular": 392, "en-ElikaGorica": 393, "en-Amiko-Regular": 394, "en-EFCOBrookshire-Regular": 395, "en-Caladea-Regular": 396, "en-MoonlightBold": 397, "en-Staatliches-Regular": 398, "en-Helios-Bold": 399, "en-Satisfy-Regular": 400, "en-NexaScript-Regular": 401, "en-Trocchi-Regular": 402, "en-March": 403, "en-IbarraRealNova-Regular": 404, "en-Nectarine-Regular": 405, "en-Overpass-Light": 406, "en-TruetypewriterPolyglOTT": 407, "en-Bangers-Regular": 408, "en-Lazord-BoldExpandedItalic": 409, "en-Chloe-Regular": 410, "en-BaskervilleDisplayPT-Regular": 411, "en-Bright-Regular": 412, "en-Vollkorn-Regular": 413, "en-Harmattan": 414, "en-SortsMillGoudy-Regular": 415, "en-Biryani-Bold": 416, "en-SugoProDisplay-Italic": 417, "en-Lazord-BoldItalic": 418, "en-Alike-Regular": 419, "en-PermanentMarker-Regular": 420, "en-Sacramento-Regular": 421, "en-HKGroteskPro-Italic": 422, "en-Aleo-BoldItalic": 423, "en-Noot": 424, "en-TANGARLAND-Regular": 425, "en-Twister": 426, "en-Arsenal-Italic": 
427, "en-Bogart-Italic": 428, "en-BethEllen-Regular": 429, "en-Caveat-Regular": 430, "en-BalsamiqSans-Bold": 431, "en-BreeSerif-Regular": 432, "en-CodecPro-ExtraBold": 433, "en-Pierson-Light": 434, "en-CyGrotesk-WideRegular": 435, "en-Lumios-Marker": 436, "en-Comfortaa-Bold": 437, "en-TraceFontRegular": 438, "en-RTL-AdamScript-Regular": 439, "en-EastmanGrotesque-Italic": 440, "en-Kalam-Bold": 441, "en-ChauPhilomeneOne-Regular": 442, "en-Coiny-Regular": 443, "en-Lovera": 444, "en-Gellatio": 445, "en-TitilliumWeb-Bold": 446, "en-OilvareBase-Italic": 447, "en-Catamaran-Black": 448, "en-Anteb-Italic": 449, "en-SueEllenFrancisco": 450, "en-SweetApricot": 451, "en-BrightSunshine": 452, "en-IM_FELL_Double_Pica_Italic": 453, "en-Granaina-limpia": 454, "en-TANPARFAIT": 455, "en-AcherusGrotesque-Regular": 456, "en-AwesomeLathusca-Italic": 457, "en-Signika-Bold": 458, "en-Andasia": 459, "en-DO-AllCaps-Slanted": 460, "en-Zenaida-Regular": 461, "en-Fahkwang-Regular": 462, "en-Play-Regular": 463, "en-BERNIERRegular-Regular": 464, "en-PlumaThin-Regular": 465, "en-SportsWorld": 466, "en-Garet-Black": 467, "en-CarolloPlayscript-BlackItalic": 468, "en-Cheque-Regular": 469, "en-SEGO": 470, "en-BobbyJones-Condensed": 471, "en-NexaSlab-RegularItalic": 472, "en-DancingScript-Regular": 473, "en-PaalalabasDisplayWideBETA": 474, "en-Magnolia-Script": 475, "en-OpunMai-400It": 476, "en-MadelynFill-Regular": 477, "en-ZingRust-Base": 478, "en-FingerPaint-Regular": 479, "en-BostonAngel-Light": 480, "en-Gliker-RegularExpanded": 481, "en-Ahsing": 482, "en-Engagement-Regular": 483, "en-EyesomeScript": 484, "en-LibraSerifModern-Regular": 485, "en-London-Regular": 486, "en-AtkinsonHyperlegible-Regular": 487, "en-StadioNow-TextItalic": 488, "en-Aniyah": 489, "en-ITCAvantGardePro-Bold": 490, "en-Comica-Regular": 491, "en-Coustard-Regular": 492, "en-Brice-BoldCondensed": 493, "en-TANNEWYORK-Bold": 494, "en-TANBUSTER-Bold": 495, "en-Alatsi-Regular": 496, "en-TYSerif-Book": 497, "en-Jingleberry": 498, 
"en-Rajdhani-Bold": 499, "en-LobsterTwo-BoldItalic": 500, "en-BestLight-Medium": 501, "en-Hitchcut-Regular": 502, "en-GermaniaOne-Regular": 503, "en-Emitha-Script": 504, "en-LemonTuesday": 505, "en-Cubao_Free_Regular": 506, "en-MonterchiSerif-Regular": 507, "en-AllertaStencil-Regular": 508, "en-RTL-Sondos-Regular": 509, "en-HomemadeApple-Regular": 510, "en-CosmicOcto-Medium": 511, "cn-HelloFont-FangHuaTi": 0, "cn-HelloFont-ID-DianFangSong-Bold": 1, "cn-HelloFont-ID-DianFangSong": 2, "cn-HelloFont-ID-DianHei-CEJ": 3, "cn-HelloFont-ID-DianHei-DEJ": 4, "cn-HelloFont-ID-DianHei-EEJ": 5, "cn-HelloFont-ID-DianHei-FEJ": 6, "cn-HelloFont-ID-DianHei-GEJ": 7, "cn-HelloFont-ID-DianKai-Bold": 8, "cn-HelloFont-ID-DianKai": 9, "cn-HelloFont-WenYiHei": 10, "cn-Hellofont-ID-ChenYanXingKai": 11, "cn-Hellofont-ID-DaZiBao": 12, "cn-Hellofont-ID-DaoCaoRen": 13, "cn-Hellofont-ID-JianSong": 14, "cn-Hellofont-ID-JiangHuZhaoPaiHei": 15, "cn-Hellofont-ID-KeSong": 16, "cn-Hellofont-ID-LeYuanTi": 17, "cn-Hellofont-ID-Pinocchio": 18, "cn-Hellofont-ID-QiMiaoTi": 19, "cn-Hellofont-ID-QingHuaKai": 20, "cn-Hellofont-ID-QingHuaXingKai": 21, "cn-Hellofont-ID-ShanShuiXingKai": 22, "cn-Hellofont-ID-ShouXieQiShu": 23, "cn-Hellofont-ID-ShouXieTongZhenTi": 24, "cn-Hellofont-ID-TengLingTi": 25, "cn-Hellofont-ID-XiaoLiShu": 26, "cn-Hellofont-ID-XuanZhenSong": 27, "cn-Hellofont-ID-ZhongLingXingKai": 28, "cn-HellofontIDJiaoTangTi": 29, "cn-HellofontIDJiuZhuTi": 30, "cn-HuXiaoBao-SaoBao": 31, "cn-HuXiaoBo-NanShen": 32, "cn-HuXiaoBo-ZhenShuai": 33, "cn-SourceHanSansSC-Bold": 34, "cn-SourceHanSansSC-ExtraLight": 35, "cn-SourceHanSansSC-Heavy": 36, "cn-SourceHanSansSC-Light": 37, "cn-SourceHanSansSC-Medium": 38, "cn-SourceHanSansSC-Normal": 39, "cn-SourceHanSansSC-Regular": 40, "cn-SourceHanSerifSC-Bold": 41, "cn-SourceHanSerifSC-ExtraLight": 42, "cn-SourceHanSerifSC-Heavy": 43, "cn-SourceHanSerifSC-Light": 44, "cn-SourceHanSerifSC-Medium": 45, "cn-SourceHanSerifSC-Regular": 46, "cn-SourceHanSerifSC-SemiBold": 
47, "cn-xiaowei": 48, "cn-AaJianHaoTi": 49, "cn-AlibabaPuHuiTi-Bold": 50, "cn-AlibabaPuHuiTi-Heavy": 51, "cn-AlibabaPuHuiTi-Light": 52, "cn-AlibabaPuHuiTi-Medium": 53, "cn-AlibabaPuHuiTi-Regular": 54, "cn-CanvaAcidBoldSC": 55, "cn-CanvaBreezeCN": 56, "cn-CanvaBumperCropSC": 57, "cn-CanvaCakeShopCN": 58, "cn-CanvaEndeavorBlackSC": 59, "cn-CanvaJoyHeiCN": 60, "cn-CanvaLiCN": 61, "cn-CanvaOrientalBrushCN": 62, "cn-CanvaPoster": 63, "cn-CanvaQinfuCalligraphyCN": 64, "cn-CanvaSweetHeartCN": 65, "cn-CanvaSwordLikeDreamCN": 66, "cn-CanvaTangyuanHandwritingCN": 67, "cn-CanvaWanderWorldCN": 68, "cn-CanvaWenCN": 69, "cn-DianZiChunYi": 70, "cn-GenSekiGothicTW-H": 71, "cn-GenWanMinTW-L": 72, "cn-GenYoMinTW-B": 73, "cn-GenYoMinTW-EL": 74, "cn-GenYoMinTW-H": 75, "cn-GenYoMinTW-M": 76, "cn-GenYoMinTW-R": 77, "cn-GenYoMinTW-SB": 78, "cn-HYQiHei-AZEJ": 79, "cn-HYQiHei-EES": 80, "cn-HanaMinA": 81, "cn-HappyZcool-2016": 82, "cn-HelloFont ZJ KeKouKeAiTi": 83, "cn-HelloFont-ID-BoBoTi": 84, "cn-HelloFont-ID-FuGuHei-25": 85, "cn-HelloFont-ID-FuGuHei-35": 86, "cn-HelloFont-ID-FuGuHei-45": 87, "cn-HelloFont-ID-FuGuHei-55": 88, "cn-HelloFont-ID-FuGuHei-65": 89, "cn-HelloFont-ID-FuGuHei-75": 90, "cn-HelloFont-ID-FuGuHei-85": 91, "cn-HelloFont-ID-HeiKa": 92, "cn-HelloFont-ID-HeiTang": 93, "cn-HelloFont-ID-JianSong-95": 94, "cn-HelloFont-ID-JueJiangHei-50": 95, "cn-HelloFont-ID-JueJiangHei-55": 96, "cn-HelloFont-ID-JueJiangHei-60": 97, "cn-HelloFont-ID-JueJiangHei-65": 98, "cn-HelloFont-ID-JueJiangHei-70": 99, "cn-HelloFont-ID-JueJiangHei-75": 100, "cn-HelloFont-ID-JueJiangHei-80": 101, "cn-HelloFont-ID-KuHeiTi": 102, "cn-HelloFont-ID-LingDongTi": 103, "cn-HelloFont-ID-LingLiTi": 104, "cn-HelloFont-ID-MuFengTi": 105, "cn-HelloFont-ID-NaiNaiJiangTi": 106, "cn-HelloFont-ID-PangDu": 107, "cn-HelloFont-ID-ReLieTi": 108, "cn-HelloFont-ID-RouRun": 109, "cn-HelloFont-ID-SaShuangShouXieTi": 110, "cn-HelloFont-ID-WangZheFengFan": 111, "cn-HelloFont-ID-YouQiTi": 112, "cn-Hellofont-ID-XiaLeTi": 113, 
"cn-Hellofont-ID-XianXiaTi": 114, "cn-HuXiaoBoKuHei": 115, "cn-IDDanMoXingKai": 116, "cn-IDJueJiangHei": 117, "cn-IDMeiLingTi": 118, "cn-IDQQSugar": 119, "cn-LiuJianMaoCao-Regular": 120, "cn-LongCang-Regular": 121, "cn-MaShanZheng-Regular": 122, "cn-PangMenZhengDao-3": 123, "cn-PangMenZhengDao-Cu": 124, "cn-PangMenZhengDao": 125, "cn-SentyCaramel": 126, "cn-SourceHanSerifSC": 127, "cn-WenCang-Regular": 128, "cn-WenQuanYiMicroHei": 129, "cn-XianErTi": 130, "cn-YRDZSTJF": 131, "cn-YS-HelloFont-BangBangTi": 132, "cn-ZCOOLKuaiLe-Regular": 133, "cn-ZCOOLQingKeHuangYou-Regular": 134, "cn-ZCOOLXiaoWei-Regular": 135, "cn-ZCOOL_KuHei": 136, "cn-ZhiMangXing-Regular": 137, "cn-baotuxiaobaiti": 138, "cn-jiangxizhuokai-Regular": 139, "cn-zcool-gdh": 140, "cn-zcoolqingkehuangyouti-Regular": 141, "cn-zcoolwenyiti": 142, "jp-04KanjyukuGothic": 0, "jp-07LightNovelPOP": 1, "jp-07NikumaruFont": 2, "jp-07YasashisaAntique": 3, "jp-07YasashisaGothic": 4, "jp-BokutachinoGothic2Bold": 5, "jp-BokutachinoGothic2Regular": 6, "jp-CHI_SpeedyRight_full_211128-Regular": 7, "jp-CHI_SpeedyRight_italic_full_211127-Regular": 8, "jp-CP-Font": 9, "jp-Canva_CezanneProN-B": 10, "jp-Canva_CezanneProN-M": 11, "jp-Canva_ChiaroStd-B": 12, "jp-Canva_CometStd-B": 13, "jp-Canva_DotMincho16Std-M": 14, "jp-Canva_GrecoStd-B": 15, "jp-Canva_GrecoStd-M": 16, "jp-Canva_LyraStd-DB": 17, "jp-Canva_MatisseHatsuhiPro-B": 18, "jp-Canva_MatisseHatsuhiPro-M": 19, "jp-Canva_ModeMinAStd-B": 20, "jp-Canva_NewCezanneProN-B": 21, "jp-Canva_NewCezanneProN-M": 22, "jp-Canva_PearlStd-L": 23, "jp-Canva_RaglanStd-UB": 24, "jp-Canva_RailwayStd-B": 25, "jp-Canva_ReggaeStd-B": 26, "jp-Canva_RocknRollStd-DB": 27, "jp-Canva_RodinCattleyaPro-B": 28, "jp-Canva_RodinCattleyaPro-M": 29, "jp-Canva_RodinCattleyaPro-UB": 30, "jp-Canva_RodinHimawariPro-B": 31, "jp-Canva_RodinHimawariPro-M": 32, "jp-Canva_RodinMariaPro-B": 33, "jp-Canva_RodinMariaPro-DB": 34, "jp-Canva_RodinProN-M": 35, "jp-Canva_ShadowTLStd-B": 36, "jp-Canva_StickStd-B": 37, 
"jp-Canva_TsukuAOldMinPr6N-B": 38, "jp-Canva_TsukuAOldMinPr6N-R": 39, "jp-Canva_UtrilloPro-DB": 40, "jp-Canva_UtrilloPro-M": 41, "jp-Canva_YurukaStd-UB": 42, "jp-FGUIGEN": 43, "jp-GlowSansJ-Condensed-Heavy": 44, "jp-GlowSansJ-Condensed-Light": 45, "jp-GlowSansJ-Normal-Bold": 46, "jp-GlowSansJ-Normal-Light": 47, "jp-HannariMincho": 48, "jp-HarenosoraMincho": 49, "jp-Jiyucho": 50, "jp-Kaiso-Makina-B": 51, "jp-Kaisotai-Next-UP-B": 52, "jp-KokoroMinchoutai": 53, "jp-Mamelon-3-Hi-Regular": 54, "jp-MotoyaAnemoneStd-W1": 55, "jp-MotoyaAnemoneStd-W5": 56, "jp-MotoyaAnticPro-W3": 57, "jp-MotoyaCedarStd-W3": 58, "jp-MotoyaCedarStd-W5": 59, "jp-MotoyaGochikaStd-W4": 60, "jp-MotoyaGochikaStd-W8": 61, "jp-MotoyaGothicMiyabiStd-W6": 62, "jp-MotoyaGothicStd-W3": 63, "jp-MotoyaGothicStd-W5": 64, "jp-MotoyaKoinStd-W3": 65, "jp-MotoyaKyotaiStd-W2": 66, "jp-MotoyaKyotaiStd-W4": 67, "jp-MotoyaMaruStd-W3": 68, "jp-MotoyaMaruStd-W5": 69, "jp-MotoyaMinchoMiyabiStd-W4": 70, "jp-MotoyaMinchoMiyabiStd-W6": 71, "jp-MotoyaMinchoModernStd-W4": 72, "jp-MotoyaMinchoModernStd-W6": 73, "jp-MotoyaMinchoStd-W3": 74, "jp-MotoyaMinchoStd-W5": 75, "jp-MotoyaReisyoStd-W2": 76, "jp-MotoyaReisyoStd-W6": 77, "jp-MotoyaTohitsuStd-W4": 78, "jp-MotoyaTohitsuStd-W6": 79, "jp-MtySousyokuEmBcJis-W6": 80, "jp-MtySousyokuLiBcJis-W6": 81, "jp-Mushin": 82, "jp-NotoSansJP-Bold": 83, "jp-NotoSansJP-Regular": 84, "jp-NudMotoyaAporoStd-W3": 85, "jp-NudMotoyaAporoStd-W5": 86, "jp-NudMotoyaCedarStd-W3": 87, "jp-NudMotoyaCedarStd-W5": 88, "jp-NudMotoyaMaruStd-W3": 89, "jp-NudMotoyaMaruStd-W5": 90, "jp-NudMotoyaMinchoStd-W5": 91, "jp-Ounen-mouhitsu": 92, "jp-Ronde-B-Square": 93, "jp-SMotoyaGyosyoStd-W5": 94, "jp-SMotoyaSinkaiStd-W3": 95, "jp-SMotoyaSinkaiStd-W5": 96, "jp-SourceHanSansJP-Bold": 97, "jp-SourceHanSansJP-Regular": 98, "jp-SourceHanSerifJP-Bold": 99, "jp-SourceHanSerifJP-Regular": 100, "jp-TazuganeGothicStdN-Bold": 101, "jp-TazuganeGothicStdN-Regular": 102, "jp-TelopMinProN-B": 103, "jp-Togalite-Bold": 104, 
"jp-Togalite-Regular": 105, "jp-TsukuMinPr6N-E": 106, "jp-TsukuMinPr6N-M": 107, "jp-mikachan_o": 108, "jp-nagayama_kai": 109, "jp-07LogoTypeGothic7": 110, "jp-07TetsubinGothic": 111, "jp-851CHIKARA-DZUYOKU-KANA-A": 112, "jp-ARMinchoJIS-Light": 113, "jp-ARMinchoJIS-Ultra": 114, "jp-ARPCrystalMinchoJIS-Medium": 115, "jp-ARPCrystalRGothicJIS-Medium": 116, "jp-ARShounanShinpitsuGyosyoJIS-Medium": 117, "jp-AozoraMincho-bold": 118, "jp-AozoraMinchoRegular": 119, "jp-ArialUnicodeMS-Bold": 120, "jp-ArialUnicodeMS": 121, "jp-CanvaBreezeJP": 122, "jp-CanvaLiCN": 123, "jp-CanvaLiJP": 124, "jp-CanvaOrientalBrushCN": 125, "jp-CanvaQinfuCalligraphyJP": 126, "jp-CanvaSweetHeartJP": 127, "jp-CanvaWenJP": 128, "jp-Corporate-Logo-Bold": 129, "jp-DelaGothicOne-Regular": 130, "jp-GN-Kin-iro_SansSerif": 131, "jp-GN-Koharuiro_Sunray": 132, "jp-GenEiGothicM-B": 133, "jp-GenEiGothicM-R": 134, "jp-GenJyuuGothic-Bold": 135, "jp-GenRyuMinTW-B": 136, "jp-GenRyuMinTW-R": 137, "jp-GenSekiGothicTW-B": 138, "jp-GenSekiGothicTW-R": 139, "jp-GenSenRoundedTW-B": 140, "jp-GenSenRoundedTW-R": 141, "jp-GenShinGothic-Bold": 142, "jp-GenShinGothic-Normal": 143, "jp-GenWanMinTW-L": 144, "jp-GenYoGothicTW-B": 145, "jp-GenYoGothicTW-R": 146, "jp-GenYoMinTW-B": 147, "jp-GenYoMinTW-R": 148, "jp-HGBouquet": 149, "jp-HanaMinA": 150, "jp-HanazomeFont": 151, "jp-HinaMincho-Regular": 152, "jp-Honoka-Antique-Maru": 153, "jp-Honoka-Mincho": 154, "jp-HuiFontP": 155, "jp-IPAexMincho": 156, "jp-JK-Gothic-L": 157, "jp-JK-Gothic-M": 158, "jp-JackeyFont": 159, "jp-KaiseiTokumin-Bold": 160, "jp-KaiseiTokumin-Regular": 161, "jp-Keifont": 162, "jp-KiwiMaru-Regular": 163, "jp-Koku-Mincho-Regular": 164, "jp-MotoyaLMaru-W3-90ms-RKSJ-H": 165, "jp-NewTegomin-Regular": 166, "jp-NicoKaku": 167, "jp-NicoMoji+": 168, "jp-Otsutome_font-Bold": 169, "jp-PottaOne-Regular": 170, "jp-RampartOne-Regular": 171, "jp-Senobi-Gothic-Bold": 172, "jp-Senobi-Gothic-Regular": 173, "jp-SmartFontUI-Proportional": 174, "jp-SoukouMincho": 175, 
"jp-TEST_Klee-DB": 176, "jp-TEST_Klee-M": 177, "jp-TEST_UDMincho-B": 178, "jp-TEST_UDMincho-L": 179, "jp-TT_Akakane-EB": 180, "jp-Tanuki-Permanent-Marker": 181, "jp-TrainOne-Regular": 182, "jp-TsunagiGothic-Black": 183, "jp-Ume-Hy-Gothic": 184, "jp-Ume-P-Mincho": 185, "jp-WenQuanYiMicroHei": 186, "jp-XANO-mincho-U32": 187, "jp-YOzFontM90-Regular": 188, "jp-Yomogi-Regular": 189, "jp-YujiBoku-Regular": 190, "jp-YujiSyuku-Regular": 191, "jp-ZenKakuGothicNew-Bold": 192, "jp-ZenKakuGothicNew-Regular": 193, "jp-ZenKurenaido-Regular": 194, "jp-ZenMaruGothic-Bold": 195, "jp-ZenMaruGothic-Regular": 196, "jp-darts-font": 197, "jp-irohakakuC-Bold": 198, "jp-irohakakuC-Medium": 199, "jp-irohakakuC-Regular": 200, "jp-katyou": 201, "jp-mplus-1m-bold": 202, "jp-mplus-1m-regular": 203, "jp-mplus-1p-bold": 204, "jp-mplus-1p-regular": 205, "jp-rounded-mplus-1p-bold": 206, "jp-rounded-mplus-1p-regular": 207, "jp-timemachine-wa": 208, "jp-ttf-GenEiLateMin-Medium": 209, "jp-uzura_font": 210, "kr-Arita-buri-Bold_OTF": 0, "kr-Arita-buri-HairLine_OTF": 1, "kr-Arita-buri-Light_OTF": 2, "kr-Arita-buri-Medium_OTF": 3, "kr-Arita-buri-SemiBold_OTF": 4, "kr-Canva_YDSunshineL": 5, "kr-Canva_YDSunshineM": 6, "kr-Canva_YoonGulimPro710": 7, "kr-Canva_YoonGulimPro730": 8, "kr-Canva_YoonGulimPro740": 9, "kr-Canva_YoonGulimPro760": 10, "kr-Canva_YoonGulimPro770": 11, "kr-Canva_YoonGulimPro790": 12, "kr-CreHappB": 13, "kr-CreHappL": 14, "kr-CreHappM": 15, "kr-CreHappS": 16, "kr-OTAuroraB": 17, "kr-OTAuroraL": 18, "kr-OTAuroraR": 19, "kr-OTDoldamgilB": 20, "kr-OTDoldamgilL": 21, "kr-OTDoldamgilR": 22, "kr-OTHamsterB": 23, "kr-OTHamsterL": 24, "kr-OTHamsterR": 25, "kr-OTHapchangdanB": 26, "kr-OTHapchangdanL": 27, "kr-OTHapchangdanR": 28, "kr-OTSupersizeBkBOX": 29, "kr-SourceHanSansKR-Bold": 30, "kr-SourceHanSansKR-ExtraLight": 31, "kr-SourceHanSansKR-Heavy": 32, "kr-SourceHanSansKR-Light": 33, "kr-SourceHanSansKR-Medium": 34, "kr-SourceHanSansKR-Normal": 35, "kr-SourceHanSansKR-Regular": 36, 
"kr-SourceHanSansSC-Bold": 37, "kr-SourceHanSansSC-ExtraLight": 38, "kr-SourceHanSansSC-Heavy": 39, "kr-SourceHanSansSC-Light": 40, "kr-SourceHanSansSC-Medium": 41, "kr-SourceHanSansSC-Normal": 42, "kr-SourceHanSansSC-Regular": 43, "kr-SourceHanSerifSC-Bold": 44, "kr-SourceHanSerifSC-SemiBold": 45, "kr-TDTDBubbleBubbleOTF": 46, "kr-TDTDConfusionOTF": 47, "kr-TDTDCuteAndCuteOTF": 48, "kr-TDTDEggTakOTF": 49, "kr-TDTDEmotionalLetterOTF": 50, "kr-TDTDGalapagosOTF": 51, "kr-TDTDHappyHourOTF": 52, "kr-TDTDLatteOTF": 53, "kr-TDTDMoonLightOTF": 54, "kr-TDTDParkForestOTF": 55, "kr-TDTDPencilOTF": 56, "kr-TDTDSmileOTF": 57, "kr-TDTDSproutOTF": 58, "kr-TDTDSunshineOTF": 59, "kr-TDTDWaferOTF": 60, "kr-777Chyaochyureu": 61, "kr-ArialUnicodeMS-Bold": 62, "kr-ArialUnicodeMS": 63, "kr-BMHANNA": 64, "kr-Baekmuk-Dotum": 65, "kr-BagelFatOne-Regular": 66, "kr-CoreBandi": 67, "kr-CoreBandiFace": 68, "kr-CoreBori": 69, "kr-DoHyeon-Regular": 70, "kr-Dokdo-Regular": 71, "kr-Gaegu-Bold": 72, "kr-Gaegu-Light": 73, "kr-Gaegu-Regular": 74, "kr-GamjaFlower-Regular": 75, "kr-GasoekOne-Regular": 76, "kr-GothicA1-Black": 77, "kr-GothicA1-Bold": 78, "kr-GothicA1-ExtraBold": 79, "kr-GothicA1-ExtraLight": 80, "kr-GothicA1-Light": 81, "kr-GothicA1-Medium": 82, "kr-GothicA1-Regular": 83, "kr-GothicA1-SemiBold": 84, "kr-GothicA1-Thin": 85, "kr-Gugi-Regular": 86, "kr-HiMelody-Regular": 87, "kr-Jua-Regular": 88, "kr-KirangHaerang-Regular": 89, "kr-NanumBrush": 90, "kr-NanumPen": 91, "kr-NanumSquareRoundB": 92, "kr-NanumSquareRoundEB": 93, "kr-NanumSquareRoundL": 94, "kr-NanumSquareRoundR": 95, "kr-SeH-CB": 96, "kr-SeH-CBL": 97, "kr-SeH-CEB": 98, "kr-SeH-CL": 99, "kr-SeH-CM": 100, "kr-SeN-CB": 101, "kr-SeN-CBL": 102, "kr-SeN-CEB": 103, "kr-SeN-CL": 104, "kr-SeN-CM": 105, "kr-Sunflower-Bold": 106, "kr-Sunflower-Light": 107, "kr-Sunflower-Medium": 108, "kr-TTClaytoyR": 109, "kr-TTDalpangiR": 110, "kr-TTMamablockR": 111, "kr-TTNauidongmuR": 112, "kr-TTOktapbangR": 113, "kr-UhBeeMiMi": 114, 
"kr-UhBeeMiMiBold": 115, "kr-UhBeeSe_hyun": 116, "kr-UhBeeSe_hyunBold": 117, "kr-UhBeenamsoyoung": 118, "kr-UhBeenamsoyoungBold": 119, "kr-WenQuanYiMicroHei": 120, "kr-YeonSung-Regular": 121}
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/assets/teaser/teaser_multilingual_1.webp b/text_encoder/Glyph-SDXL-v2/assets/teaser/teaser_multilingual_1.webp
new file mode 100644
index 0000000000000000000000000000000000000000..909b3e8be5452ae637f1e0e1b59eb73a1cd6393e
Binary files /dev/null and b/text_encoder/Glyph-SDXL-v2/assets/teaser/teaser_multilingual_1.webp differ
diff --git a/text_encoder/Glyph-SDXL-v2/assets/teaser/teaser_multilingual_2.webp b/text_encoder/Glyph-SDXL-v2/assets/teaser/teaser_multilingual_2.webp
new file mode 100644
index 0000000000000000000000000000000000000000..ab48fd87a16df9634f84518339829f9d65e15b5f
Binary files /dev/null and b/text_encoder/Glyph-SDXL-v2/assets/teaser/teaser_multilingual_2.webp differ
diff --git a/text_encoder/Glyph-SDXL-v2/assets/teaser/teaser_multilingual_3.webp b/text_encoder/Glyph-SDXL-v2/assets/teaser/teaser_multilingual_3.webp
new file mode 100644
index 0000000000000000000000000000000000000000..24e0592c44b57004b5402ba46dddb6057f472877
Binary files /dev/null and b/text_encoder/Glyph-SDXL-v2/assets/teaser/teaser_multilingual_3.webp differ
diff --git a/text_encoder/Glyph-SDXL-v2/assets/teaser/teaser_multilingual_4.webp b/text_encoder/Glyph-SDXL-v2/assets/teaser/teaser_multilingual_4.webp
new file mode 100644
index 0000000000000000000000000000000000000000..7a96ae0b74bfe2d94222cc202fadc32f4add880b
Binary files /dev/null and b/text_encoder/Glyph-SDXL-v2/assets/teaser/teaser_multilingual_4.webp differ
diff --git a/text_encoder/Glyph-SDXL-v2/checkpoints/byt5_mapper.pt b/text_encoder/Glyph-SDXL-v2/checkpoints/byt5_mapper.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7f2231e72af0ea65c02a47d4eff2c43c8e137cce
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/checkpoints/byt5_mapper.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d5911cf07328d949eff29cc08ca7637dc4fe5312a8fa351ca4bec07d357b1c5
+size 301553807
diff --git a/text_encoder/Glyph-SDXL-v2/checkpoints/byt5_model.pt b/text_encoder/Glyph-SDXL-v2/checkpoints/byt5_model.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b75471ebeb3c18d4fb673501aa73e30ad431df3d
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/checkpoints/byt5_model.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca8c97c89136f767d4534449bbf3f25296d390574e0af1cc16f09774a901d6db
+size 877308845
diff --git a/text_encoder/Glyph-SDXL-v2/checkpoints/unet_inserted_attn.pt b/text_encoder/Glyph-SDXL-v2/checkpoints/unet_inserted_attn.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9a0fa4873f6b2f36220807261c193f17df58c5c2
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/checkpoints/unet_inserted_attn.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b6af4376281be262f3b52ca0b16b0244099161693f65a7db352f53878481767
+size 908
diff --git a/text_encoder/Glyph-SDXL-v2/checkpoints/unet_lora.pt b/text_encoder/Glyph-SDXL-v2/checkpoints/unet_lora.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b2151650fd973486e89471bea239de0e663f110c
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/checkpoints/unet_lora.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47ae2328a9c4892a24c4a66f25780ab61a55cbd8eb693a1966cc99e674e832be
+size 743590514
diff --git a/text_encoder/Glyph-SDXL-v2/configs/glyph_sdxl_multilingual_albedo.py b/text_encoder/Glyph-SDXL-v2/configs/glyph_sdxl_multilingual_albedo.py
new file mode 100644
index 0000000000000000000000000000000000000000..82706d92909f4cf10f07c9edd90c6d2da9c77502
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/configs/glyph_sdxl_multilingual_albedo.py
@@ -0,0 +1,96 @@
+#### Model Setting
+# Identifiers of the pretrained base model and of the VAE weights.
+# NOTE(review): these look like Hugging Face Hub repo ids -- confirm against the loader.
+pretrained_model_name_or_path = 'stablediffusionapi/albedobase-xl-20'
+pretrained_vae_model_name_or_path = 'madebyollin/sdxl-vae-fp16-fix'
+# Left unset (None).
+revision = None
+
+# ByT5 settings.
+# NOTE(review): byt5_max_length is presumably the length ByT5 inputs are padded/truncated to -- confirm.
+byt5_max_length = 512
+# Name of the mapper type and the keyword arguments that configure it.
+# NOTE(review): presumably resolved by name when the model is built -- confirm.
+byt5_mapper_type = 'T5EncoderBlockByT5Mapper'
+byt5_mapper_config = dict(
+ num_layers=4,
+ sdxl_channels=2048,
+)
+# ByT5 options: special tokens are switched on for colors and fonts, and the
+# two *_ann_path entries are relative paths to JSON index files.
+# NOTE(review): the directory these paths are resolved against is not visible here -- confirm.
+byt5_config = dict(
+ byt5_name='google/byt5-small',
+ special_token=True,
+ color_special_token=True,
+ font_special_token=True,
+ color_ann_path='assets/color_idx.json',
+ font_ann_path='assets/multilingual_10-lang_idx.json',
+ multilingual=True,
+)
+
+# 70 dotted module paths: transformer blocks under down_blocks.1, down_blocks.2,
+# up_blocks.0, up_blocks.1 and mid_block.
+# NOTE(review): presumably the UNet blocks that get replaced by glyph cross-attention blocks -- confirm.
+attn_block_to_modify = [
+ "down_blocks.1.attentions.0.transformer_blocks.0",
+ "down_blocks.1.attentions.0.transformer_blocks.1",
+ "down_blocks.1.attentions.1.transformer_blocks.0",
+ "down_blocks.1.attentions.1.transformer_blocks.1",
+ "down_blocks.2.attentions.0.transformer_blocks.0",
+ "down_blocks.2.attentions.0.transformer_blocks.1",
+ "down_blocks.2.attentions.0.transformer_blocks.2",
+ "down_blocks.2.attentions.0.transformer_blocks.3",
+ "down_blocks.2.attentions.0.transformer_blocks.4",
+ "down_blocks.2.attentions.0.transformer_blocks.5",
+ "down_blocks.2.attentions.0.transformer_blocks.6",
+ "down_blocks.2.attentions.0.transformer_blocks.7",
+ "down_blocks.2.attentions.0.transformer_blocks.8",
+ "down_blocks.2.attentions.0.transformer_blocks.9",
+ "down_blocks.2.attentions.1.transformer_blocks.0",
+ "down_blocks.2.attentions.1.transformer_blocks.1",
+ "down_blocks.2.attentions.1.transformer_blocks.2",
+ "down_blocks.2.attentions.1.transformer_blocks.3",
+ "down_blocks.2.attentions.1.transformer_blocks.4",
+ "down_blocks.2.attentions.1.transformer_blocks.5",
+ "down_blocks.2.attentions.1.transformer_blocks.6",
+ "down_blocks.2.attentions.1.transformer_blocks.7",
+ "down_blocks.2.attentions.1.transformer_blocks.8",
+ "down_blocks.2.attentions.1.transformer_blocks.9",
+ "up_blocks.0.attentions.0.transformer_blocks.0",
+ "up_blocks.0.attentions.0.transformer_blocks.1",
+ "up_blocks.0.attentions.0.transformer_blocks.2",
+ "up_blocks.0.attentions.0.transformer_blocks.3",
+ "up_blocks.0.attentions.0.transformer_blocks.4",
+ "up_blocks.0.attentions.0.transformer_blocks.5",
+ "up_blocks.0.attentions.0.transformer_blocks.6",
+ "up_blocks.0.attentions.0.transformer_blocks.7",
+ "up_blocks.0.attentions.0.transformer_blocks.8",
+ "up_blocks.0.attentions.0.transformer_blocks.9",
+ "up_blocks.0.attentions.1.transformer_blocks.0",
+ "up_blocks.0.attentions.1.transformer_blocks.1",
+ "up_blocks.0.attentions.1.transformer_blocks.2",
+ "up_blocks.0.attentions.1.transformer_blocks.3",
+ "up_blocks.0.attentions.1.transformer_blocks.4",
+ "up_blocks.0.attentions.1.transformer_blocks.5",
+ "up_blocks.0.attentions.1.transformer_blocks.6",
+ "up_blocks.0.attentions.1.transformer_blocks.7",
+ "up_blocks.0.attentions.1.transformer_blocks.8",
+ "up_blocks.0.attentions.1.transformer_blocks.9",
+ "up_blocks.0.attentions.2.transformer_blocks.0",
+ "up_blocks.0.attentions.2.transformer_blocks.1",
+ "up_blocks.0.attentions.2.transformer_blocks.2",
+ "up_blocks.0.attentions.2.transformer_blocks.3",
+ "up_blocks.0.attentions.2.transformer_blocks.4",
+ "up_blocks.0.attentions.2.transformer_blocks.5",
+ "up_blocks.0.attentions.2.transformer_blocks.6",
+ "up_blocks.0.attentions.2.transformer_blocks.7",
+ "up_blocks.0.attentions.2.transformer_blocks.8",
+ "up_blocks.0.attentions.2.transformer_blocks.9",
+ "up_blocks.1.attentions.0.transformer_blocks.0",
+ "up_blocks.1.attentions.0.transformer_blocks.1",
+ "up_blocks.1.attentions.1.transformer_blocks.0",
+ "up_blocks.1.attentions.1.transformer_blocks.1",
+ "up_blocks.1.attentions.2.transformer_blocks.0",
+ "up_blocks.1.attentions.2.transformer_blocks.1",
+ "mid_block.attentions.0.transformer_blocks.0",
+ "mid_block.attentions.0.transformer_blocks.1",
+ "mid_block.attentions.0.transformer_blocks.2",
+ "mid_block.attentions.0.transformer_blocks.3",
+ "mid_block.attentions.0.transformer_blocks.4",
+ "mid_block.attentions.0.transformer_blocks.5",
+ "mid_block.attentions.0.transformer_blocks.6",
+ "mid_block.attentions.0.transformer_blocks.7",
+ "mid_block.attentions.0.transformer_blocks.8",
+ "mid_block.attentions.0.transformer_blocks.9",
+]
+
+# NOTE(review): presumably the LoRA rank used for the UNet and the dtype used at inference -- confirm.
+unet_lora_rank = 128
+inference_dtype = 'fp16'
diff --git a/text_encoder/Glyph-SDXL-v2/configuration.json b/text_encoder/Glyph-SDXL-v2/configuration.json
new file mode 100644
index 0000000000000000000000000000000000000000..7600045091e74defbb184808c1b4482398a7def8
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/configuration.json
@@ -0,0 +1 @@
+{"framework": "pytorch", "task": "text-image", "allow_remote": true}
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/examples/xiaoman.json b/text_encoder/Glyph-SDXL-v2/examples/xiaoman.json
new file mode 100644
index 0000000000000000000000000000000000000000..36197355190ad747327c0c6940797258835dafa7
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/examples/xiaoman.json
@@ -0,0 +1,65 @@
+{
+ "texts": [
+ "小满是二十四节气之一,夏季的第二个节气。该节气是指夏熟作物的籽粒开始灌浆饱满,但还未成熟,只是小满,还未大满。",
+ "2022.5.21",
+ "饱满的灵魂 无畏的生长 二十四节气之一",
+ "今日小满",
+ "Grain Buds"
+ ],
+ "styles": [
+ {
+ "color": "#427227",
+ "font-family": "cn-HYQiHei-AZEJ"
+ },
+ {
+ "font-family": "en-TAN MERINGUE",
+ "color": "#f89b2b"
+ },
+ {
+ "color": "#ffffff",
+ "font-family": "cn-SourceHanSansSC-ExtraLight"
+ },
+ {
+ "color": "#427227",
+ "font-family": "cn-AlibabaPuHuiTi-Bold"
+ },
+ {
+ "color": "#427227",
+ "font-family": "en-SairaCondensed-Regular"
+ }
+ ],
+ "bbox": [
+ [
+ 0.09969604863221884,
+ 0.4370820668693009,
+ 0.31124620060790276,
+ 0.2072948328267477
+ ],
+ [
+ 0.10455927051671733,
+ 0.09908814589665653,
+ 0.22127659574468084,
+ 0.034650455927051675
+ ],
+ [
+ 0.09969604863221884,
+ 0.9398176291793313,
+ 0.7993920972644377,
+ 0.026747720364741642
+ ],
+ [
+ 0.09787234042553192,
+ 0.17142857142857143,
+ 0.4231003039513678,
+ 0.10577507598784194
+ ],
+ [
+ 0.10091185410334347,
+ 0.3100303951367781,
+ 0.2772036474164134,
+ 0.053495440729483285
+ ]
+ ],
+ "bg_prompt": "The image portrays a young girl sitting on a large green leaf. The leaf is part of a plant with other green leaves. The girl is wearing a yellow dress and a straw hat. She is holding a small yellow flower in her hand. The background of the image is a light blue sky with a few clouds. The overall style of the image is a colorful, cartoon-like illustration.",
+ "seed": 0
+}
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/glyph_sdxl/custom_diffusers/__init__.py b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/custom_diffusers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb8e293067659717f27792b10c14413fa72b4f24
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/custom_diffusers/__init__.py
@@ -0,0 +1,2 @@
+from .pipelines import *
+from .models import *
diff --git a/text_encoder/Glyph-SDXL-v2/glyph_sdxl/custom_diffusers/models/__init__.py b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/custom_diffusers/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..648dae2e74c678c47c86f631522e24d07d2e8569
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/custom_diffusers/models/__init__.py
@@ -0,0 +1,3 @@
+from .cross_attn_insert_transformer_blocks import CrossAttnInsertBasicTransformerBlock
+
+__all__ = ['CrossAttnInsertBasicTransformerBlock']
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/glyph_sdxl/custom_diffusers/models/cross_attn_insert_transformer_blocks.py b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/custom_diffusers/models/cross_attn_insert_transformer_blocks.py
new file mode 100644
index 0000000000000000000000000000000000000000..8df97ea5a3baa47f8aedef66cd225f4530ef6002
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/custom_diffusers/models/cross_attn_insert_transformer_blocks.py
@@ -0,0 +1,377 @@
+from typing import Optional, Dict, Any
+import copy
+
+import torch
+import torch.nn as nn
+
+from diffusers.models.attention import (
+ BasicTransformerBlock,
+ SinusoidalPositionalEmbedding,
+ AdaLayerNorm,
+ AdaLayerNormZero,
+ AdaLayerNormContinuous,
+ Attention,
+ FeedForward,
+ GatedSelfAttentionDense,
+ GELU,
+ GEGLU,
+ ApproximateGELU,
+ _chunked_feed_forward,
+)
+
+class CrossAttnInsertBasicTransformerBlock(BasicTransformerBlock):
+ def __init__(
+ self,
+ dim: int,
+ num_attention_heads: int,
+ attention_head_dim: int,
+ dropout=0.0,
+ cross_attention_dim: Optional[int] = None,
+ glyph_cross_attention_dim: Optional[int] = None,
+ activation_fn: str = "geglu",
+ num_embeds_ada_norm: Optional[int] = None,
+ attention_bias: bool = False,
+ only_cross_attention: bool = False,
+ double_self_attention: bool = False,
+ upcast_attention: bool = False,
+ norm_elementwise_affine: bool = True,
+ norm_type: str = "layer_norm", # 'layer_norm', 'ada_norm', 'ada_norm_zero', 'ada_norm_single', 'layer_norm_i2vgen'
+ norm_eps: float = 1e-5,
+ final_dropout: bool = False,
+ attention_type: str = "default",
+ positional_embeddings: Optional[str] = None,
+ num_positional_embeddings: Optional[int] = None,
+ ada_norm_continous_conditioning_embedding_dim: Optional[int] = None,
+ ada_norm_bias: Optional[int] = None,
+ ff_inner_dim: Optional[int] = None,
+ ff_bias: bool = True,
+ attention_out_bias: bool = True,
+ ):
+ """Create the self-attention, optional cross-attention and feed-forward sub-modules.
+
+ What this constructor sets up:
+ - ``pos_embed``: a ``SinusoidalPositionalEmbedding`` when
+ ``positional_embeddings == "sinusoidal"``, otherwise ``None``.
+ - ``norm1`` / ``attn1``: always created; ``attn1`` receives
+ ``cross_attention_dim`` only when ``only_cross_attention`` is true.
+ - ``norm2`` / ``attn2``: created when ``cross_attention_dim`` is given or
+ ``double_self_attention`` is set; otherwise both are ``None``.
+ - ``norm3`` / ``ff``: ``ff`` is always created; ``norm3`` is assigned only
+ for the norm types handled by the branches below.
+ - ``fuser``: created only for ``attention_type`` "gated" / "gated-text-image".
+ - ``scale_shift_table``: created only for ``norm_type == "ada_norm_single"``.
+
+ ``glyph_cross_attention_dim`` is accepted but not referenced in this body.
+ NOTE(review): presumably consumed by subclasses -- confirm.
+
+ Raises:
+ ValueError: if ``norm_type`` is "ada_norm" or "ada_norm_zero" while
+ ``num_embeds_ada_norm`` is None, or if ``positional_embeddings`` is set
+ while ``num_positional_embeddings`` is None.
+ """
+ # Starts the MRO lookup *after* BasicTransformerBlock, so that class's own
+ # __init__ is not executed; all sub-modules are built in this method instead.
+ super(BasicTransformerBlock, self).__init__()
+ self.only_cross_attention = only_cross_attention
+
+ if norm_type in ("ada_norm", "ada_norm_zero") and num_embeds_ada_norm is None:
+ raise ValueError(
+ f"`norm_type` is set to {norm_type}, but `num_embeds_ada_norm` is not defined. Please make sure to"
+ f" define `num_embeds_ada_norm` if setting `norm_type` to {norm_type}."
+ )
+
+ self.norm_type = norm_type
+ self.num_embeds_ada_norm = num_embeds_ada_norm
+
+ if positional_embeddings and (num_positional_embeddings is None):
+ raise ValueError(
+ "If `positional_embedding` type is defined, `num_positition_embeddings` must also be defined."
+ )
+
+ if positional_embeddings == "sinusoidal":
+ self.pos_embed = SinusoidalPositionalEmbedding(dim, max_seq_length=num_positional_embeddings)
+ else:
+ self.pos_embed = None
+
+ # Define 3 blocks. Each block has its own normalization layer.
+ # 1. Self-Attn
+ if norm_type == "ada_norm":
+ self.norm1 = AdaLayerNorm(dim, num_embeds_ada_norm)
+ elif norm_type == "ada_norm_zero":
+ self.norm1 = AdaLayerNormZero(dim, num_embeds_ada_norm)
+ elif norm_type == "ada_norm_continuous":
+ self.norm1 = AdaLayerNormContinuous(
+ dim,
+ ada_norm_continous_conditioning_embedding_dim,
+ norm_elementwise_affine,
+ norm_eps,
+ ada_norm_bias,
+ "rms_norm",
+ )
+ else:
+ self.norm1 = nn.LayerNorm(dim, elementwise_affine=norm_elementwise_affine, eps=norm_eps)
+
+ # attn1 only gets a cross-attention width when the block is cross-attention-only.
+ self.attn1 = Attention(
+ query_dim=dim,
+ heads=num_attention_heads,
+ dim_head=attention_head_dim,
+ dropout=dropout,
+ bias=attention_bias,
+ cross_attention_dim=cross_attention_dim if only_cross_attention else None,
+ upcast_attention=upcast_attention,
+ out_bias=attention_out_bias,
+ )
+
+ # 2. Cross-Attn
+ if cross_attention_dim is not None or double_self_attention:
+ # We currently only use AdaLayerNormZero for self attention where there will only be one attention block.
+ # I.e. the number of returned modulation chunks from AdaLayerZero would not make sense if returned during
+ # the second cross attention block.
+ if norm_type == "ada_norm":
+ self.norm2 = AdaLayerNorm(dim, num_embeds_ada_norm)
+ elif norm_type == "ada_norm_continuous":
+ self.norm2 = AdaLayerNormContinuous(
+ dim,
+ ada_norm_continous_conditioning_embedding_dim,
+ norm_elementwise_affine,
+ norm_eps,
+ ada_norm_bias,
+ "rms_norm",
+ )
+ else:
+ self.norm2 = nn.LayerNorm(dim, norm_eps, norm_elementwise_affine)
+
+ self.attn2 = Attention(
+ query_dim=dim,
+ cross_attention_dim=cross_attention_dim if not double_self_attention else None,
+ heads=num_attention_heads,
+ dim_head=attention_head_dim,
+ dropout=dropout,
+ bias=attention_bias,
+ upcast_attention=upcast_attention,
+ out_bias=attention_out_bias,
+ ) # is self-attn if encoder_hidden_states is none
+ else:
+ self.norm2 = None
+ self.attn2 = None
+
+ # 3. Feed-forward
+ # The first branch already handles "ada_norm_continuous", so that entry in the
+ # list of the second branch is never reached. No branch matches
+ # "ada_norm_single", so `norm3` is not assigned for that norm type.
+ if norm_type == "ada_norm_continuous":
+ self.norm3 = AdaLayerNormContinuous(
+ dim,
+ ada_norm_continous_conditioning_embedding_dim,
+ norm_elementwise_affine,
+ norm_eps,
+ ada_norm_bias,
+ "layer_norm",
+ )
+
+ elif norm_type in ["ada_norm_zero", "ada_norm", "layer_norm", "ada_norm_continuous"]:
+ self.norm3 = nn.LayerNorm(dim, norm_eps, norm_elementwise_affine)
+ elif norm_type == "layer_norm_i2vgen":
+ self.norm3 = None
+
+ self.ff = FeedForward(
+ dim,
+ dropout=dropout,
+ activation_fn=activation_fn,
+ final_dropout=final_dropout,
+ inner_dim=ff_inner_dim,
+ bias=ff_bias,
+ )
+
+ # 4. Fuser
+ # `self.fuser` is only defined for the two gated attention types.
+ if attention_type == "gated" or attention_type == "gated-text-image":
+ self.fuser = GatedSelfAttentionDense(dim, cross_attention_dim, num_attention_heads, attention_head_dim)
+
+ # 5. Scale-shift for PixArt-Alpha.
+ if norm_type == "ada_norm_single":
+ self.scale_shift_table = nn.Parameter(torch.randn(6, dim) / dim**0.5)
+
+ # let chunk size default to None
+ self._chunk_size = None
+ self._chunk_dim = 0
+
+    def get_inserted_modules(self):
+        """Return the inserted modules; this implementation always yields an empty tuple.
+
+        NOTE(review): presumably overridden by subclasses that add modules -- confirm.
+        """
+        return tuple()
+
+    def get_inserted_modules_names(self):
+        """Return the names of the inserted modules; this implementation always yields an empty tuple.
+
+        NOTE(review): presumably overridden together with ``get_inserted_modules`` -- confirm.
+        """
+        return tuple()
+
+    def get_origin_modules(self):
+        """Return a tuple of the direct child modules that ``get_inserted_modules()`` does not list.
+
+        Children are taken from ``self.children()`` and kept in that order.
+        """
+        excluded = self.get_inserted_modules()
+        return tuple(child for child in self.children() if child not in excluded)
+
+
+    @classmethod
+    def from_transformer_block(
+        cls,
+        transformer_block,
+        glyph_cross_attention_dim,
+    ):
+        """Build an instance of ``cls`` that mirrors an existing transformer block.
+
+        Constructor arguments are read back from the sub-modules of
+        ``transformer_block``, a new block is created from them, and the source
+        block's ``state_dict`` is loaded into it non-strictly.
+
+        Args:
+            transformer_block: Source block. It must expose ``attn1``, ``attn2``
+                (may be ``None``), ``ff``, ``norm1``, ``norm_type``,
+                ``num_embeds_ada_norm`` and ``only_cross_attention``. ``norm1``
+                must be an ``nn.LayerNorm`` and the block must not have a ``fuser``.
+            glyph_cross_attention_dim: Forwarded unchanged to the constructor.
+
+        Returns:
+            The newly built block with the source weights loaded.
+
+        Raises:
+            ValueError: If the first feed-forward module is not a ``GELU``,
+                ``GEGLU`` or ``ApproximateGELU``.
+            AssertionError: If ``norm1`` is not an ``nn.LayerNorm``, a ``fuser`` is
+                present, the source state dict has keys the new block lacks, or a
+                key missing from the source does not start with ``glyph``.
+        """
+        attn1 = transformer_block.attn1
+        attn2 = transformer_block.attn2
+
+        inner_dim = attn1.query_dim
+        num_attention_heads = attn1.heads
+        attention_head_dim = attn1.inner_dim // num_attention_heads
+        dropout = attn1.dropout
+        attention_bias = attn1.to_q.bias is not None
+        upcast_attention = attn1.upcast_attention
+
+        if attn2 is None:
+            # A block built without cross-attention has `attn2 = None`; mirror that
+            # instead of failing on the attribute access below.
+            cross_attention_dim = None
+            double_self_attention = False
+        else:
+            cross_attention_dim = attn2.cross_attention_dim
+            # NOTE(review): some diffusers versions store `query_dim` in
+            # `Attention.cross_attention_dim` when no cross-attention width was
+            # given, in which case this is never True -- confirm for the pinned version.
+            double_self_attention = attn2.cross_attention_dim is None
+
+        # Recover the activation name from the type of the first feed-forward module.
+        activation = transformer_block.ff.net[0]
+        if isinstance(activation, GELU):
+            activation_fn = "gelu-approximate" if activation.approximate == "tanh" else "gelu"
+        elif isinstance(activation, GEGLU):
+            activation_fn = "geglu"
+        elif isinstance(activation, ApproximateGELU):
+            activation_fn = "geglu-approximate"
+        else:
+            # Previously this fell through and crashed later on an unbound local.
+            raise ValueError(
+                f"Unsupported feed-forward activation module: {type(activation).__name__}"
+            )
+
+        num_embeds_ada_norm = transformer_block.num_embeds_ada_norm
+        only_cross_attention = transformer_block.only_cross_attention
+        norm_type = transformer_block.norm_type
+
+        norm1 = transformer_block.norm1
+        assert isinstance(norm1, nn.LayerNorm), "only blocks whose norm1 is nn.LayerNorm are supported"
+        norm_elementwise_affine = norm1.elementwise_affine
+        norm_eps = norm1.eps
+
+        assert getattr(transformer_block, 'fuser', None) is None, "blocks with a fuser are not supported"
+        attention_type = "default"
+
+        model = cls(
+            inner_dim,
+            num_attention_heads,
+            attention_head_dim,
+            dropout=dropout,
+            cross_attention_dim=cross_attention_dim,
+            glyph_cross_attention_dim=glyph_cross_attention_dim,
+            activation_fn=activation_fn,
+            num_embeds_ada_norm=num_embeds_ada_norm,
+            attention_bias=attention_bias,
+            only_cross_attention=only_cross_attention,
+            double_self_attention=double_self_attention,
+            upcast_attention=upcast_attention,
+            norm_type=norm_type,
+            norm_elementwise_affine=norm_elementwise_affine,
+            norm_eps=norm_eps,
+            attention_type=attention_type,
+        )
+
+        # Non-strict load: the only keys allowed to be absent from the source are
+        # those whose name starts with 'glyph'.
+        missing_keys, unexpected_keys = model.load_state_dict(
+            transformer_block.state_dict(),
+            strict=False,
+        )
+        assert len(unexpected_keys) == 0, f"unexpected keys in source state dict: {unexpected_keys}"
+        assert all(i.startswith('glyph') for i in missing_keys), (
+            f"source state dict is missing non-glyph keys: {missing_keys}"
+        )
+
+        return model
+
+    def forward(
+        self,
+        hidden_states: torch.FloatTensor,
+        attention_mask: Optional[torch.FloatTensor] = None,
+        encoder_hidden_states: Optional[torch.FloatTensor] = None,
+        encoder_attention_mask: Optional[torch.FloatTensor] = None,
+        timestep: Optional[torch.LongTensor] = None,
+        cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+        class_labels: Optional[torch.LongTensor] = None,
+        added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
+    ) -> torch.FloatTensor:
+        """Run self-attention, optional GLIGEN fusion, cross-attention and feed-forward.
+
+        Each stage normalizes its input first and adds its output back onto
+        ``hidden_states``. Cross-attention attends over ``encoder_hidden_states``
+        concatenated (along dim 1) with the glyph states taken from
+        ``cross_attention_kwargs``.
+
+        Args:
+            hidden_states: Input features; ``shape[0]`` is used as the batch size
+                and ``shape[1]`` as the key into the two mask dicts.
+            attention_mask: Forwarded to ``attn1``.
+            encoder_hidden_states: Context for cross-attention (and for ``attn1``
+                when ``only_cross_attention`` is set).
+            encoder_attention_mask: Must be ``None``; not supported by this block.
+            timestep: Used by the "ada_norm", "ada_norm_zero" and "ada_norm_single" norms.
+            cross_attention_kwargs: Optional dict. Keys read here:
+                ``"scale"`` (default 1.0, passed to the feed-forward),
+                ``"gligen"``, ``"glyph_encoder_hidden_states"``,
+                ``"glyph_attn_masks_dict"`` and ``"bg_attn_masks_dict"``. All but
+                ``"scale"`` are removed from a copy of the dict; what remains is
+                forwarded to ``attn1`` and ``attn2``.
+            class_labels: Used by the "ada_norm_zero" norm.
+            added_cond_kwargs: Must provide ``"pooled_text_emb"`` for "ada_norm_continuous".
+
+        Returns:
+            The updated ``hidden_states``.
+
+        Raises:
+            ValueError: For an unknown ``norm_type``, or when glyph states are given
+                together with exactly one of the two attention-mask dicts.
+            AssertionError: If ``encoder_attention_mask`` is not ``None``.
+        """
+        # Notice that normalization is always applied before the real computation in the following blocks.
+        # 0. Self-Attention
+        batch_size = hidden_states.shape[0]
+
+        if self.norm_type == "ada_norm":
+            norm_hidden_states = self.norm1(hidden_states, timestep)
+        elif self.norm_type == "ada_norm_zero":
+            norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(
+                hidden_states, timestep, class_labels, hidden_dtype=hidden_states.dtype
+            )
+        elif self.norm_type in ["layer_norm", "layer_norm_i2vgen"]:
+            norm_hidden_states = self.norm1(hidden_states)
+        elif self.norm_type == "ada_norm_continuous":
+            norm_hidden_states = self.norm1(hidden_states, added_cond_kwargs["pooled_text_emb"])
+        elif self.norm_type == "ada_norm_single":
+            shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = (
+                self.scale_shift_table[None] + timestep.reshape(batch_size, 6, -1)
+            ).chunk(6, dim=1)
+            norm_hidden_states = self.norm1(hidden_states)
+            norm_hidden_states = norm_hidden_states * (1 + scale_msa) + shift_msa
+            norm_hidden_states = norm_hidden_states.squeeze(1)
+        else:
+            raise ValueError("Incorrect norm used")
+
+        if self.pos_embed is not None:
+            norm_hidden_states = self.pos_embed(norm_hidden_states)
+
+        # 1. Retrieve lora scale.
+        lora_scale = cross_attention_kwargs.get("scale", 1.0) if cross_attention_kwargs is not None else 1.0
+
+        # 2. Prepare GLIGEN inputs
+        # Work on a copy so the caller's dict is not mutated by the pops below.
+        cross_attention_kwargs = cross_attention_kwargs.copy() if cross_attention_kwargs is not None else {}
+        gligen_kwargs = cross_attention_kwargs.pop("gligen", None)
+
+        glyph_encoder_hidden_states = cross_attention_kwargs.pop("glyph_encoder_hidden_states", None)
+        # The mask entries are containers indexed by the visual sequence length.
+        # NOTE(review): per the original comment the values are
+        # (b, visual_feat_len, text_feat_len) tensors -- confirm against the caller.
+        glyph_attn_mask = cross_attention_kwargs.pop("glyph_attn_masks_dict", None)
+        bg_attn_mask = cross_attention_kwargs.pop("bg_attn_masks_dict", None)
+        if glyph_attn_mask is not None:
+            glyph_attn_mask = glyph_attn_mask[hidden_states.shape[1]]
+        if bg_attn_mask is not None:
+            bg_attn_mask = bg_attn_mask[hidden_states.shape[1]]
+        assert encoder_attention_mask is None, "encoder_attention_mask is not supported in this block."
+
+        attn_output = self.attn1(
+            norm_hidden_states,
+            encoder_hidden_states=encoder_hidden_states if self.only_cross_attention else None,
+            attention_mask=attention_mask,
+            **cross_attention_kwargs,
+        )
+        if self.norm_type == "ada_norm_zero":
+            attn_output = gate_msa.unsqueeze(1) * attn_output
+        elif self.norm_type == "ada_norm_single":
+            attn_output = gate_msa * attn_output
+
+        hidden_states = attn_output + hidden_states
+        if hidden_states.ndim == 4:
+            hidden_states = hidden_states.squeeze(1)
+
+        # 2.5 GLIGEN Control
+        if gligen_kwargs is not None:
+            hidden_states = self.fuser(hidden_states, gligen_kwargs["objs"])
+
+        # 3. Cross-Attention
+        if self.attn2 is not None:
+            if self.norm_type == "ada_norm":
+                norm_hidden_states = self.norm2(hidden_states, timestep)
+            elif self.norm_type in ["ada_norm_zero", "layer_norm", "layer_norm_i2vgen"]:
+                norm_hidden_states = self.norm2(hidden_states)
+            elif self.norm_type == "ada_norm_single":
+                # For PixArt norm2 isn't applied here:
+                # https://github.com/PixArt-alpha/PixArt-alpha/blob/0f55e922376d8b797edd44d25d0e7464b260dcab/diffusion/model/nets/PixArtMS.py#L70C1-L76C103
+                norm_hidden_states = hidden_states
+            elif self.norm_type == "ada_norm_continuous":
+                norm_hidden_states = self.norm2(hidden_states, added_cond_kwargs["pooled_text_emb"])
+            else:
+                raise ValueError("Incorrect norm")
+
+            if self.pos_embed is not None and self.norm_type != "ada_norm_single":
+                norm_hidden_states = self.pos_embed(norm_hidden_states)
+
+            # Build the cross-attention context and mask. Missing glyph inputs used
+            # to crash inside torch.cat with an opaque TypeError; fall back to plain
+            # cross-attention instead, and reject a half-specified mask pair.
+            if glyph_encoder_hidden_states is None:
+                cross_states = encoder_hidden_states
+                cross_mask = bg_attn_mask
+            else:
+                cross_states = torch.cat([encoder_hidden_states, glyph_encoder_hidden_states], dim=1)
+                if bg_attn_mask is None and glyph_attn_mask is None:
+                    cross_mask = None
+                elif bg_attn_mask is None or glyph_attn_mask is None:
+                    raise ValueError(
+                        "`bg_attn_masks_dict` and `glyph_attn_masks_dict` must be provided together "
+                        "when `glyph_encoder_hidden_states` is given."
+                    )
+                else:
+                    cross_mask = torch.cat([bg_attn_mask, glyph_attn_mask], dim=-1)
+
+            attn_output = self.attn2(
+                norm_hidden_states,
+                encoder_hidden_states=cross_states,
+                attention_mask=cross_mask,
+                **cross_attention_kwargs,
+            )
+
+            hidden_states = attn_output + hidden_states
+
+        # 4. Feed-forward
+        # "ada_norm_single" skips norm3 here and is normalized with norm2 further down.
+        if self.norm_type == "ada_norm_continuous":
+            norm_hidden_states = self.norm3(hidden_states, added_cond_kwargs["pooled_text_emb"])
+        elif self.norm_type != "ada_norm_single":
+            norm_hidden_states = self.norm3(hidden_states)
+
+        if self.norm_type == "ada_norm_zero":
+            norm_hidden_states = norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None]
+
+        if self.norm_type == "ada_norm_single":
+            norm_hidden_states = self.norm2(hidden_states)
+            norm_hidden_states = norm_hidden_states * (1 + scale_mlp) + shift_mlp
+
+        if self._chunk_size is not None:
+            # "feed_forward_chunk_size" can be used to save memory
+            ff_output = _chunked_feed_forward(
+                self.ff, norm_hidden_states, self._chunk_dim, self._chunk_size, lora_scale=lora_scale
+            )
+        else:
+            ff_output = self.ff(norm_hidden_states, scale=lora_scale)
+
+        if self.norm_type == "ada_norm_zero":
+            ff_output = gate_mlp.unsqueeze(1) * ff_output
+        elif self.norm_type == "ada_norm_single":
+            ff_output = gate_mlp * ff_output
+
+        hidden_states = ff_output + hidden_states
+        if hidden_states.ndim == 4:
+            hidden_states = hidden_states.squeeze(1)
+
+        return hidden_states
+
diff --git a/text_encoder/Glyph-SDXL-v2/glyph_sdxl/custom_diffusers/pipelines/__init__.py b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/custom_diffusers/pipelines/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e594d844fea0345c966b02699048fe28f11d9c70
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/custom_diffusers/pipelines/__init__.py
@@ -0,0 +1,5 @@
+from .pipeline_stable_diffusion_glyph_xl import StableDiffusionGlyphXLPipeline
+
+__all__ = [
+ 'StableDiffusionGlyphXLPipeline',
+]
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/glyph_sdxl/custom_diffusers/pipelines/pipeline_stable_diffusion_glyph_xl.py b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/custom_diffusers/pipelines/pipeline_stable_diffusion_glyph_xl.py
new file mode 100644
index 0000000000000000000000000000000000000000..53603fe7978811e504d95543eff92f458ae5c45d
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/custom_diffusers/pipelines/pipeline_stable_diffusion_glyph_xl.py
@@ -0,0 +1,922 @@
+
+from typing import Optional, List, Union, Dict, Tuple, Callable, Any
+import torch
+
+from transformers import T5EncoderModel, T5Tokenizer
+import torch.nn.functional as F
+
+from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl import (
+ StableDiffusionXLPipeline,
+ AutoencoderKL,
+ CLIPTextModel,
+ CLIPTextModelWithProjection,
+ CLIPTokenizer,
+ UNet2DConditionModel,
+ KarrasDiffusionSchedulers,
+ CLIPVisionModelWithProjection,
+ CLIPImageProcessor,
+ VaeImageProcessor,
+ is_invisible_watermark_available,
+ StableDiffusionXLLoraLoaderMixin,
+ PipelineImageInput,
+ adjust_lora_scale_text_encoder,
+ scale_lora_layers,
+ unscale_lora_layers,
+ USE_PEFT_BACKEND,
+ StableDiffusionXLPipelineOutput,
+ ImageProjection,
+ logging,
+ rescale_noise_cfg,
+ retrieve_timesteps,
+ deprecate,
+)
+import numpy as np
+logger = logging.get_logger(__name__) # pylint: disable=invalid-name
+
+from diffusers.pipelines.stable_diffusion_xl.watermark import StableDiffusionXLWatermarker
+
+class StableDiffusionGlyphXLPipeline(StableDiffusionXLPipeline):
+ model_cpu_offload_seq = "text_encoder->text_encoder_2->byt5_text_encoder->image_encoder->unet->byt5_mapper->vae"
+ _optional_components = [
+ "tokenizer",
+ "tokenizer_2",
+ "byt5_tokenizer",
+ "text_encoder",
+ "text_encoder_2",
+ "byt5_text_encoder",
+ "byt5_mapper",
+ "image_encoder",
+ "feature_extractor",
+ ]
+ _callback_tensor_inputs = [
+ "latents",
+ "prompt_embeds",
+ "negative_prompt_embeds",
+ "add_text_embeds",
+ "add_time_ids",
+ "negative_pooled_prompt_embeds",
+ "negative_add_time_ids",
+ ]
+    def __init__(
+        self,
+        vae: AutoencoderKL,
+        text_encoder: CLIPTextModel,
+        text_encoder_2: CLIPTextModelWithProjection,
+        byt5_text_encoder: T5EncoderModel,
+        tokenizer: CLIPTokenizer,
+        tokenizer_2: CLIPTokenizer,
+        byt5_tokenizer: T5Tokenizer,
+        byt5_mapper,
+        unet: UNet2DConditionModel,
+        scheduler: KarrasDiffusionSchedulers,
+        byt5_max_length: int = 512,
+        image_encoder: CLIPVisionModelWithProjection = None,
+        feature_extractor: CLIPImageProcessor = None,
+        force_zeros_for_empty_prompt: bool = True,
+        add_watermarker: Optional[bool] = None,
+    ):
+        """Register the pipeline components and derive the helper attributes.
+
+        The ByT5 encoder, tokenizer and mapper are registered next to the other
+        components. ``force_zeros_for_empty_prompt`` and ``byt5_max_length`` are
+        written to the pipeline config, and ``byt5_max_length`` is also kept as a
+        plain attribute. When ``add_watermarker`` is ``None`` the decision is taken
+        from ``is_invisible_watermark_available()``.
+        """
+        # Start the MRO lookup after StableDiffusionXLPipeline so that class's own
+        # __init__ is not run; everything it would set up is done below.
+        super(StableDiffusionXLPipeline, self).__init__()
+
+        self.register_modules(
+            vae=vae,
+            text_encoder=text_encoder,
+            text_encoder_2=text_encoder_2,
+            byt5_text_encoder=byt5_text_encoder,
+            tokenizer=tokenizer,
+            tokenizer_2=tokenizer_2,
+            byt5_tokenizer=byt5_tokenizer,
+            byt5_mapper=byt5_mapper,
+            unet=unet,
+            scheduler=scheduler,
+            image_encoder=image_encoder,
+            feature_extractor=feature_extractor,
+        )
+        self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
+        self.register_to_config(byt5_max_length=byt5_max_length)
+
+        # Scale factor is 2 ** (number of VAE block_out_channels entries - 1).
+        num_vae_blocks = len(self.vae.config.block_out_channels)
+        self.vae_scale_factor = 2 ** (num_vae_blocks - 1)
+        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
+        self.byt5_max_length = byt5_max_length
+
+        self.default_sample_size = self.unet.config.sample_size
+
+        # Only consult the availability check when the caller expressed no preference.
+        if add_watermarker is None:
+            add_watermarker = is_invisible_watermark_available()
+
+        self.watermark = StableDiffusionXLWatermarker() if add_watermarker else None
+
+ def encode_prompt(
+ self,
+ prompt: str,
+ prompt_2: Optional[str] = None,
+ text_prompt = None,
+ device: Optional[torch.device] = None,
+ num_images_per_prompt: int = 1,
+ do_classifier_free_guidance: bool = True,
+ negative_prompt: Optional[str] = None,
+ negative_prompt_2: Optional[str] = None,
+ prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ lora_scale: Optional[float] = None,
+ clip_skip: Optional[int] = None,
+ text_attn_mask: Optional[torch.LongTensor] = None,
+ byt5_prompt_embeds: Optional[torch.FloatTensor] = None,
+ ):
+ r"""
+ Encodes the prompt into text encoder hidden states.
+
+ Args:
+ prompt (`str` or `List[str]`, *optional*):
+ prompt to be encoded
+ prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
+ used in both text-encoders
+ device: (`torch.device`):
+ torch device
+ num_images_per_prompt (`int`):
+ number of images that should be generated per prompt
+ do_classifier_free_guidance (`bool`):
+ whether to use classifier free guidance or not
+ negative_prompt (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation. If not defined, one has to pass
+ `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
+ less than `1`).
+ negative_prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
+ `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
+ prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
+ provided, text embeddings will be generated from `prompt` input argument.
+ negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
+ argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
+ lora_scale (`float`, *optional*):
+ A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
+ clip_skip (`int`, *optional*):
+ Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
+ the output of the pre-final layer will be used for computing the prompt embeddings.
+ """
+ device = device or self._execution_device
+
+ # set lora scale so that monkey patched LoRA
+ # function of text encoder can correctly access it
+ if lora_scale is not None and isinstance(self, StableDiffusionXLLoraLoaderMixin):
+ self._lora_scale = lora_scale
+
+ # dynamically adjust the LoRA scale
+ if self.text_encoder is not None:
+ if not USE_PEFT_BACKEND:
+ adjust_lora_scale_text_encoder(self.text_encoder, lora_scale)
+ else:
+ scale_lora_layers(self.text_encoder, lora_scale)
+
+ if self.text_encoder_2 is not None:
+ if not USE_PEFT_BACKEND:
+ adjust_lora_scale_text_encoder(self.text_encoder_2, lora_scale)
+ else:
+ scale_lora_layers(self.text_encoder_2, lora_scale)
+
+ prompt = [prompt] if isinstance(prompt, str) else prompt
+
+ if prompt is not None:
+ batch_size = len(prompt)
+ else:
+ batch_size = prompt_embeds.shape[0]
+
+ # Define tokenizers and text encoders
+ tokenizers = [self.tokenizer, self.tokenizer_2] if self.tokenizer is not None else [self.tokenizer_2]
+ text_encoders = (
+ [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
+ )
+
+ if prompt_embeds is None:
+ assert len(prompt) == 1
+ prompt_2 = prompt_2 or prompt
+ prompt_2 = [prompt_2] if isinstance(prompt_2, str) else prompt_2
+
+ text_prompt = [text_prompt] if isinstance(text_prompt, str) else text_prompt
+
+ # textual inversion: procecss multi-vector tokens if necessary
+ prompt_embeds_list = []
+ prompts = [prompt, prompt_2]
+ text_input_id_batchs = []
+ for prompt, tokenizer in zip(prompts, tokenizers):
+ pad_token = tokenizer.pad_token_id
+ total_tokens = tokenizer(prompt, truncation=False)['input_ids'][0]
+ bos = total_tokens[0]
+ eos = total_tokens[-1]
+ total_tokens = total_tokens[1:-1]
+ new_total_tokens = []
+ empty_flag = True
+ while len(total_tokens) >= 75:
+ head_75_tokens = [total_tokens.pop(0) for _ in range(75)]
+ temp_77_token_ids = [bos] + head_75_tokens + [eos]
+ new_total_tokens.append(temp_77_token_ids)
+ empty_flag = False
+ if len(total_tokens) > 0 or empty_flag:
+ padding_len = 75 - len(total_tokens)
+ temp_77_token_ids = [bos] + total_tokens + [eos] + [pad_token] * padding_len
+ new_total_tokens.append(temp_77_token_ids)
+ # 1,segment_len, 77
+ new_total_tokens = torch.tensor(new_total_tokens, dtype=torch.long).unsqueeze(0)
+ text_input_id_batchs.append(new_total_tokens)
+ if text_input_id_batchs[0].shape[1] > text_input_id_batchs[1].shape[1]:
+ tokenizer = tokenizers[1]
+ pad_token = tokenizer.pad_token_id
+ bos = tokenizer.bos_token_id
+ eos = tokenizer.eos_token_id
+ padding_len = text_input_id_batchs[0].shape[1] - text_input_id_batchs[1].shape[1]
+ # padding_len, 77
+ padding_part = torch.tensor([[bos] + [eos] + [pad_token] * 75 for _ in range(padding_len)])
+ # 1, padding_len, 77
+ padding_part = padding_part.unsqueeze(0)
+ text_input_id_batchs[1] = torch.cat((text_input_id_batchs[1],padding_part), dim=1)
+ elif text_input_id_batchs[0].shape[1] < text_input_id_batchs[1].shape[1]:
+ tokenizer = tokenizers[0]
+ pad_token = tokenizer.pad_token_id
+ bos = tokenizer.bos_token_id
+ eos = tokenizer.eos_token_id
+ padding_len = text_input_id_batchs[1].shape[1] - text_input_id_batchs[0].shape[1]
+ # padding_len, 77
+ padding_part = torch.tensor([[bos] + [eos] + [pad_token] * 75 for _ in range(padding_len)])
+ # 1, padding_len, 77
+ padding_part = padding_part.unsqueeze(0)
+ text_input_id_batchs[0] = torch.cat((text_input_id_batchs[0],padding_part), dim=1)
+
+ embeddings = []
+ for segment_idx in range(text_input_id_batchs[0].shape[1]):
+ prompt_embeds_list = []
+ for i, text_encoder in enumerate(text_encoders):
+ # 1, segment_len, sequence_len
+ text_input_ids = text_input_id_batchs[i].to(text_encoder.device)
+ # 1, sequence_len, dim
+ prompt_embeds = text_encoder(
+ text_input_ids[:, segment_idx],
+ output_hidden_states=True,
+ )
+
+ # We are ALWAYS only interested in the pooled output of the final text encoder
+ temp_pooled_prompt_embeds = prompt_embeds[0]
+ if clip_skip is None:
+ prompt_embeds = prompt_embeds.hidden_states[-2]
+ else:
+ prompt_embeds = prompt_embeds.hidden_states[-(clip_skip + 2)]
+ bs_embed, seq_len, _ = prompt_embeds.shape
+ prompt_embeds = prompt_embeds.view(bs_embed, seq_len, -1)
+ prompt_embeds_list.append(prompt_embeds)
+ # b, sequence_len, dim
+ prompt_embeds = torch.concat(prompt_embeds_list, dim=-1)
+ embeddings.append(prompt_embeds)
+ if segment_idx == 0:
+ # use the first segment's pooled prompt embeddings as
+ # the pooled prompt embeddings
+ # b, dim->b, dim
+ pooled_prompt_embeds = temp_pooled_prompt_embeds.view(bs_embed, -1)
+ # b, segment_len * sequence_len, dim
+ prompt_embeds = torch.cat(embeddings, dim=1)
+
+ if byt5_prompt_embeds is None:
+ byt5_text_inputs = self.byt5_tokenizer(
+ text_prompt,
+ padding="max_length",
+ max_length=self.byt5_max_length,
+ truncation=True,
+ add_special_tokens=True,
+ return_tensors="pt",
+ )
+ byt5_text_input_ids = byt5_text_inputs.input_ids
+ byt5_attention_mask = byt5_text_inputs.attention_mask.to(self.byt5_text_encoder.device) if text_attn_mask is None else text_attn_mask.to(self.byt5_text_encoder.device, dtype=byt5_text_inputs.attention_mask.dtype)
+ with torch.cuda.amp.autocast(enabled=False):
+ byt5_prompt_embeds = self.byt5_text_encoder(
+ byt5_text_input_ids.to(self.byt5_text_encoder.device),
+ attention_mask=byt5_attention_mask.float(),
+ )
+ byt5_prompt_embeds = byt5_prompt_embeds[0]
+ byt5_prompt_embeds = self.byt5_mapper(byt5_prompt_embeds, byt5_attention_mask)
+
+ # get unconditional embeddings for classifier free guidance
+ zero_out_negative_prompt = negative_prompt is None and self.config.force_zeros_for_empty_prompt
+ if do_classifier_free_guidance and negative_prompt_embeds is None and zero_out_negative_prompt:
+ negative_prompt_embeds = torch.zeros_like(prompt_embeds)
+ negative_byt5_prompt_embeds = torch.zeros_like(byt5_prompt_embeds)
+ negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
+ elif do_classifier_free_guidance and negative_prompt_embeds is None:
+ raise NotImplementedError
+
+ if self.text_encoder_2 is not None:
+ prompt_embeds = prompt_embeds.to(dtype=self.text_encoder_2.dtype, device=device)
+ else:
+ prompt_embeds = prompt_embeds.to(dtype=self.unet.dtype, device=device)
+
+ bs_embed, seq_len, _ = prompt_embeds.shape
+ # duplicate text embeddings for each generation per prompt, using mps friendly method
+ prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
+ prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1)
+
+ if do_classifier_free_guidance:
+ # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
+ seq_len = negative_prompt_embeds.shape[1]
+ byt5_seq_len = negative_byt5_prompt_embeds.shape[1]
+
+ if self.text_encoder_2 is not None:
+ negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.text_encoder_2.dtype, device=device)
+ else:
+ negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.unet.dtype, device=device)
+ negative_byt5_prompt_embeds = negative_byt5_prompt_embeds.to(dtype=self.byt5_text_encoder.dtype, device=device)
+
+ negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
+ negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
+ negative_byt5_prompt_embeds = negative_byt5_prompt_embeds.repeat(1, num_images_per_prompt, 1)
+ negative_byt5_prompt_embeds = negative_byt5_prompt_embeds.view(batch_size * num_images_per_prompt, byt5_seq_len, -1)
+
+ pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
+ bs_embed * num_images_per_prompt, -1
+ )
+ if do_classifier_free_guidance:
+ negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
+ bs_embed * num_images_per_prompt, -1
+ )
+
+ if self.text_encoder is not None:
+ if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
+ # Retrieve the original scale by scaling back the LoRA layers
+ unscale_lora_layers(self.text_encoder, lora_scale)
+
+ if self.text_encoder_2 is not None:
+ if isinstance(self, StableDiffusionXLLoraLoaderMixin) and USE_PEFT_BACKEND:
+ # Retrieve the original scale by scaling back the LoRA layers
+ unscale_lora_layers(self.text_encoder_2, lora_scale)
+
+ return (
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ byt5_prompt_embeds,
+ negative_byt5_prompt_embeds,
+ )
+
+ @torch.no_grad()
+ def __call__(
+ self,
+ prompt: Union[str, List[str]] = None,
+ prompt_2: Optional[Union[str, List[str]]] = None,
+ text_prompt = None,
+ texts = None,
+ bboxes = None,
+ height: Optional[int] = None,
+ width: Optional[int] = None,
+ num_inference_steps: int = 50,
+ timesteps: List[int] = None,
+ denoising_end: Optional[float] = None,
+ guidance_scale: float = 5.0,
+ negative_prompt: Optional[Union[str, List[str]]] = None,
+ negative_prompt_2: Optional[Union[str, List[str]]] = None,
+ num_images_per_prompt: Optional[int] = 1,
+ eta: float = 0.0,
+ generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
+ latents: Optional[torch.FloatTensor] = None,
+ prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ ip_adapter_image: Optional[PipelineImageInput] = None,
+ output_type: Optional[str] = "pil",
+ return_dict: bool = True,
+ cross_attention_kwargs: Optional[Dict[str, Any]] = None,
+ guidance_rescale: float = 0.0,
+ original_size: Optional[Tuple[int, int]] = None,
+ crops_coords_top_left: Tuple[int, int] = (0, 0),
+ target_size: Optional[Tuple[int, int]] = None,
+ negative_original_size: Optional[Tuple[int, int]] = None,
+ negative_crops_coords_top_left: Tuple[int, int] = (0, 0),
+ negative_target_size: Optional[Tuple[int, int]] = None,
+ clip_skip: Optional[int] = None,
+ callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
+ callback_on_step_end_tensor_inputs: List[str] = ["latents"],
+ text_attn_mask: torch.LongTensor = None,
+ denoising_start: Optional[float] = None,  # NOTE(review): accepted but never read in this method body
+ byt5_prompt_embeds: Optional[torch.FloatTensor] = None,
+ **kwargs,
+ ):
+ r"""
+ Function invoked when calling the pipeline for generation.
+
+ The body runs these stages in order: validate the inputs, encode the prompts (including the ByT5 glyph text,
+ via `encode_prompt`), prepare timesteps, latents and the SDXL micro-conditioning, build per-resolution glyph
+ and background attention masks from `texts`/`bboxes`, run the denoising loop (the masks and the ByT5
+ embeddings are handed to the UNet through `cross_attention_kwargs`), and finally decode the latents with the
+ VAE. Only a batch size of 1 is supported; this is enforced by an assertion placed just before the masks are
+ built.
+
+ Args:
+ prompt (`str` or `List[str]`, *optional*):
+ The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
+ instead.
+ prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
+ used in both text-encoders
+ text_prompt (*optional*):
+ Glyph text description forwarded to `encode_prompt`, where it is tokenized with `self.byt5_tokenizer`
+ and encoded with `self.byt5_text_encoder` whenever `byt5_prompt_embeds` is not supplied.
+ texts (*optional*):
+ The strings to render. When not `None`, they are passed together with `bboxes` to
+ `get_glyph_attn_mask` to build the glyph and background attention masks; when `None`, no masks are
+ built and `None` is handed to the UNet for both mask dictionaries.
+ bboxes (*optional*):
+ One box per entry of `texts`, in `[x, y, w, h]` format. `get_glyph_attn_mask` multiplies each value by
+ `self.default_sample_size`, so the values are expected as fractions of the canvas side.
+ height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
+ The height in pixels of the generated image. This is set to 1024 by default for the best results.
+ Anything below 512 pixels won't work well for
+ [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
+ and checkpoints that are not specifically fine-tuned on low resolutions.
+ width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
+ The width in pixels of the generated image. This is set to 1024 by default for the best results.
+ Anything below 512 pixels won't work well for
+ [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
+ and checkpoints that are not specifically fine-tuned on low resolutions.
+ num_inference_steps (`int`, *optional*, defaults to 50):
+ The number of denoising steps. More denoising steps usually lead to a higher quality image at the
+ expense of slower inference.
+ timesteps (`List[int]`, *optional*):
+ Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
+ in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
+ passed will be used. Must be in descending order.
+ denoising_end (`float`, *optional*):
+ When specified, determines the fraction (between 0.0 and 1.0) of the total denoising process to be
+ completed before it is intentionally prematurely terminated. As a result, the returned sample will
+ still retain a substantial amount of noise as determined by the discrete timesteps selected by the
+ scheduler. The denoising_end parameter should ideally be utilized when this pipeline forms a part of a
+ "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
+ Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
+ guidance_scale (`float`, *optional*, defaults to 5.0):
+ Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
+ `guidance_scale` is defined as `w` of equation 2. of [Imagen
+ Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
+ 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
+ usually at the expense of lower image quality.
+ negative_prompt (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation. If not defined, one has to pass
+ `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
+ less than `1`).
+ negative_prompt_2 (`str` or `List[str]`, *optional*):
+ The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
+ `text_encoder_2`. If not defined, `negative_prompt` is used in both text-encoders
+ num_images_per_prompt (`int`, *optional*, defaults to 1):
+ The number of images to generate per prompt.
+ eta (`float`, *optional*, defaults to 0.0):
+ Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
+ [`schedulers.DDIMScheduler`], will be ignored for others.
+ generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
+ One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
+ to make generation deterministic.
+ latents (`torch.FloatTensor`, *optional*):
+ Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
+ generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
+ tensor will be generated by sampling using the supplied random `generator`.
+ prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
+ provided, text embeddings will be generated from `prompt` input argument.
+ negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
+ argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
+ ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
+ output_type (`str`, *optional*, defaults to `"pil"`):
+ The output format of the generated image. Choose between
+ [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. When set to
+ `"latent"`, the VAE decode, watermark and post-processing steps are skipped and the latents are
+ returned as the image.
+ return_dict (`bool`, *optional*, defaults to `True`):
+ Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead
+ of a plain tuple.
+ cross_attention_kwargs (`dict`, *optional*):
+ A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
+ `self.processor` in
+ [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
+ Inside the denoising loop the keys `glyph_encoder_hidden_states`, `glyph_attn_masks_dict` and
+ `bg_attn_masks_dict` are written into the dictionary that is passed to the UNet.
+ guidance_rescale (`float`, *optional*, defaults to 0.0):
+ Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
+ Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
+ [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
+ Guidance rescale factor should fix overexposure when using zero terminal SNR.
+ original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+ If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
+ `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
+ explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+ crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
+ `crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
+ `crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
+ `crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+ target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+ For most cases, `target_size` should be set to the desired height and width of the generated image. If
+ not specified it will default to `(height, width)`. Part of SDXL's micro-conditioning as explained in
+ section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
+ negative_original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+ To negatively condition the generation process based on a specific image resolution. Part of SDXL's
+ micro-conditioning as explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
+ information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
+ negative_crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
+ To negatively condition the generation process based on a specific crop coordinates. Part of SDXL's
+ micro-conditioning as explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
+ information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
+ negative_target_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
+ To negatively condition the generation process based on a target image resolution. It should be as same
+ as the `target_size` for most cases. Part of SDXL's micro-conditioning as explained in section 2.2 of
+ [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
+ information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
+ clip_skip (`int`, *optional*):
+ Stored on the pipeline and forwarded to `encode_prompt`, which then takes the text-encoder hidden state
+ at index `-(clip_skip + 2)` instead of the default `-2`.
+ callback_on_step_end (`Callable`, *optional*):
+ A function that calls at the end of each denoising steps during the inference. The function is called
+ with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
+ callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
+ `callback_on_step_end_tensor_inputs`.
+ callback_on_step_end_tensor_inputs (`List`, *optional*):
+ The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
+ will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
+ `._callback_tensor_inputs` attribute of your pipeline class.
+ text_attn_mask (`torch.LongTensor`, *optional*):
+ Forwarded to `encode_prompt`, where, if given, it is used as the ByT5 attention mask in place of the
+ mask produced by the ByT5 tokenizer.
+ denoising_start (`float`, *optional*):
+ Accepted for signature compatibility; this method does not read it.
+ byt5_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-computed ByT5 glyph embeddings forwarded to `encode_prompt`. If not provided, they are computed
+ there from `text_prompt`.
+ **kwargs:
+ Only the deprecated `callback` and `callback_steps` entries are read; any other entry is ignored.
+
+ Examples:
+
+ Returns:
+ [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] or `tuple`:
+ [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] if `return_dict` is True, otherwise a
+ `tuple`. When returning a tuple, the first element is a list with the generated images.
+ """
+
+ # Legacy per-step callback arguments arrive through **kwargs; supplying either one emits a deprecation notice.
+ callback = kwargs.pop("callback", None)
+ callback_steps = kwargs.pop("callback_steps", None)
+
+ if callback is not None:
+ deprecate(
+ "callback",
+ "1.0.0",
+ "Passing `callback` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
+ )
+ if callback_steps is not None:
+ deprecate(
+ "callback_steps",
+ "1.0.0",
+ "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`",
+ )
+
+ # 0. Default height and width to unet
+ height = height or self.default_sample_size * self.vae_scale_factor
+ width = width or self.default_sample_size * self.vae_scale_factor
+
+ original_size = original_size or (height, width)
+ target_size = target_size or (height, width)
+
+ # 1. Check inputs. Raise error if not correct
+ self.check_inputs(
+ prompt,
+ prompt_2,
+ height,
+ width,
+ callback_steps,
+ negative_prompt,
+ negative_prompt_2,
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ callback_on_step_end_tensor_inputs,
+ )
+
+ # Stash the per-call settings on the instance. The rest of the method reads them back through
+ # `self.guidance_scale`, `self.clip_skip`, `self.cross_attention_kwargs`, `self.denoising_end`, ...
+ # NOTE(review): those accessors are defined outside this view - presumably properties returning these
+ # `_`-prefixed values; confirm.
+ self._guidance_scale = guidance_scale
+ self._guidance_rescale = guidance_rescale
+ self._clip_skip = clip_skip
+ self._cross_attention_kwargs = cross_attention_kwargs
+ self._denoising_end = denoising_end
+ self._interrupt = False
+
+ # 2. Define call parameters
+ if prompt is not None and isinstance(prompt, str):
+ batch_size = 1
+ elif prompt is not None and isinstance(prompt, list):
+ batch_size = len(prompt)
+ else:
+ batch_size = prompt_embeds.shape[0]
+
+ device = self._execution_device
+
+ # 3. Encode input prompt
+ lora_scale = (
+ self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None
+ )
+
+ # Besides the usual SDXL embeddings, `encode_prompt` also returns the ByT5 glyph embeddings and their
+ # negative counterpart.
+ (
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
+ byt5_prompt_embeds,
+ negative_byt5_prompt_embeds,
+ ) = self.encode_prompt(
+ prompt=prompt,
+ prompt_2=prompt_2,
+ text_prompt=text_prompt,
+ device=device,
+ num_images_per_prompt=num_images_per_prompt,
+ do_classifier_free_guidance=self.do_classifier_free_guidance,
+ negative_prompt=negative_prompt,
+ negative_prompt_2=negative_prompt_2,
+ prompt_embeds=prompt_embeds,
+ negative_prompt_embeds=negative_prompt_embeds,
+ pooled_prompt_embeds=pooled_prompt_embeds,
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+ lora_scale=lora_scale,
+ clip_skip=self.clip_skip,
+ text_attn_mask=text_attn_mask,
+ byt5_prompt_embeds=byt5_prompt_embeds,
+ )
+
+ # 4. Prepare timesteps
+ timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
+
+ # 5. Prepare latent variables
+ num_channels_latents = self.unet.config.in_channels
+ latents = self.prepare_latents(
+ batch_size * num_images_per_prompt,
+ num_channels_latents,
+ height,
+ width,
+ prompt_embeds.dtype,
+ device,
+ generator,
+ latents,
+ )
+
+ # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
+ extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
+
+ # 7. Prepare added time ids & embeddings
+ add_text_embeds = pooled_prompt_embeds
+ if self.text_encoder_2 is None:
+ text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1])
+ else:
+ text_encoder_projection_dim = self.text_encoder_2.config.projection_dim
+
+ add_time_ids = self._get_add_time_ids(
+ original_size,
+ crops_coords_top_left,
+ target_size,
+ dtype=prompt_embeds.dtype,
+ text_encoder_projection_dim=text_encoder_projection_dim,
+ )
+ # Negative micro-conditioning is only built when both negative sizes are given; otherwise the positive
+ # time ids are reused for the unconditional branch.
+ if negative_original_size is not None and negative_target_size is not None:
+ negative_add_time_ids = self._get_add_time_ids(
+ negative_original_size,
+ negative_crops_coords_top_left,
+ negative_target_size,
+ dtype=prompt_embeds.dtype,
+ text_encoder_projection_dim=text_encoder_projection_dim,
+ )
+ else:
+ negative_add_time_ids = add_time_ids
+
+ # Under classifier-free guidance every conditioning tensor is stacked as [negative, positive] along dim 0.
+ if self.do_classifier_free_guidance:
+ prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
+ byt5_prompt_embeds = torch.cat([negative_byt5_prompt_embeds, byt5_prompt_embeds], dim=0)
+
+ add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0)
+ add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)
+
+ prompt_embeds = prompt_embeds.to(device)
+ byt5_prompt_embeds = byt5_prompt_embeds.to(device)
+ add_text_embeds = add_text_embeds.to(device)
+ add_time_ids = add_time_ids.to(device).repeat(batch_size * num_images_per_prompt, 1)
+
+ if ip_adapter_image is not None:
+ output_hidden_state = False if isinstance(self.unet.encoder_hid_proj, ImageProjection) else True
+ image_embeds, negative_image_embeds = self.encode_image(
+ ip_adapter_image, device, num_images_per_prompt, output_hidden_state
+ )
+ if self.do_classifier_free_guidance:
+ image_embeds = torch.cat([negative_image_embeds, image_embeds])
+ image_embeds = image_embeds.to(device)
+
+ # 8. Denoising loop
+ num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)
+
+ # 8.1 Apply denoising_end
+ if (
+ self.denoising_end is not None
+ and isinstance(self.denoising_end, float)
+ and self.denoising_end > 0
+ and self.denoising_end < 1
+ ):
+ discrete_timestep_cutoff = int(
+ round(
+ self.scheduler.config.num_train_timesteps
+ - (self.denoising_end * self.scheduler.config.num_train_timesteps)
+ )
+ )
+ num_inference_steps = len(list(filter(lambda ts: ts >= discrete_timestep_cutoff, timesteps)))
+ timesteps = timesteps[:num_inference_steps]
+
+ # 9. Optionally get Guidance Scale Embedding
+ timestep_cond = None
+ if self.unet.config.time_cond_proj_dim is not None:
+ guidance_scale_tensor = torch.tensor(self.guidance_scale - 1).repeat(batch_size * num_images_per_prompt)
+ timestep_cond = self.get_guidance_scale_embedding(
+ guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim
+ ).to(device=device, dtype=latents.dtype)
+
+ # The mask construction below adds a single leading batch axis, hence the batch-size restriction.
+ # NOTE(review): this is only checked here, after prompt encoding and latent preparation have already run.
+ assert batch_size == 1, "batch_size > 1 is not supported"
+ if texts is not None:
+ # `get_glyph_attn_mask` returns a 0/1 tensor of shape (resolution, resolution, byt5_max_length).
+ glyph_attn_mask = self.get_glyph_attn_mask(texts, bboxes)
+ # Background = pixels that are not covered by any text box (all-zero along the last axis).
+ # h,w
+ bg_attn_mask = glyph_attn_mask.sum(-1) == 0
+ # 1,h,w,byt5_max_len
+ glyph_attn_masks = glyph_attn_mask.unsqueeze(0).to(device)
+ # 1,h,w
+ bg_attn_masks = bg_attn_mask.unsqueeze(0).to(glyph_attn_masks.dtype).to(device)
+
+ # Turn the 0/1 indicators into additive values: 1 -> 0.0 and 0 -> -10000.0.
+ # b, h, w, text_feat_len
+ glyph_attn_masks = (1 - glyph_attn_masks) * -10000.0
+ # b, h, w
+ bg_attn_masks = (1 - bg_attn_masks) * -10000.0
+ # One mask resolution per 'CrossAttnDownBlock2D' entry of the UNet config: the starting resolution is
+ # `default_sample_size` halved once per 'DownBlock2D' entry, followed by `num_down_sample` further halvings.
+ num_down_sample = sum(1 if i == 'CrossAttnDownBlock2D' else 0 for i in self.unet.config['down_block_types']) - 1
+ initial_resolution = self.default_sample_size
+ initial_resolution = initial_resolution // 2**sum(1 if i == 'DownBlock2D' else 0 for i in self.unet.config['down_block_types'])
+ resolution_list = [initial_resolution] + [initial_resolution // 2**i for i in range(1, num_down_sample + 1)]
+ glyph_attn_masks_dict = dict()
+ bg_attn_masks_dict = dict()
+ # b, text_fet_len, h, w
+ glyph_attn_masks = glyph_attn_masks.permute(0, 3, 1, 2)
+ # b, 1, h, w
+ bg_attn_masks = bg_attn_masks.unsqueeze(1)
+ # Both dictionaries are keyed by the number of spatial positions (resolution squared).
+ # NOTE(review): presumably the attention processor looks the mask up by its query length - confirm.
+ for mask_resolution in resolution_list:
+ down_scaled_glyph_attn_masks = F.interpolate(
+ glyph_attn_masks, size=(mask_resolution, mask_resolution), mode='nearest',
+ )
+ # b, text_fet_len, h, w->b, h, w, text_fet_len->b, h*w, text_fet_len
+ down_scaled_glyph_attn_masks = down_scaled_glyph_attn_masks.permute(0, 2, 3, 1).flatten(1, 2)
+ glyph_attn_masks_dict[mask_resolution * mask_resolution] = down_scaled_glyph_attn_masks
+
+ down_scaled_bg_attn_masks = F.interpolate(
+ bg_attn_masks, size=(mask_resolution, mask_resolution), mode='nearest',
+ )
+ # b,1,h,w->b,h,w->b,h,w,1->b,h*w,1->b,h*w,clip_feat_len
+ down_scaled_bg_attn_masks = down_scaled_bg_attn_masks.squeeze(1).unsqueeze(-1)
+ down_scaled_bg_attn_masks = down_scaled_bg_attn_masks.flatten(1, 2)
+ down_scaled_bg_attn_masks = down_scaled_bg_attn_masks.repeat(1, 1, prompt_embeds.shape[1])
+ bg_attn_masks_dict[mask_resolution * mask_resolution] = down_scaled_bg_attn_masks
+ # Under classifier-free guidance an all-zero mask is prepended for the unconditional half, matching the
+ # [negative, positive] order used for the embeddings above.
+ if self.do_classifier_free_guidance:
+ for key in glyph_attn_masks_dict:
+ glyph_attn_masks_dict[key] = torch.cat([
+ torch.zeros_like(glyph_attn_masks_dict[key]),
+ glyph_attn_masks_dict[key]],
+ dim=0)
+ for key in bg_attn_masks_dict:
+ bg_attn_masks_dict[key] = torch.cat([
+ torch.zeros_like(bg_attn_masks_dict[key]),
+ bg_attn_masks_dict[key]],
+ dim=0)
+ else:
+ glyph_attn_masks_dict = None
+ bg_attn_masks_dict = None
+
+ self._num_timesteps = len(timesteps)
+ with self.progress_bar(total=num_inference_steps) as progress_bar:
+ for i, t in enumerate(timesteps):
+ # An interrupt skips each remaining iteration with `continue` (not `break`), so the progress bar is
+ # not advanced for the skipped steps.
+ if self.interrupt:
+ continue
+
+ # expand the latents if we are doing classifier free guidance
+ latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
+
+ latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
+
+ # predict the noise residual
+ added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
+ if ip_adapter_image is not None:
+ added_cond_kwargs["image_embeds"] = image_embeds
+ # NOTE(review): when `self.cross_attention_kwargs` is not None, that same dict object receives the three
+ # glyph keys below. If the accessor returns the caller-supplied dict (it is stored unchanged on
+ # `self._cross_attention_kwargs` above), the caller's dict is mutated - confirm, and consider copying.
+ if self.cross_attention_kwargs is None:
+ cross_attention_kwargs = {}
+ else:
+ cross_attention_kwargs = self.cross_attention_kwargs
+ cross_attention_kwargs['glyph_encoder_hidden_states'] = byt5_prompt_embeds
+ cross_attention_kwargs['glyph_attn_masks_dict'] = glyph_attn_masks_dict
+ cross_attention_kwargs['bg_attn_masks_dict'] = bg_attn_masks_dict
+
+ noise_pred = self.unet(
+ latent_model_input,
+ t,
+ encoder_hidden_states=prompt_embeds,
+ timestep_cond=timestep_cond,
+ cross_attention_kwargs=cross_attention_kwargs,
+ added_cond_kwargs=added_cond_kwargs,
+ return_dict=False,
+ )[0]
+
+ # perform guidance
+ if self.do_classifier_free_guidance:
+ noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+ noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+ if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
+ # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
+ noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
+
+ # compute the previous noisy sample x_t -> x_t-1
+ latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
+
+ if callback_on_step_end is not None:
+ # The requested tensors are looked up by name among this method's local variables.
+ callback_kwargs = {}
+ for k in callback_on_step_end_tensor_inputs:
+ callback_kwargs[k] = locals()[k]
+ callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
+
+ # Anything the callback returns under these keys replaces the corresponding local for later steps.
+ latents = callback_outputs.pop("latents", latents)
+ prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
+ negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
+ add_text_embeds = callback_outputs.pop("add_text_embeds", add_text_embeds)
+ negative_pooled_prompt_embeds = callback_outputs.pop(
+ "negative_pooled_prompt_embeds", negative_pooled_prompt_embeds
+ )
+ add_time_ids = callback_outputs.pop("add_time_ids", add_time_ids)
+ negative_add_time_ids = callback_outputs.pop("negative_add_time_ids", negative_add_time_ids)
+
+ # call the callback, if provided
+ if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
+ progress_bar.update()
+ # NOTE(review): `callback_steps` defaults to None above, so `i % callback_steps` raises TypeError when a
+ # legacy `callback` is passed without `callback_steps`, unless `check_inputs` rejects that - confirm.
+ if callback is not None and i % callback_steps == 0:
+ step_idx = i // getattr(self.scheduler, "order", 1)
+ callback(step_idx, t, latents)
+
+ if not output_type == "latent":
+ # make sure the VAE is in float32 mode, as it overflows in float16
+ needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
+
+ if needs_upcasting:
+ self.upcast_vae()
+ latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
+
+ image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
+
+ # cast back to fp16 if needed
+ if needs_upcasting:
+ self.vae.to(dtype=torch.float16)
+ else:
+ image = latents
+
+ if not output_type == "latent":
+ # apply watermark if available
+ if self.watermark is not None:
+ image = self.watermark.apply_watermark(image)
+
+ image = self.image_processor.postprocess(image, output_type=output_type)
+
+ # Offload all models
+ self.maybe_free_model_hooks()
+
+ if not return_dict:
+ return (image,)
+
+ return StableDiffusionXLPipelineOutput(images=image)
+
+ def get_glyph_attn_mask(self, texts, bboxes):
+     """Build a 0/1 mask tying every pixel of a text box to that text's span on the ByT5 axis.
+
+     Args:
+         texts: Sequence of strings; forwarded to `get_text_start_pos` to obtain the
+             start/end offset of each text along the last mask axis.
+         bboxes: One `[x, y, w, h]` box per text. Each value is multiplied by
+             `self.default_sample_size`, i.e. it is given as a fraction of the canvas side.
+
+     Returns:
+         A tensor of shape `(default_sample_size, default_sample_size, byt5_max_length)`
+         that is 1 inside each box over the matching offset range and 0 elsewhere.
+     """
+     side = self.default_sample_size
+     span_bounds = self.get_text_start_pos(texts)
+     mask = torch.zeros(side, side, self.byt5_max_length)
+     for box_idx, box in enumerate(bboxes):
+         # Scale to pixels; adding 0.5 before truncation rounds non-negative values to nearest.
+         scaled = [int(frac * side + 0.5) for frac in box]
+         # The origin is kept inside the canvas; width and height are at least one pixel
+         # and at most the canvas side.
+         left = min(max(scaled[0], 0), side - 1)
+         top = min(max(scaled[1], 0), side - 1)
+         box_w = min(max(scaled[2], 1), side)
+         box_h = min(max(scaled[3], 1), side)
+         span_start = span_bounds[box_idx]
+         span_end = span_bounds[box_idx + 1]
+         # Rows index y and columns index x; a slice that runs past the tensor edge is truncated.
+         mask[top:top + box_h, left:left + box_w, span_start:span_end] = 1
+     return mask
+
+ def get_text_start_pos(self, texts):
+     """Return the cumulative UTF-8 byte offsets of each text's rendered prompt segment.
+
+     Every text is expanded with the template `Text "{text}" in {color}, {type}. `,
+     using the fixed placeholder attributes '0' and '1', and the segments are laid out
+     back to back.
+
+     Args:
+         texts: Iterable of strings.
+
+     Returns:
+         A list with `len(texts) + 1` integers: the byte offset at which each segment
+         starts, followed by the total byte length of all segments.
+     """
+     # The attribute suffix is identical for every text, so build it once.
+     suffix = " in " + ", ".join(['0', '1']) + ". "
+     offsets = [0]
+     for text in texts:
+         segment = (f'Text "{text}"' + suffix).encode('utf-8')
+         offsets.append(offsets[-1] + len(segment))
+     return offsets
diff --git a/text_encoder/Glyph-SDXL-v2/glyph_sdxl/modules/__init__.py b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/modules/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f8b7e221eb0e6184b014cf3dcfff92342841727
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/modules/__init__.py
@@ -0,0 +1,7 @@
+from .simple_byt5_mapper import ByT5Mapper
+from .byt5_block_byt5_mapper import T5EncoderBlockByT5Mapper
+
+__all__ = [
+ 'ByT5Mapper',
+ 'T5EncoderBlockByT5Mapper',
+]
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/glyph_sdxl/modules/byt5_block_byt5_mapper.py b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/modules/byt5_block_byt5_mapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..574d05077ce0d296e006e1e6862955a9f477bad8
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/modules/byt5_block_byt5_mapper.py
@@ -0,0 +1,151 @@
+import torch
+import torch.nn as nn
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+import warnings
+
+import logging
+from torch import Tensor
+from diffusers import ModelMixin
+from transformers.models.t5.modeling_t5 import T5LayerSelfAttention, T5LayerFF, T5LayerNorm
+
+logger = logging.getLogger(__name__)
+
+class T5EncoderBlock(nn.Module):
+    """A single encoder block: T5 self-attention followed by T5 feed-forward.
+
+    The two sub-layers are stored in ``self.layer`` (index 0 is the
+    self-attention layer, the last index is the feed-forward layer).
+    """
+
+    def __init__(self, config, has_relative_attention_bias=False):
+        super().__init__()
+        self_attention = T5LayerSelfAttention(config, has_relative_attention_bias=has_relative_attention_bias)
+        feed_forward = T5LayerFF(config)
+        self.layer = nn.ModuleList([self_attention, feed_forward])
+
+    @staticmethod
+    def _clamp_fp16(hidden_states):
+        """Clamp float16 activations to the finite fp16 range; other dtypes pass through."""
+        if hidden_states.dtype != torch.float16:
+            return hidden_states
+        dtype_max = torch.finfo(hidden_states.dtype).max
+        # Use a slightly tighter bound when an inf is already present.
+        clamp_value = torch.where(
+            torch.isinf(hidden_states).any(),
+            dtype_max - 1000,
+            dtype_max,
+        )
+        return torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)
+
+    def forward(
+        self,
+        hidden_states,
+        attention_mask=None,
+        position_bias=None,
+        layer_head_mask=None,
+        output_attentions=False,
+    ):
+        """Run self-attention then feed-forward, clamping fp16 values after each.
+
+        Returns:
+            A tuple whose first element is the new hidden states, followed by
+            every item the self-attention layer returned after its first two
+            (its second item, the key/value state, is discarded). The mapper in
+            this file unpacks the result as ``(hidden_states, position_bias)``.
+        """
+        attention_layer, feed_forward_layer = self.layer[0], self.layer[-1]
+
+        attention_results = attention_layer(
+            hidden_states,
+            attention_mask=attention_mask,
+            position_bias=position_bias,
+            layer_head_mask=layer_head_mask,
+            past_key_value=None,  # no cache: this block is encoder-only
+            use_cache=False,
+            output_attentions=output_attentions,
+        )
+        hidden_states, _unused_key_value_state = attention_results[:2]
+        trailing_outputs = attention_results[2:]
+
+        # clamp inf values to enable fp16 training
+        hidden_states = self._clamp_fp16(hidden_states)
+
+        # Apply Feed Forward layer
+        hidden_states = feed_forward_layer(hidden_states)
+
+        # clamp inf values to enable fp16 training
+        hidden_states = self._clamp_fp16(hidden_states)
+
+        return (hidden_states,) + trailing_outputs
+
+class T5EncoderBlockByT5Mapper(ModelMixin):
+    """Maps ByT5 embeddings through optional T5 encoder blocks and a projection.
+
+    Pipeline: ``num_layers`` ``T5EncoderBlock``s (skipped when ``num_layers <= 0``)
+    -> ``T5LayerNorm`` -> optional ``nn.Linear`` to ``sdxl_channels`` followed by
+    a second ``T5LayerNorm`` (both skipped when ``sdxl_channels`` is ``None``).
+    """
+
+    def __init__(self, byt5_config, num_layers, sdxl_channels=None):
+        super().__init__()
+        if num_layers > 0:
+            # Only the first block owns a relative attention bias.
+            encoder_blocks = [
+                T5EncoderBlock(byt5_config, has_relative_attention_bias=(layer_idx == 0))
+                for layer_idx in range(num_layers)
+            ]
+            self.blocks = nn.ModuleList(encoder_blocks)
+        else:
+            self.blocks = None
+        self.layer_norm = T5LayerNorm(byt5_config.d_model, eps=byt5_config.layer_norm_epsilon)
+        if sdxl_channels is not None:
+            self.channel_mapper = nn.Linear(byt5_config.d_model, sdxl_channels)
+            self.final_layer_norm = T5LayerNorm(sdxl_channels, eps=byt5_config.layer_norm_epsilon)
+        else:
+            self.channel_mapper = None
+            self.final_layer_norm = None
+
+    def get_extended_attention_mask(
+        self, attention_mask: Tensor, input_shape: Tuple[int], device: torch.device = None, dtype: torch.float = None
+    ) -> Tensor:
+        """
+        Turn a 0/1 attention mask into an additive mask broadcastable over heads.
+
+        Arguments:
+            attention_mask (`torch.Tensor`):
+                Mask with ones indicating tokens to attend to, zeros for tokens to ignore.
+                Either 2-D `[batch_size, seq_length]` or 3-D
+                `[batch_size, from_seq_length, to_seq_length]`.
+            input_shape (`Tuple[int]`):
+                The shape of the input to the model (only used in the error message).
+            device:
+                Unused by this implementation.
+            dtype:
+                Target dtype of the result; defaults to `self.dtype`.
+
+        Returns:
+            `torch.Tensor`: the extended attention mask in `dtype`, holding 0.0 at
+            attended positions and the dtype's most negative value at masked ones.
+
+        Raises:
+            ValueError: if `attention_mask` is neither 2-D nor 3-D.
+        """
+        if dtype is None:
+            dtype = self.dtype
+
+        mask_rank = attention_mask.dim()
+        if mask_rank == 3:
+            # Caller supplied a full [batch, from, to] mask: only add the head axis.
+            expanded_mask = attention_mask[:, None, :, :]
+        elif mask_rank == 2:
+            # Padding mask [batch, seq]: add head and query axes.
+            expanded_mask = attention_mask[:, None, None, :]
+        else:
+            raise ValueError(
+                f"Wrong shape for input_ids (shape {input_shape}) or attention_mask (shape {attention_mask.shape})"
+            )
+
+        # The mask is added to the raw attention scores before the softmax, so a
+        # very negative value effectively removes the masked positions.
+        expanded_mask = expanded_mask.to(dtype=dtype)  # fp16 compatibility
+        return (1.0 - expanded_mask) * torch.finfo(dtype).min
+
+    def forward(self, inputs_embeds, attention_mask):
+        """Apply the encoder blocks, layer norm and optional channel projection.
+
+        The position bias returned by one block is fed to the next.
+        """
+        input_shape = inputs_embeds.size()[:-1]
+        additive_mask = self.get_extended_attention_mask(attention_mask, input_shape)
+
+        hidden_states, position_bias = inputs_embeds, None
+
+        if self.blocks is not None:
+            for encoder_block in self.blocks:
+                hidden_states, position_bias = encoder_block(
+                    hidden_states,
+                    attention_mask=additive_mask,
+                    position_bias=position_bias,
+                )
+
+        hidden_states = self.layer_norm(hidden_states)
+        if self.channel_mapper is None:
+            return hidden_states
+        return self.final_layer_norm(self.channel_mapper(hidden_states))
diff --git a/text_encoder/Glyph-SDXL-v2/glyph_sdxl/modules/simple_byt5_mapper.py b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/modules/simple_byt5_mapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed31fe06f04cc4ae2d2862901dacf84c8bc0f549
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/modules/simple_byt5_mapper.py
@@ -0,0 +1,16 @@
+from diffusers import ModelMixin
+import torch.nn as nn
+
+class ByT5Mapper(ModelMixin):
+    """Small MLP: LayerNorm -> Linear -> ReLU -> Linear.
+
+    The first linear layer maps ``byt5_output_dim`` to ``sdxl_text_dim``;
+    the second keeps ``sdxl_text_dim``.
+    """
+
+    def __init__(self, byt5_output_dim, sdxl_text_dim):
+        super().__init__()
+        stages = [
+            nn.LayerNorm(byt5_output_dim),
+            nn.Linear(byt5_output_dim, sdxl_text_dim),
+            nn.ReLU(),
+            nn.Linear(sdxl_text_dim, sdxl_text_dim),
+        ]
+        self.mapper = nn.Sequential(*stages)
+
+    def forward(self, byt5_embedding):
+        """Return ``byt5_embedding`` passed through the MLP."""
+        mapped_embedding = self.mapper(byt5_embedding)
+        return mapped_embedding
+
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/glyph_sdxl/utils/__init__.py b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..267a07065978dcbdb1a16c3f21ca3017e1a9cd7a
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/utils/__init__.py
@@ -0,0 +1,23 @@
+from .parse_config import parse_config
+from .constants import (
+ UNET_CKPT_NAME,
+ BYT5_CKPT_NAME,
+ BYT5_MAPPER_CKPT_NAME,
+ INSERTED_ATTN_CKPT_NAME,
+ huggingface_cache_dir,
+)
+from .load_pretrained_byt5 import load_byt5_and_byt5_tokenizer
+from .format_prompt import PromptFormat, MultilingualPromptFormat
+
+__all__ = [
+ 'parse_config',
+ 'UNET_CKPT_NAME',
+ 'BYT5_CKPT_NAME',
+ 'BYT5_MAPPER_CKPT_NAME',
+ 'huggingface_cache_dir',
+ 'load_byt5_and_byt5_tokenizer',
+ 'INSERTED_ATTN_CKPT_NAME',
+ 'PromptFormat',
+ 'MultilingualPromptFormat',
+]
+
diff --git a/text_encoder/Glyph-SDXL-v2/glyph_sdxl/utils/constants.py b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/utils/constants.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ed6b7f0082350d101e19183e267ed2c5976148d
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/utils/constants.py
@@ -0,0 +1,5 @@
+# File names of the fine-tuned weights; inference_multilingual.py joins each
+# one with its checkpoint directory and reads it with torch.load.
+# LoRA parameters applied to the UNet adapter.
+UNET_CKPT_NAME = "unet_lora.pt"
+# Parameters loaded into the UNet with strict=False (the inserted attention modules).
+INSERTED_ATTN_CKPT_NAME = "unet_inserted_attn.pt"
+# State dict of the ByT5 text encoder.
+BYT5_CKPT_NAME = "byt5_model.pt"
+# State dict of the ByT5 mapper module.
+BYT5_MAPPER_CKPT_NAME = "byt5_mapper.pt"
+# Passed as `cache_dir` to the `from_pretrained` calls; None leaves the choice to the library.
+huggingface_cache_dir = None
diff --git a/text_encoder/Glyph-SDXL-v2/glyph_sdxl/utils/format_prompt.py b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/utils/format_prompt.py
new file mode 100644
index 0000000000000000000000000000000000000000..692762cf548ba6cb51204da526cc96fd1defe283
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/utils/format_prompt.py
@@ -0,0 +1,113 @@
+import json
+import webcolors
+
+
+def closest_color(requested_color):
+    """Return the CSS3 color name nearest to ``requested_color``.
+
+    Distance is the squared Euclidean distance between RGB triples. When
+    several names are equally close, the one that appears last in
+    ``webcolors.CSS3_HEX_TO_NAMES`` wins.
+
+    NOTE(review): newer webcolors releases appear to no longer export
+    ``CSS3_HEX_TO_NAMES`` -- confirm the installed version provides it.
+
+    Args:
+        requested_color: indexable RGB triple (red, green, blue).
+    """
+    scored_names = []
+    for hex_code, css_name in webcolors.CSS3_HEX_TO_NAMES.items():
+        red, green, blue = webcolors.hex_to_rgb(hex_code)
+        squared_distance = (
+            (red - requested_color[0]) ** 2
+            + (green - requested_color[1]) ** 2
+            + (blue - requested_color[2]) ** 2
+        )
+        scored_names.append((squared_distance, css_name))
+    smallest = min(distance for distance, _ in scored_names)
+    # Take the last match so ties resolve to the most recently visited name.
+    return [css_name for distance, css_name in scored_names if distance == smallest][-1]
+
+def convert_rgb_to_names(rgb_tuple):
+    """Name an RGB triple: the exact webcolors name if one exists, else the closest color."""
+    try:
+        return webcolors.rgb_to_name(rgb_tuple)
+    except ValueError:
+        # webcolors has no exact name for this triple; fall back to the nearest one.
+        return closest_color(rgb_tuple)
+
+class PromptFormat():
+    """Builds the glyph text prompt ``Text "{text}" in {color}, {font}. `` per text box.
+
+    Colors and fonts are written as the tokens ``<color-{idx}>`` and
+    ``<font-{idx}>``, where the indices come from the two JSON index files.
+    """
+
+    def __init__(
+        self,
+        font_path: str = 'assets/font_idx_512.json',
+        color_path: str = 'assets/color_idx.json',
+    ):
+        """Load the font-name -> index and color-name -> index mappings."""
+        with open(font_path, 'r') as f:
+            self.font_dict = json.load(f)
+        with open(color_path, 'r') as f:
+            self.color_dict = json.load(f)
+
+    def format_checker(self, texts, styles):
+        """Assert that every style names a known font and a known color.
+
+        Raises:
+            AssertionError: on a length mismatch, unknown font or unknown color.
+        """
+        assert len(texts) == len(styles), 'length of texts must be equal to length of styles'
+        for style in styles:
+            assert style['font-family'] in self.font_dict, f"invalid font-family: {style['font-family']}"
+            rgb_color = webcolors.hex_to_rgb(style['color'])
+            color_name = convert_rgb_to_names(rgb_color)
+            assert color_name in self.color_dict, f"invalid color hex {color_name}"
+
+    def format_prompt(self, texts, styles):
+        """Return the concatenated prompt for all ``texts``.
+
+        Args:
+            texts: list of strings to render.
+            styles: list of dicts with a hex ``"color"`` and a ``"font-family"`` key,
+                one per text.
+
+        Returns:
+            One string made of ``Text "{text}" in <color-i>, <font-j>. `` segments.
+        """
+        self.format_checker(texts, styles)
+
+        prompt = ""
+        for text, style in zip(texts, styles):
+            # format color: hex -> RGB -> (nearest) CSS3 name -> color index token
+            rgb_color = webcolors.hex_to_rgb(style["color"])
+            color_name = convert_rgb_to_names(rgb_color)
+            color_token = f"<color-{self.color_dict[color_name]}>"
+
+            # format font: font-family name -> font index token
+            font_token = f"<font-{self.font_dict[style['font-family']]}>"
+
+            attr_suffix = ", ".join([color_token, font_token])
+            prompt += f'Text "{text}" in {attr_suffix}. '
+        return prompt
+
+
+class MultilingualPromptFormat():
+    """Builds the multilingual glyph text prompt, one segment per text box.
+
+    Each segment reads ``Text "{text}" in <color-{idx}>, <{xx}-font-{idx}>. ``,
+    where ``xx`` is the first two characters of the font-family key and the
+    indices come from the two JSON index files.
+    """
+
+    def __init__(
+        self,
+        font_path: str = 'assets/multilingual_10-lang_idx.json',
+        color_path: str = 'assets/color_idx.json',
+    ):
+        """Load the font-name -> index and color-name -> index mappings."""
+        with open(font_path, 'r') as f:
+            self.font_dict = json.load(f)
+        with open(color_path, 'r') as f:
+            self.color_dict = json.load(f)
+
+    def format_checker(self, texts, styles):
+        """Assert that every style names a known font and a known color.
+
+        Raises:
+            AssertionError: on a length mismatch, unknown font or unknown color.
+        """
+        assert len(texts) == len(styles), 'length of texts must be equal to length of styles'
+        for style in styles:
+            assert style['font-family'] in self.font_dict, f"invalid font-family: {style['font-family']}"
+            rgb_color = webcolors.hex_to_rgb(style['color'])
+            color_name = convert_rgb_to_names(rgb_color)
+            assert color_name in self.color_dict, f"invalid color hex {color_name}"
+
+    def format_prompt(self, texts, styles):
+        """Return the concatenated prompt for all ``texts``.
+
+        Args:
+            texts: list of strings to render.
+            styles: list of dicts with a hex ``"color"`` and a ``"font-family"`` key,
+                one per text.
+
+        Returns:
+            One string made of ``Text "{text}" in <color-i>, <xx-font-j>. `` segments.
+        """
+        self.format_checker(texts, styles)
+
+        prompt = ""
+        for text, style in zip(texts, styles):
+            # format color: hex -> RGB -> (nearest) CSS3 name -> color index token
+            rgb_color = webcolors.hex_to_rgb(style["color"])
+            color_name = convert_rgb_to_names(rgb_color)
+            color_token = f"<color-{self.color_dict[color_name]}>"
+
+            # format font: two-character prefix of the family key + font index
+            font_family = style['font-family']
+            font_token = f"<{font_family[:2]}-font-{self.font_dict[font_family]}>"
+
+            attr_suffix = ", ".join([color_token, font_token])
+            prompt += f'Text "{text}" in {attr_suffix}. '
+        return prompt
diff --git a/text_encoder/Glyph-SDXL-v2/glyph_sdxl/utils/load_pretrained_byt5.py b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/utils/load_pretrained_byt5.py
new file mode 100644
index 0000000000000000000000000000000000000000..f74661aaf27eee6f2fd75b14e9a1f7fa45840255
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/utils/load_pretrained_byt5.py
@@ -0,0 +1,60 @@
+import json
+
+from transformers import AutoTokenizer, T5ForConditionalGeneration
+from diffusers.utils import logging
+
+logger = logging.get_logger(__name__) # pylint: disable=invalid-name
+
+def add_special_token(tokenizer, text_encoder, add_color, add_font, color_ann_path, font_ann_path, multilingual=False):
+    """Register color/font special tokens and resize the encoder embeddings.
+
+    Token names:
+      * colors: ``<color-0>`` ... ``<color-{N-1}>`` for the N entries of the color file;
+      * fonts (``multilingual=False``): ``<font-0>`` ... ``<font-{M-1}>`` for the M
+        entries of the font file;
+      * fonts (``multilingual=True``): ``<{xx}-font-{idx}>`` where ``xx`` is the first
+        two characters of each font key and ``idx`` its value in the font file.
+
+    Args:
+        tokenizer: object exposing ``add_tokens(tokens, special_tokens=...)`` and ``len()``.
+        text_encoder: object exposing ``resize_token_embeddings(new_size)``.
+        add_color: add the color tokens when truthy.
+        add_font: add the font tokens when truthy.
+        color_ann_path: path of the JSON color index file.
+        font_ann_path: path of the JSON font index file.
+        multilingual: select the multilingual font token format.
+    """
+    with open(font_ann_path, 'r') as f:
+        idx_font_dict = json.load(f)
+    with open(color_ann_path, 'r') as f:
+        idx_color_dict = json.load(f)
+
+    if multilingual:
+        font_token = [
+            f'<{font_code[:2]}-font-{font_idx}>'
+            for font_code, font_idx in idx_font_dict.items()
+        ]
+    else:
+        font_token = [f'<font-{i}>' for i in range(len(idx_font_dict))]
+    color_token = [f'<color-{i}>' for i in range(len(idx_color_dict))]
+
+    # Colors are registered before fonts, so their ids precede the font ids.
+    additional_special_tokens = []
+    if add_color:
+        additional_special_tokens += color_token
+    if add_font:
+        additional_special_tokens += font_token
+    tokenizer.add_tokens(additional_special_tokens, special_tokens=True)
+    # Grow the embedding table so every new token id has a row.
+    text_encoder.resize_token_embeddings(len(tokenizer))
+
+def load_byt5_and_byt5_tokenizer(
+    byt5_name='google/byt5-small',
+    special_token=False,
+    color_special_token=False,
+    font_special_token=False,
+    color_ann_path='assets/color_idx.json',
+    font_ann_path='assets/font_idx_512.json',
+    huggingface_cache_dir=None,
+    multilingual=False,
+):
+    """Load the ByT5 encoder and tokenizer, optionally adding color/font tokens.
+
+    Args:
+        byt5_name: model name or path given to ``from_pretrained``.
+        special_token: when truthy, call ``add_special_token`` on the loaded pair.
+        color_special_token: forwarded as ``add_color``.
+        font_special_token: forwarded as ``add_font``.
+        color_ann_path: JSON color index file.
+        font_ann_path: JSON font index file.
+        huggingface_cache_dir: forwarded as ``cache_dir`` to both loaders.
+        multilingual: forwarded to ``add_special_token``.
+
+    Returns:
+        ``(byt5_text_encoder, byt5_tokenizer)``; the encoder is the encoder half
+        of ``T5ForConditionalGeneration``.
+    """
+    pretrained_kwargs = dict(cache_dir=huggingface_cache_dir)
+    byt5_tokenizer = AutoTokenizer.from_pretrained(byt5_name, **pretrained_kwargs)
+    seq2seq_model = T5ForConditionalGeneration.from_pretrained(byt5_name, **pretrained_kwargs)
+    byt5_text_encoder = seq2seq_model.get_encoder()
+
+    if special_token:
+        add_special_token(
+            byt5_tokenizer,
+            byt5_text_encoder,
+            add_color=color_special_token,
+            add_font=font_special_token,
+            color_ann_path=color_ann_path,
+            font_ann_path=font_ann_path,
+            multilingual=multilingual,
+        )
+
+    logger.info('Loaded original byt5 weight')
+
+    return byt5_text_encoder, byt5_tokenizer
\ No newline at end of file
diff --git a/text_encoder/Glyph-SDXL-v2/glyph_sdxl/utils/parse_config.py b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/utils/parse_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..1bfa29d19c098ceb69a2e4f315209e56b454f0bd
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/glyph_sdxl/utils/parse_config.py
@@ -0,0 +1,17 @@
+import argparse
+import os
+import os.path as osp
+from mmengine.config import Config
+
+
+def parse_config(path=None):
+    """Load an mmengine ``Config`` and record where it came from.
+
+    Args:
+        path: config file path. When ``None`` the path is read from the
+            command line as the positional argument ``config_dir``.
+
+    Returns:
+        The loaded config with an extra ``config_dir`` attribute set to ``path``.
+    """
+    if path is None:
+        cli = argparse.ArgumentParser()
+        cli.add_argument('config_dir', type=str)
+        path = cli.parse_args().config_dir
+
+    config = Config.fromfile(path)
+    config.config_dir = path
+    return config
diff --git a/text_encoder/Glyph-SDXL-v2/inference_multilingual.py b/text_encoder/Glyph-SDXL-v2/inference_multilingual.py
new file mode 100644
index 0000000000000000000000000000000000000000..51c875916a682b7e35730bb98d14b076e12d3f73
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/inference_multilingual.py
@@ -0,0 +1,236 @@
+import argparse
+import os
+import json
+import copy
+import os.path as osp
+
+import torch
+from diffusers import UNet2DConditionModel, AutoencoderKL
+from diffusers.models.attention import BasicTransformerBlock
+from peft import LoraConfig
+from peft.utils import set_peft_model_state_dict
+from transformers import PretrainedConfig
+
+from diffusers import DPMSolverMultistepScheduler
+
+from glyph_sdxl.utils import (
+ parse_config,
+ UNET_CKPT_NAME,
+ huggingface_cache_dir,
+ load_byt5_and_byt5_tokenizer,
+ BYT5_MAPPER_CKPT_NAME,
+ INSERTED_ATTN_CKPT_NAME,
+ BYT5_CKPT_NAME,
+ MultilingualPromptFormat,
+)
+from glyph_sdxl.custom_diffusers import (
+ StableDiffusionGlyphXLPipeline,
+ CrossAttnInsertBasicTransformerBlock,
+)
+from glyph_sdxl.modules import T5EncoderBlockByT5Mapper
+
+# Registry of available mapper classes, keyed by class name so the config can
+# select one through `config.byt5_mapper_type`.
+byt5_mapper_dict = [T5EncoderBlockByT5Mapper]
+# The list above is turned into the {class name: class} lookup used below.
+byt5_mapper_dict = {mapper.__name__: mapper for mapper in byt5_mapper_dict}
+
+
+def import_model_class_from_model_name_or_path(
+    pretrained_model_name_or_path: str, revision: str, subfolder: str = "text_encoder",
+):
+    """Return the CLIP text-encoder class named by a checkpoint's config.
+
+    The first entry of the config's ``architectures`` list decides the class.
+
+    Raises:
+        ValueError: if that architecture is neither ``CLIPTextModel`` nor
+            ``CLIPTextModelWithProjection``.
+    """
+    text_encoder_config = PretrainedConfig.from_pretrained(
+        pretrained_model_name_or_path,
+        subfolder=subfolder,
+        revision=revision,
+    )
+    architecture = text_encoder_config.architectures[0]
+
+    # Import lazily so only the requested class is pulled in.
+    if architecture == "CLIPTextModel":
+        from transformers import CLIPTextModel
+
+        return CLIPTextModel
+    if architecture == "CLIPTextModelWithProjection":
+        from transformers import CLIPTextModelWithProjection
+
+        return CLIPTextModelWithProjection
+    raise ValueError(f"{architecture} is not supported.")
+
+if __name__ == "__main__":
+    # ---- Command line -------------------------------------------------------
+    parser = argparse.ArgumentParser()
+    parser.add_argument("config_dir", type=str)
+    parser.add_argument("ckpt_dir", type=str)
+    # nargs='?' makes this positional optional; without it argparse ignores
+    # `default` and always requires the argument.
+    parser.add_argument("ann_path", type=str, nargs='?', default='examples/shower.json')
+    # NOTE: the default is the string 'None', i.e. a folder literally named "None".
+    parser.add_argument("--out_folder", type=str, default='None')
+    parser.add_argument("--device", type=str, default='cuda')
+    parser.add_argument("--sampler", type=str, choices=['euler', 'dpm'])
+    parser.add_argument("--cfg", type=float, default=5.0)
+    args = parser.parse_args()
+
+    config = parse_config(args.config_dir)
+
+    # ---- Base models --------------------------------------------------------
+    text_encoder_cls_one = import_model_class_from_model_name_or_path(
+        config.pretrained_model_name_or_path, config.revision,
+    )
+    text_encoder_cls_two = import_model_class_from_model_name_or_path(
+        config.pretrained_model_name_or_path, config.revision, subfolder="text_encoder_2",
+    )
+    text_encoder_one = text_encoder_cls_one.from_pretrained(
+        config.pretrained_model_name_or_path, subfolder="text_encoder", revision=config.revision,
+        cache_dir=huggingface_cache_dir,
+    )
+    text_encoder_two = text_encoder_cls_two.from_pretrained(
+        config.pretrained_model_name_or_path, subfolder="text_encoder_2", revision=config.revision,
+        cache_dir=huggingface_cache_dir,
+    )
+
+    unet = UNet2DConditionModel.from_pretrained(
+        config.pretrained_model_name_or_path,
+        subfolder="unet",
+        revision=config.revision,
+        cache_dir=huggingface_cache_dir,
+    )
+
+    # Use the dedicated VAE checkpoint when the config names one, else the
+    # "vae" subfolder of the base model.
+    vae_path = (
+        config.pretrained_model_name_or_path
+        if config.pretrained_vae_model_name_or_path is None
+        else config.pretrained_vae_model_name_or_path
+    )
+    vae = AutoencoderKL.from_pretrained(
+        vae_path, subfolder="vae" if config.pretrained_vae_model_name_or_path is None else None,
+        revision=config.revision,
+        cache_dir=huggingface_cache_dir,
+    )
+
+    byt5_model, byt5_tokenizer = load_byt5_and_byt5_tokenizer(
+        **config.byt5_config,
+        huggingface_cache_dir=huggingface_cache_dir,
+    )
+
+    # ---- Precision and device placement --------------------------------------
+    inference_dtype = torch.float32
+    if config.inference_dtype == "fp16":
+        inference_dtype = torch.float16
+    elif config.inference_dtype == "bf16":
+        inference_dtype = torch.bfloat16
+
+    # The base model's own VAE is kept in float32; a separately supplied VAE
+    # follows the inference dtype.
+    if config.pretrained_vae_model_name_or_path is None:
+        vae.to(args.device, dtype=torch.float32)
+    else:
+        vae.to(args.device, dtype=inference_dtype)
+    text_encoder_one.to(args.device, dtype=inference_dtype)
+    text_encoder_two.to(args.device, dtype=inference_dtype)
+    byt5_model.to(args.device)
+    unet.to(args.device, dtype=inference_dtype)
+
+    # ---- Swap selected transformer blocks for cross-attention-inserted ones ---
+    inserted_new_modules_para_set = set()
+    for name, module in unet.named_modules():
+        if isinstance(module, BasicTransformerBlock) and name in config.attn_block_to_modify:
+            # Walk down to the module that owns the block to be replaced.
+            parent_module = unet
+            for n in name.split(".")[:-1]:
+                parent_module = getattr(parent_module, n)
+            new_block = CrossAttnInsertBasicTransformerBlock.from_transformer_block(
+                module,
+                byt5_model.config.d_model if config.byt5_mapper_config.sdxl_channels is None else config.byt5_mapper_config.sdxl_channels,
+            )
+            new_block.requires_grad_(False)
+            for inserted_module_name, inserted_module in zip(
+                new_block.get_inserted_modules_names(),
+                new_block.get_inserted_modules()
+            ):
+                inserted_module.requires_grad_(True)
+                for para_name, para in inserted_module.named_parameters():
+                    para_key = name + '.' + inserted_module_name + '.' + para_name
+                    # Every inserted parameter must be recorded exactly once.
+                    assert para_key not in inserted_new_modules_para_set
+                    inserted_new_modules_para_set.add(para_key)
+            for origin_module in new_block.get_origin_modules():
+                origin_module.to(args.device, dtype=inference_dtype)
+            parent_module.register_module(name.split(".")[-1], new_block)
+            print(f"inserted cross attn block to {name}")
+
+    byt5_mapper = byt5_mapper_dict[config.byt5_mapper_type](
+        byt5_model.config,
+        **config.byt5_mapper_config,
+    )
+
+    # ---- LoRA adapter on the UNet attention projections ------------------------
+    unet_lora_target_modules = [
+        "attn1.to_k", "attn1.to_q", "attn1.to_v", "attn1.to_out.0",
+        "attn2.to_k", "attn2.to_q", "attn2.to_v", "attn2.to_out.0",
+    ]
+    unet_lora_config = LoraConfig(
+        r=config.unet_lora_rank,
+        lora_alpha=config.unet_lora_rank,
+        init_lora_weights="gaussian",
+        target_modules=unet_lora_target_modules,
+    )
+    unet.add_adapter(unet_lora_config)
+
+    # ---- Fine-tuned weights ------------------------------------------------------
+    # NOTE(review): torch.load unpickles these files; only use checkpoints
+    # from a trusted source.
+    unet_lora_layers_para = torch.load(osp.join(args.ckpt_dir, UNET_CKPT_NAME), map_location='cpu')
+    incompatible_keys = set_peft_model_state_dict(unet, unet_lora_layers_para, adapter_name="default")
+    if getattr(incompatible_keys, 'unexpected_keys', []) == []:
+        print(f"loaded unet_lora_layers_para")
+    else:
+        print(f"unet_lora_layers has unexpected_keys: {getattr(incompatible_keys, 'unexpected_keys', None)}")
+
+    # strict=False: this file only carries the inserted attention parameters,
+    # so missing keys are expected; unexpected keys are not.
+    inserted_attn_module_paras = torch.load(osp.join(args.ckpt_dir, INSERTED_ATTN_CKPT_NAME), map_location='cpu')
+    missing_keys, unexpected_keys = unet.load_state_dict(inserted_attn_module_paras, strict=False)
+    assert len(unexpected_keys) == 0, unexpected_keys
+
+    byt5_mapper_para = torch.load(osp.join(args.ckpt_dir, BYT5_MAPPER_CKPT_NAME), map_location='cpu')
+    byt5_mapper.load_state_dict(byt5_mapper_para)
+
+    byt5_model_para = torch.load(osp.join(args.ckpt_dir, BYT5_CKPT_NAME), map_location='cpu')
+    byt5_model.load_state_dict(byt5_model_para)
+
+    # ---- Pipeline ------------------------------------------------------------------
+    pipeline = StableDiffusionGlyphXLPipeline.from_pretrained(
+        config.pretrained_model_name_or_path,
+        vae=vae,
+        text_encoder=text_encoder_one,
+        text_encoder_2=text_encoder_two,
+        byt5_text_encoder=byt5_model,
+        byt5_tokenizer=byt5_tokenizer,
+        byt5_mapper=byt5_mapper,
+        unet=unet,
+        byt5_max_length=config.byt5_max_length,
+        revision=config.revision,
+        torch_dtype=inference_dtype,
+        safety_checker=None,
+        cache_dir=huggingface_cache_dir,
+    )
+
+    # Only 'dpm' replaces the scheduler; any other choice keeps the pipeline's own.
+    if args.sampler == 'dpm':
+        pipeline.scheduler = DPMSolverMultistepScheduler.from_pretrained(
+            config.pretrained_model_name_or_path,
+            subfolder="scheduler",
+            use_karras_sigmas=True,
+        )
+
+    pipeline = pipeline.to(args.device)
+
+    # ---- Annotation -> prompt -> image -----------------------------------------------
+    with open(args.ann_path, 'r') as f:
+        ann = json.load(f)
+
+    os.makedirs(args.out_folder, exist_ok=True)
+
+    prompt_format = MultilingualPromptFormat()
+
+    # Deep copies keep the loaded annotation untouched.
+    texts = copy.deepcopy(ann['texts'])
+    bboxes = copy.deepcopy(ann['bbox'])
+    styles = copy.deepcopy(ann['styles'])
+
+    text_prompt = prompt_format.format_prompt(texts, styles)
+
+    # A seed in the annotation makes the run reproducible.
+    if 'seed' not in ann:
+        generator = torch.Generator(device=args.device)
+    else:
+        generator = torch.Generator(device=args.device).manual_seed(ann['seed'])
+
+    with torch.cuda.amp.autocast():
+        image = pipeline(
+            prompt=ann['bg_prompt'],
+            text_prompt=text_prompt,
+            texts=texts,
+            bboxes=bboxes,
+            num_inference_steps=50,
+            generator=generator,
+            text_attn_mask=None,
+            guidance_scale=args.cfg,
+        ).images[0]
+    image.save(f'{args.out_folder}/result.png')
diff --git a/text_encoder/Glyph-SDXL-v2/requirements.txt b/text_encoder/Glyph-SDXL-v2/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ff971fa8ff7a7b62aa50006d0a77e8e7034e3566
--- /dev/null
+++ b/text_encoder/Glyph-SDXL-v2/requirements.txt
@@ -0,0 +1,10 @@
+transformers==4.36.2
+diffusers==0.26.1
+mmengine
+accelerate
+torch==2.2.0
+torchvision==0.17.0
+deepspeed
+peft
+webcolors
+gradio
\ No newline at end of file
diff --git a/text_encoder/byt5-small/.gitattributes b/text_encoder/byt5-small/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..d699711d884c16db686245dffb53739ddf85119c
--- /dev/null
+++ b/text_encoder/byt5-small/.gitattributes
@@ -0,0 +1,17 @@
+*.bin.* filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tar.gz filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
diff --git a/text_encoder/byt5-small/README.md b/text_encoder/byt5-small/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d5bcdbff691101114c9e68dd5aed60e5aca833ca
--- /dev/null
+++ b/text_encoder/byt5-small/README.md
@@ -0,0 +1,158 @@
+---
+language:
+- multilingual
+- af
+- am
+- ar
+- az
+- be
+- bg
+- bn
+- ca
+- ceb
+- co
+- cs
+- cy
+- da
+- de
+- el
+- en
+- eo
+- es
+- et
+- eu
+- fa
+- fi
+- fil
+- fr
+- fy
+- ga
+- gd
+- gl
+- gu
+- ha
+- haw
+- hi
+- hmn
+- ht
+- hu
+- hy
+- ig
+- is
+- it
+- iw
+- ja
+- jv
+- ka
+- kk
+- km
+- kn
+- ko
+- ku
+- ky
+- la
+- lb
+- lo
+- lt
+- lv
+- mg
+- mi
+- mk
+- ml
+- mn
+- mr
+- ms
+- mt
+- my
+- ne
+- nl
+- no
+- ny
+- pa
+- pl
+- ps
+- pt
+- ro
+- ru
+- sd
+- si
+- sk
+- sl
+- sm
+- sn
+- so
+- sq
+- sr
+- st
+- su
+- sv
+- sw
+- ta
+- te
+- tg
+- th
+- tr
+- uk
+- und
+- ur
+- uz
+- vi
+- xh
+- yi
+- yo
+- zh
+- zu
+datasets:
+- mc4
+
+license: apache-2.0
+---
+
+# ByT5 - Small
+
+ByT5 is a tokenizer-free version of [Google's T5](https://ai.googleblog.com/2020/02/exploring-transfer-learning-with-t5.html) and generally follows the architecture of [MT5](https://huggingface.co/google/mt5-small).
+
+ByT5 was only pre-trained on [mC4](https://www.tensorflow.org/datasets/catalog/c4#c4multilingual) excluding any supervised training with an average span-mask of 20 UTF-8 characters. Therefore, this model has to be fine-tuned before it is useable on a downstream task.
+
+ByT5 works especially well on noisy text data, *e.g.*, `google/byt5-small` significantly outperforms [mt5-small](https://huggingface.co/google/mt5-small) on [TweetQA](https://arxiv.org/abs/1907.06292).
+
+Paper: [ByT5: Towards a token-free future with pre-trained byte-to-byte models](https://arxiv.org/abs/2105.13626)
+
+Authors: *Linting Xue, Aditya Barua, Noah Constant, Rami Al-Rfou, Sharan Narang, Mihir Kale, Adam Roberts, Colin Raffel*
+
+## Example Inference
+
+ByT5 works on raw UTF-8 bytes and can be used without a tokenizer:
+
+```python
+from transformers import T5ForConditionalGeneration
+import torch
+
+model = T5ForConditionalGeneration.from_pretrained('google/byt5-small')
+
+input_ids = torch.tensor([list("Life is like a box of chocolates.".encode("utf-8"))]) + 3 # add 3 for special tokens
+labels = torch.tensor([list("La vie est comme une boîte de chocolat.".encode("utf-8"))]) + 3 # add 3 for special tokens
+
+loss = model(input_ids, labels=labels).loss # forward pass
+```
+
+For batched inference and training, however, it is recommended to use a tokenizer class for padding:
+
+```python
+from transformers import T5ForConditionalGeneration, AutoTokenizer
+
+model = T5ForConditionalGeneration.from_pretrained('google/byt5-small')
+tokenizer = AutoTokenizer.from_pretrained('google/byt5-small')
+
+model_inputs = tokenizer(["Life is like a box of chocolates.", "Today is Monday."], padding="longest", return_tensors="pt")
+labels = tokenizer(["La vie est comme une boîte de chocolat.", "Aujourd'hui c'est lundi."], padding="longest", return_tensors="pt").input_ids
+
+loss = model(**model_inputs, labels=labels).loss # forward pass
+```
+
+## Abstract
+
+Most widely-used pre-trained language models operate on sequences of tokens corresponding to word or subword units. Encoding text as a sequence of tokens requires a tokenizer, which is typically created as an independent artifact from the model. Token-free models that instead operate directly on raw text (bytes or characters) have many benefits: they can process text in any language out of the box, they are more robust to noise, and they minimize technical debt by removing complex and error-prone text preprocessing pipelines. Since byte or character sequences are longer than token sequences, past work on token-free models has often introduced new model architectures designed to amortize the cost of operating directly on raw text. In this paper, we show that a standard Transformer architecture can be used with minimal modifications to process byte sequences. We carefully characterize the trade-offs in terms of parameter count, training FLOPs, and inference speed, and show that byte-level models are competitive with their token-level counterparts. We also demonstrate that byte-level models are significantly more robust to noise and perform better on tasks that are sensitive to spelling and pronunciation. As part of our contribution, we release a new set of pre-trained byte-level Transformer models based on the T5 architecture, as well as all code and data used in our experiments.
+
+
+
diff --git a/text_encoder/byt5-small/config.json b/text_encoder/byt5-small/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1e1b57007120fd114ec64194d658c6abd74a4fb3
--- /dev/null
+++ b/text_encoder/byt5-small/config.json
@@ -0,0 +1,28 @@
+{
+ "_name_or_path": "/home/patrick/t5/byt5-small",
+ "architectures": [
+ "T5ForConditionalGeneration"
+ ],
+ "d_ff": 3584,
+ "d_kv": 64,
+ "d_model": 1472,
+ "decoder_start_token_id": 0,
+ "dropout_rate": 0.1,
+ "eos_token_id": 1,
+ "feed_forward_proj": "gated-gelu",
+ "gradient_checkpointing": false,
+ "initializer_factor": 1.0,
+ "is_encoder_decoder": true,
+ "layer_norm_epsilon": 1e-06,
+ "model_type": "t5",
+ "num_decoder_layers": 4,
+ "num_heads": 6,
+ "num_layers": 12,
+ "pad_token_id": 0,
+ "relative_attention_num_buckets": 32,
+ "tie_word_embeddings": false,
+ "tokenizer_class": "ByT5Tokenizer",
+ "transformers_version": "4.7.0.dev0",
+ "use_cache": true,
+ "vocab_size": 384
+}
diff --git a/text_encoder/byt5-small/flax_model.msgpack b/text_encoder/byt5-small/flax_model.msgpack
new file mode 100644
index 0000000000000000000000000000000000000000..533dee8f7f6b75816dcef5dca39a5c8f68980dbd
--- /dev/null
+++ b/text_encoder/byt5-small/flax_model.msgpack
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3aafee96d60e98aa18b3c7f73a2c5a2360f1f2f6df79361190a4c9e05c5ab21
+size 1198558445
diff --git a/text_encoder/byt5-small/generation_config.json b/text_encoder/byt5-small/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d52815623b46b7db1c4b957b5a83a8ad30b0146a
--- /dev/null
+++ b/text_encoder/byt5-small/generation_config.json
@@ -0,0 +1,7 @@
+{
+ "_from_model_config": true,
+ "decoder_start_token_id": 0,
+ "eos_token_id": 1,
+ "pad_token_id": 0,
+ "transformers_version": "4.27.0.dev0"
+}
diff --git a/text_encoder/byt5-small/pytorch_model.bin b/text_encoder/byt5-small/pytorch_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b4e8f9c10bce6acfecc6caa898dac3a055c70624
--- /dev/null
+++ b/text_encoder/byt5-small/pytorch_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c5aaf56299d6f2d4eaadad550a40765198828ead4d74f0a15f91cbe0961931a
+size 1198627927
diff --git a/text_encoder/byt5-small/special_tokens_map.json b/text_encoder/byt5-small/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..ccd82cceace616a60eb54886cf3a051c53ec7e74
--- /dev/null
+++ b/text_encoder/byt5-small/special_tokens_map.json
@@ -0,0 +1 @@
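+{"eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>", "<extra_id_100>", "<extra_id_101>", "<extra_id_102>", "<extra_id_103>", "<extra_id_104>", "<extra_id_105>", "<extra_id_106>", "<extra_id_107>", "<extra_id_108>", "<extra_id_109>", "<extra_id_110>", "<extra_id_111>", "<extra_id_112>", "<extra_id_113>", "<extra_id_114>", "<extra_id_115>", "<extra_id_116>", "<extra_id_117>", "<extra_id_118>", "<extra_id_119>", "<extra_id_120>", "<extra_id_121>", "<extra_id_122>", "<extra_id_123>", "<extra_id_124>"]}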
\ No newline at end of file
diff --git a/text_encoder/byt5-small/tf_model.h5 b/text_encoder/byt5-small/tf_model.h5
new file mode 100644
index 0000000000000000000000000000000000000000..36bd08f7ce19d73c13959b09c0e6e039a7ec1f4f
--- /dev/null
+++ b/text_encoder/byt5-small/tf_model.h5
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f97320dd5eb49cb2323a21d584cef7c1cfc9a0976efa978fcef438676b952bc2
+size 1198900664
diff --git a/text_encoder/byt5-small/tokenizer_config.json b/text_encoder/byt5-small/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..512088e166d430a4d25c7b9c6d498a494a16d5b3
--- /dev/null
+++ b/text_encoder/byt5-small/tokenizer_config.json
@@ -0,0 +1 @@
+{"eos_token": {"content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "unk_token": {"content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "extra_ids": 125, "additional_special_tokens": ["", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "