junkim100 committed on
Commit
b6a87fb
·
verified ·
1 Parent(s): dba1050

Fix: Add position_ids to the CLIP encoder weights and add config.json, to match Volkopat/DeepSeek-DeepEncoder exactly

Browse files
Files changed (2) hide show
  1. clip_encoder.pth +2 -2
  2. config.json +29 -0
clip_encoder.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d3f97c24bd69378a5f5a657ad81223134025ebf52f784eb32042d4d2b57404f
3
- size 606449932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d9fb61233775d3f81587378737cc307265356fe9ddb677aa67c9e418b09872c
3
+ size 606452303
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sam": {
3
+ "params": 95569152,
4
+ "architecture": "SAM ViT-B",
5
+ "image_size": 1024,
6
+ "patch_size": 16,
7
+ "embed_dim": 768,
8
+ "depth": 12,
9
+ "num_heads": 12
10
+ },
11
+ "clip": {
12
+ "params": 303177728,
13
+ "architecture": "CLIP-Large",
14
+ "image_size": 224,
15
+ "patch_size": 14,
16
+ "width": 1024,
17
+ "layers": 24,
18
+ "heads": 16
19
+ },
20
+ "projector": {
21
+ "params": 2622720,
22
+ "type": "linear",
23
+ "input_dim": 2048,
24
+ "output_dim": 1280
25
+ },
26
+ "total_params": 401369600,
27
+ "output_tokens": 256,
28
+ "output_dim": 1280
29
+ }