max_length: 300
---

# LongCap: Finetuned [BLIP](https://huggingface.co/Salesforce/blip-image-captioning-large) for generating long captions of images, suitable for prompts for text-to-image generation and captioning text-to-image datasets

## Usage

### Running the model on CPU

<details>
<summary> Click to expand </summary>

```python
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("unography/blip-large-long-cap")
model = BlipForConditionalGeneration.from_pretrained("unography/blip-large-long-cap")

img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

inputs = processor(raw_image, return_tensors="pt")
pixel_values = inputs.pixel_values
out = model.generate(pixel_values=pixel_values, max_length=250)
print(processor.decode(out[0], skip_special_tokens=True))
>>> a woman sitting on the beach, wearing a checkered shirt and a dog collar. the woman is interacting with the dog, which is positioned towards the left side of the image. the setting is a beachfront with a calm sea and a golden hue.
```
</details>
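
The same checkpoint should also work through the high-level `pipeline` API. A minimal sketch, assuming the `image-to-text` pipeline task (not shown on this card) wraps the same processor and model, with `max_new_tokens` standing in for `max_length`:

```python
# Minimal sketch (assumption, not from this card): the "image-to-text"
# pipeline wires up the same BlipProcessor + BlipForConditionalGeneration.
from transformers import pipeline

captioner = pipeline("image-to-text", model="unography/blip-large-long-cap")
result = captioner(
    "https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg",
    max_new_tokens=250,  # leave room for long captions, as max_length=250 does above
)
print(result[0]["generated_text"])
```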

### Running the model on GPU

<details>
<summary> Click to expand </summary>

```python
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("unography/blip-large-long-cap")
model = BlipForConditionalGeneration.from_pretrained("unography/blip-large-long-cap").to("cuda")

img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

inputs = processor(raw_image, return_tensors="pt").to("cuda")
pixel_values = inputs.pixel_values
out = model.generate(pixel_values=pixel_values, max_length=250)
print(processor.decode(out[0], skip_special_tokens=True))
>>> a woman sitting on the beach, wearing a checkered shirt and a dog collar. the woman is interacting with the dog, which is positioned towards the left side of the image. the setting is a beachfront with a calm sea and a golden hue.
```
</details>
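
Since the card positions the model for captioning text-to-image datasets, batching images through the processor is the natural extension. A hedged sketch, not from this card, assuming the processor accepts a list of PIL images and stacks them into one `pixel_values` batch (the `urls` list is a placeholder):

```python
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("unography/blip-large-long-cap")
model = BlipForConditionalGeneration.from_pretrained("unography/blip-large-long-cap").to("cuda")

# Placeholder: swap in your dataset's image URLs or local paths.
urls = [
    "https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg",
]
images = [Image.open(requests.get(u, stream=True).raw).convert("RGB") for u in urls]

# A list of images is stacked into a single (batch, 3, H, W) tensor.
inputs = processor(images=images, return_tensors="pt").to("cuda")
out = model.generate(pixel_values=inputs.pixel_values, max_length=250)
for caption in processor.batch_decode(out, skip_special_tokens=True):
    print(caption)
```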

### Running the model on GPU in half precision (float16)

<details>
<summary> Click to expand </summary>

```python
import torch
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("unography/blip-large-long-cap")
model = BlipForConditionalGeneration.from_pretrained("unography/blip-large-long-cap", torch_dtype=torch.float16).to("cuda")

img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

inputs = processor(raw_image, return_tensors="pt").to("cuda", torch.float16)
pixel_values = inputs.pixel_values
out = model.generate(pixel_values=pixel_values, max_length=250)
print(processor.decode(out[0], skip_special_tokens=True))
>>> a woman sitting on the beach, wearing a checkered shirt and a dog collar. the woman is interacting with the dog, which is positioned towards the left side of the image. the setting is a beachfront with a calm sea and a golden hue.
```
</details>
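
All three examples decode greedily with `max_length=250`. If the captions loop or truncate, standard `generate()` arguments are the place to experiment; the values below are assumptions for illustration, not settings recommended by this card:

```python
# Exploratory decoding sketch (assumed values, not card recommendations).
# Reuses `model`, `pixel_values`, and `processor` from any example above.
out = model.generate(
    pixel_values=pixel_values,
    max_length=250,
    num_beams=3,             # beam search instead of greedy decoding
    repetition_penalty=1.2,  # damp the token loops common in long captions
)
print(processor.decode(out[0], skip_special_tokens=True))
```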