Spaces:
Sleeping
Sleeping
initial commit
Browse files- .gitattributes +3 -0
- anger.png +0 -0
- app.py +15 -0
- bratt.jpg +0 -0
- celebrity_custom_model +3 -0
- celebrity_resnet_model +3 -0
- emotion_resnet_model +3 -0
- joha.jpg +0 -0
- model.py +96 -0
- predict.py +46 -0
- utils.py +22 -0
.gitattributes
CHANGED
|
@@ -32,3 +32,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
celebrity_custom_model filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
celebrity_resnet_model filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
emotion_resnet_model filter=lfs diff=lfs merge=lfs -text
|
anger.png
ADDED
|
app.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from predict import predict_one_image
|
| 3 |
+
|
| 4 |
+
# Example images offered in the UI; the paths are relative to the app's
# working directory.
examples = ['joha.jpg', 'anger.png', 'bratt.jpg']

# Single image input. type='filepath' makes Gradio hand the prediction
# function a path string rather than decoded pixel data.
input_image = [gr.components.Image(type='filepath', label='Input Image')]

# Build the interface first, then start it, so the two steps read separately.
demo = gr.Interface(
    fn=predict_one_image,
    inputs=input_image,
    outputs='text',
    title="CELEBRITY & EMOTION RECOGNITION APP",
    examples=examples,
    cache_examples=False,
)
demo.launch()
|
bratt.jpg
ADDED
|
celebrity_custom_model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:064745679f779aeb31206e7b7190077367c89dfa97145bc452ab9fb1aafbc1b9
|
| 3 |
+
size 47943319
|
celebrity_resnet_model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56b67d6619d01cc4fcf6b5639ae59003c3cb878c0e7acaa5a7d95791b07bd154
|
| 3 |
+
size 103103793
|
emotion_resnet_model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18f37453beaccf41597f85730b803d009d3b109c02d1ccd2144906b01c030e4e
|
| 3 |
+
size 103103793
|
joha.jpg
ADDED
|
model.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
# Output sizes of the two classification heads.
N_EMOTIONS = 8
N_CELEBRITIES = 17


class CustomModel(nn.Module):
    """Small convolutional network with an emotion head and a celebrity head.

    ``mode`` selects what ``forward`` returns:
      * ``'emotion'``   -> emotion logits only
      * ``'celebrity'`` -> celebrity logits only
      * anything else   -> the tuple ``(emotion_logits, celebrity_logits)``

    The flattened feature size (32*54*54) is hard-coded, so the network only
    accepts 3x224x224 inputs.
    """

    def __init__(self, mode='emotion'):
        super().__init__()
        self.mode = mode

        def conv_bn_act(c_in, c_out):
            # 3x3 convolution without padding (shrinks each spatial side by 2),
            # followed by batch norm and LeakyReLU.
            return [
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(0.2, inplace=True),
            ]

        # The layer order must not change: the positions inside the Sequential
        # are part of the state_dict keys used by saved checkpoints.
        stages = []
        stages += conv_bn_act(3, 64)      # 3x224x224 -> 64x222x222
        stages += conv_bn_act(64, 32)     # -> 32x220x220
        stages += [nn.MaxPool2d(kernel_size=2), nn.Dropout(0.2)]  # -> 32x110x110
        stages += conv_bn_act(32, 32)     # -> 32x108x108
        stages += [nn.MaxPool2d(kernel_size=2), nn.Dropout(0.3)]  # -> 32x54x54
        stages.append(nn.Flatten())
        self.backbone = nn.Sequential(*stages)

        self.in_features = 32 * 54 * 54
        self.neck = nn.Sequential(
            nn.Linear(self.in_features, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
        )
        self.emotion_classifier = nn.Linear(64, N_EMOTIONS)
        self.celebrity_classifier = nn.Linear(64, N_CELEBRITIES)

    def forward(self, image):
        """Return the logits selected by ``self.mode`` for a batch of images."""
        embedding = self.neck(self.backbone(image))

        if self.mode == 'emotion':
            return self.emotion_classifier(embedding)
        if self.mode == 'celebrity':
            return self.celebrity_classifier(embedding)

        # Any other mode: run both heads, emotion first.
        emotion_logits = self.emotion_classifier(embedding)
        celebrity_logits = self.celebrity_classifier(embedding)
        return emotion_logits, celebrity_logits
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
import torchvision.models as models
|
| 64 |
+
class ResNet50Model(nn.Module):
    """torchvision ResNet-50 backbone with an emotion head and a celebrity head.

    ``mode`` selects what ``forward`` returns:
      * ``'emotion'``   -> emotion logits only
      * ``'celebrity'`` -> celebrity logits only
      * anything else   -> the tuple ``(emotion_logits, celebrity_logits)``
    """

    def __init__(self, mode='emotion'):
        super().__init__()
        self.mode = mode

        # The positional False is kept exactly as before.
        # NOTE(review): presumably this is torchvision's legacy `pretrained`
        # flag (i.e. no pretrained weights are downloaded) - confirm against
        # the installed torchvision version.
        self.backbone = models.resnet50(False)

        # The neck consumes the backbone's output vector, taken to be 1000-wide.
        self.in_features = 1000
        neck_layers = [
            nn.Linear(self.in_features, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
        ]
        self.neck = nn.Sequential(*neck_layers)
        self.emotion_classifier = nn.Linear(64, N_EMOTIONS)
        self.celebrity_classifier = nn.Linear(64, N_CELEBRITIES)

    def forward(self, image):
        """Return the logits selected by ``self.mode`` for a batch of images."""
        embedding = self.neck(self.backbone(image))

        if self.mode == 'emotion':
            return self.emotion_classifier(embedding)
        if self.mode == 'celebrity':
            return self.celebrity_classifier(embedding)

        # Any other mode: run both heads, emotion first.
        emotion_logits = self.emotion_classifier(embedding)
        celebrity_logits = self.celebrity_classifier(embedding)
        return emotion_logits, celebrity_logits
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
|
predict.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
from utils import read_image,get_valid_augs
|
| 3 |
+
import torch
|
| 4 |
+
import torch.nn.functional as F
|
| 5 |
+
from model import ResNet50Model
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
# Checkpoint files (state dicts) for the two ResNet50Model instances.
CKPT_EMOTION = 'emotion_resnet_model'
CKPT_CELEBRITY = 'celebrity_resnet_model'

# Class index -> human-readable label for each classifier head.
FaceInverseTargetMapper = {
    0: 'Tom Hanks', 1: 'Sandra Bullock', 2: 'Natalie Portman',
    3: 'Scarlett Johansson', 4: 'Robert Downey Jr', 5: 'Nicole Kidman',
    6: 'Brad Pitt', 7: 'Hugh Jackman', 8: 'Tom Cruise',
    9: 'Leonardo DiCaprio', 10: 'Megan Fox', 11: 'Johnny Depp',
    12: 'Will Smith', 13: 'Denzel Washington', 14: 'Jennifer Lawrence',
    15: 'Kate Winslet', 16: 'Angelina Jolie',
}
EmotionMapper = {
    0: 'sadness', 1: 'contempt', 2: 'happiness', 3: 'surprise',
    4: 'fear', 5: 'anger', 6: 'disgust', 7: 'neutrality',
}

# Loaded models keyed by (mode, checkpoint path), so the checkpoints are read
# from disk once per process instead of once per request.
_MODEL_CACHE = {}


def _load_model(mode, ckpt_path):
    """Return the ResNet50Model for ``mode``, loading ``ckpt_path`` on first use."""
    key = (mode, ckpt_path)
    model = _MODEL_CACHE.get(key)
    if model is None:
        model = ResNet50Model(mode)
        # The checkpoints are local files shipped with the app; torch.load
        # unpickles them, so never point this at untrusted input.
        state = torch.load(ckpt_path, map_location=torch.device('cpu'))
        model.load_state_dict(state)
        # Inference mode: without this, BatchNorm layers normalise with the
        # statistics of the single input image and keep updating their
        # running averages on every request.
        model.eval()
        _MODEL_CACHE[key] = model
    return model


def _top_class(model, batch):
    """Return ``(class_index, probability)`` of the top prediction for a one-image batch."""
    logits = model(batch)
    # dim=1 is the class axis of the (1, n_classes) logits; passing it
    # explicitly avoids the deprecated implicit-dim behaviour.
    probs = F.softmax(logits, dim=1).cpu().numpy()
    return int(np.argmax(probs, axis=1)[0]), float(np.max(probs, axis=1)[0])


def predict_one_image(path):
    """Predict the celebrity and the emotion shown in the image at ``path``.

    Args:
        path: file path of the image to classify.

    Returns:
        A sentence naming the detected celebrity and emotion. When the top
        celebrity probability is below 0.45 the person is reported as unknown.
    """
    image = read_image(path)
    image = get_valid_augs()(image=image)['image']
    # as_tensor converts without the copy-construct warning that
    # torch.tensor() emits when it is handed an existing tensor.
    batch = torch.as_tensor(image, dtype=torch.float).reshape((1, 3, 224, 224))

    emotion_model = _load_model('emotion', CKPT_EMOTION)
    celebrity_model = _load_model('celebrity', CKPT_CELEBRITY)

    with torch.no_grad():
        emotion, _ = _top_class(emotion_model, batch)
        celebrity, celebrity_proba = _top_class(celebrity_model, batch)

    # NOTE(review): "Unkonwn" is misspelled in the user-facing message; it is
    # left unchanged here so the emitted text stays the same.
    if celebrity_proba < 0.45:
        return f"Unkonwn Person Detected with emotion {EmotionMapper[emotion]} "
    return f"Detected {FaceInverseTargetMapper[celebrity]} with emotion {EmotionMapper[emotion]} "
|
utils.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import albumentations as A
|
| 3 |
+
from albumentations.pytorch import ToTensorV2
|
| 4 |
+
|
| 5 |
+
# Per-channel mean and standard deviation handed to A.Normalize in
# get_valid_augs (applied there with max_pixel_value=255).
IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
|
| 7 |
+
|
| 8 |
+
def read_image(path):
    """Read the image at ``path`` with OpenCV and return it in RGB channel order.

    Args:
        path: file path of the image to load.

    Returns:
        The image array converted from OpenCV's BGR order to RGB.

    Raises:
        ValueError: if OpenCV could not read or decode the file.
    """
    img = cv2.imread(path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with a clear message rather than with an opaque error in cvtColor.
    if img is None:
        raise ValueError(f"Could not read image from path: {path!r}")
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
| 12 |
+
|
| 13 |
+
def get_valid_augs():
    """Build the preprocessing pipeline: resize to 224x224, normalise, convert to tensor."""
    resize = A.Resize(height=224, width=224, always_apply=True, p=1)
    normalize = A.Normalize(
        mean=IMAGENET_DEFAULT_MEAN,
        std=IMAGENET_DEFAULT_STD,
        max_pixel_value=255,
    )
    to_tensor = ToTensorV2()
    # The transforms run in list order.
    return A.Compose([resize, normalize, to_tensor])
|