import pickle
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
from collections import deque
import gradio as gr

# Constants (fallback defaults)
SEQUENCE_LENGTH = 30
FEATURE_LENGTH = 168

# Load model & preprocessing
def load_model_and_preprocessing():
    """Load the fitted scaler/label encoder, the sequence parameters, and the trained LSTM."""
    global scaler, label_encoder, model, labels_dict, SEQUENCE_LENGTH, FEATURE_LENGTH

    with open('lstm_preprocessing.pickle', 'rb') as f:
        preproc = pickle.load(f)
    scaler = preproc['scaler']
    label_encoder = preproc['label_encoder']

    # Override the fallback defaults with the values the model was trained with
    SEQUENCE_LENGTH = preproc['timesteps']
    FEATURE_LENGTH = preproc['n_features']

    model = tf.keras.models.load_model('lstm_model.h5')

    labels_dict = {0:'salam',1:'good morning',2:'thanks',3:'ana',4:'anta',5:'ante',6:'hua',
                   7:'hea',8:'antm',9:'hm',10:'name',11:'how r u',12:'thanks god',13:'happy',
                   14:'sad',15:'angry',16:'good',17:'bad',18:'tired',19:'sick',20:'see',21:'say',
                   22:'talk',23:'walk',24:'went',25:'came',26:'home',27:'eat',28:'slept',29:'university',
                   30:'today',31:'tmrw',32:'sunday',33:'tuesday',34:'thursday',35:'friday',36:'week',
                   37:'month',38:'year',39:'when',40:'I know',41:'thinking',42:'forgotten',43:'love',
                   44:'I want',45:'helps',46:'not allowed',47:'agree',48:'together',49:'different'}
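    # Note: if the pickled label_encoder was fitted on these same class names, an
    # equivalent mapping could in principle be built from it, e.g.
    # {i: cls for i, cls in enumerate(label_encoder.classes_)}; the dict is kept
    # hardcoded here because the encoder's classes are not guaranteed to be these
    # display strings.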

load_model_and_preprocessing()

# MediaPipe setup
mp_hands = mp.solutions.hands
# static_image_mode=True runs hand detection on every frame, which suits independent
# webcam captures; set it to False for continuous video so landmarks are tracked
# across frames instead.
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=2)

def extract_hand_features(image):
    """Extract normalised (x, y) hand-landmark features from a BGR (OpenCV-style) frame."""
    data_aux = np.zeros(FEATURE_LENGTH, dtype=np.float32)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # MediaPipe expects RGB input
    results = hands.process(image_rgb)
    if not results.multi_hand_landmarks:
        return None

    for hand_idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
        if hand_idx >= 2:
            break
        # Translate each hand so its minimum x/y landmark sits at the origin
        min_x = min(lm.x for lm in hand_landmarks.landmark)
        min_y = min(lm.y for lm in hand_landmarks.landmark)
        # Each hand occupies a fixed block of 84 slots (presumably matching the layout
        # used at training time); only the first 42 (21 landmarks x 2 coords) are
        # written, the remaining slots stay zero.
        base_idx = hand_idx * 84
        for i, lm in enumerate(hand_landmarks.landmark):
            data_aux[base_idx + i * 2] = lm.x - min_x
            data_aux[base_idx + i * 2 + 1] = lm.y - min_y

    return data_aux
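
# Minimal usage sketch for the extractor (assumes a local test image, e.g. 'sample.jpg'):
#   feats = extract_hand_features(cv2.imread('sample.jpg'))
#   print(None if feats is None else feats.shape)   # expected: (FEATURE_LENGTH,)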

# Rolling window holding the most recent per-frame feature vectors
sequence_buffer = deque(maxlen=SEQUENCE_LENGTH)

def predict_sign(image):
    # Gradio supplies an RGB numpy array; convert to BGR so it matches the
    # OpenCV-style frame that extract_hand_features expects.
    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    features = extract_hand_features(image)
    if features is None:
        return "No hand detected"
    sequence_buffer.append(features)

    # Wait until a full window of frames has been collected
    if len(sequence_buffer) < SEQUENCE_LENGTH:
        return f"Waiting for sequence: {len(sequence_buffer)}/{SEQUENCE_LENGTH}"

    # Scale the per-frame features with the fitted scaler, then restore the (1, T, F) shape
    sequence_data = np.array(sequence_buffer).reshape(1, SEQUENCE_LENGTH, FEATURE_LENGTH)
    scaled = scaler.transform(sequence_data.reshape(-1, FEATURE_LENGTH)).reshape(1, SEQUENCE_LENGTH, FEATURE_LENGTH)
    prediction_scores = model.predict(scaled, verbose=0)[0]
    predicted_idx = int(np.argmax(prediction_scores))
    confidence = prediction_scores[predicted_idx]
    label = labels_dict.get(predicted_idx, "Unknown")

    return f"{label} ({confidence:.2f})"
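
# Optional helper (a sketch, not wired into the interface below): clearing the rolling
# buffer lets a new sign start from an empty sequence instead of mixing frames from the
# previous capture, e.g. via an extra gr.Button in a gr.Blocks layout.
def reset_sequence():
    sequence_buffer.clear()
    return f"Sequence buffer cleared (0/{SEQUENCE_LENGTH})"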

# Gradio interface (Gradio 3.x API; in Gradio 4+, gr.Image takes sources=["webcam"]
# and no longer accepts the `source`/`tool` keyword arguments)
interface = gr.Interface(fn=predict_sign,
                         inputs=gr.Image(source="webcam", tool="editor", type="numpy"),
                         outputs="text",
                         title="Sign Language Recognition",
                         description="Perform a sign, capture it, and wait for the prediction.")

# Launch the app
interface.launch()