Create app.py
app.py
ADDED
import gradio as gr
import torch
import torch.nn as nn
from transformers import PreTrainedTokenizerFast
from huggingface_hub import hf_hub_download

repo_id = "MarkProMaster229/ClassificationSmall"

# Pull the checkpoint and tokenizer files from the Hub
weights_path = hf_hub_download(repo_id=repo_id, filename="model_weights.pth")
tokenizer_path = hf_hub_download(repo_id=repo_id, filename="tokenizer.json")
vocab_path = hf_hub_download(repo_id=repo_id, filename="vocab.txt")

class TransformerBlock(nn.Module):
    """Pre-norm encoder block: multi-head self-attention plus a GELU feed-forward, both residual."""
    def __init__(self, sizeVector=256, numHeads=8, dropout=0.5):
        super().__init__()
        self.ln1 = nn.LayerNorm(sizeVector)
        self.attn = nn.MultiheadAttention(sizeVector, numHeads, batch_first=True)
        self.dropout_attn = nn.Dropout(dropout)
        self.ln2 = nn.LayerNorm(sizeVector)
        self.ff = nn.Sequential(
            nn.Linear(sizeVector, sizeVector * 4),
            nn.GELU(),
            nn.Linear(sizeVector * 4, sizeVector),
            nn.Dropout(dropout)
        )

    def forward(self, x, attention_mask=None):
        # nn.MultiheadAttention expects True at *padded* positions,
        # so the HF-style mask (1 = real token) is inverted here.
        key_padding_mask = ~attention_mask.bool() if attention_mask is not None else None
        h = self.ln1(x)
        attn_out, _ = self.attn(h, h, h, key_padding_mask=key_padding_mask)
        x = x + self.dropout_attn(attn_out)
        x = x + self.ff(self.ln2(x))
        return x

class TransformerRun(nn.Module):
    def __init__(self, vocabSize=120000, maxLen=100, sizeVector=256, numBlocks=4, numHeads=8, numClasses=3, dropout=0.5):
        super().__init__()
        self.token_emb = nn.Embedding(vocabSize, sizeVector)
        self.pos_emb = nn.Embedding(maxLen, sizeVector)  # learned positional embeddings
        self.layers = nn.ModuleList([
            TransformerBlock(sizeVector=sizeVector, numHeads=numHeads, dropout=dropout)
            for _ in range(numBlocks)
        ])
        self.dropout = nn.Dropout(dropout)
        # The head sees [CLS] and mean pooling concatenated, hence 2 * sizeVector
        self.ln = nn.LayerNorm(sizeVector * 2)
        self.classifier = nn.Linear(sizeVector * 2, numClasses)

    def forward(self, x, attention_mask=None):
        B, T = x.shape
        tok = self.token_emb(x)
        pos = self.pos_emb(torch.arange(T, device=x.device).unsqueeze(0).expand(B, T))
        h = tok + pos
        for layer in self.layers:
            h = layer(h, attention_mask)
        cls_token = h[:, 0, :]     # first ([CLS]) position
        mean_pool = h.mean(dim=1)  # mean over the sequence
        combined = torch.cat([cls_token, mean_pool], dim=1)
        combined = self.ln(self.dropout(combined))
        logits = self.classifier(combined)
        return logits

# Hyperparameters must match the checkpoint ('maxLong' is the repo's key for the max sequence length)
config_dict = {
    'vocabSize': 119547,
    'maxLong': 100,
    'sizeVector': 256,
    'numLayers': 4,
    'numHeads': 8,
    'numClasses': 3
}

model = TransformerRun(
    vocabSize=config_dict['vocabSize'],
    maxLen=config_dict['maxLong'],
    sizeVector=config_dict['sizeVector'],
    numBlocks=config_dict['numLayers'],
    numHeads=config_dict['numHeads'],
    numClasses=config_dict['numClasses'],
    dropout=0.1
)

state_dict = torch.load(weights_path, map_location="cpu")
model.load_state_dict(state_dict)
model.eval()  # inference only: disables dropout

tokenizer = PreTrainedTokenizerFast(tokenizer_file=tokenizer_path, vocab_file=vocab_path)
label_map = {0: "positive", 1: "negative", 2: "neutral"}  # index order fixed at training time

def classify(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=config_dict['maxLong'])
    with torch.no_grad():
        # Pass the attention mask so padded positions are excluded from attention
        logits = model(inputs['input_ids'], inputs['attention_mask'])
    pred_idx = torch.argmax(logits, dim=1).item()
    return label_map[pred_idx]

demo = gr.Interface(
    fn=classify,
    inputs=gr.Textbox(lines=2, placeholder="Введите текст..."),  # "Enter text..."
    outputs="text",
    title="Text Sentiment Classifier",
    description="Простая модель классификации текста"  # "A simple text classification model"
)

demo.launch()
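For a quick sanity check without opening the UI, classify can be called directly, e.g. in a REPL or notebook before demo.launch() (the sample sentence is arbitrary; the vocabulary size suggests a multilingual tokenizer, so other languages should work too):

# Minimal smoke test: one example through the full pipeline
print(classify("The service was excellent, highly recommended!"))  # -> "positive", "negative" or "neutral"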