Spaces:
Sleeping
Sleeping
| from typing import ( | |
| Dict, | |
| List, | |
| Tuple, | |
| ) | |
| import torch | |
| from transformers import ( | |
| AutoModelForSequenceClassification, | |
| AutoTokenizer, | |
| ) | |
| from src.application.config import AI_TEXT_CLASSIFICATION_MODEL | |
def load_model_and_tokenizer(
    model_path: str = AI_TEXT_CLASSIFICATION_MODEL,
) -> Tuple[AutoTokenizer, AutoModelForSequenceClassification]:
    """
    Load a sequence-classification model together with its tokenizer.

    Args:
        model_path: Location passed to ``from_pretrained`` for both the
            tokenizer and the model. Defaults to
            ``AI_TEXT_CLASSIFICATION_MODEL``.

    Returns:
        A ``(tokenizer, model)`` tuple, in that order. ``eval()`` has been
        called on the model before it is returned.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_path)
    classifier = AutoModelForSequenceClassification.from_pretrained(
        model_path,
    )
    # Switch to evaluation mode explicitly: this module only runs inference.
    classifier.eval()
    return loaded_tokenizer, classifier
def predict(
    texts: List[str],
    model: AutoModelForSequenceClassification,
    tokenizer: AutoTokenizer,
) -> List[Dict[str, object]]:
    """
    Classify each input text as GPT-4o or GPT-4o mini.

    Args:
        texts: The input text strings to classify. A single bare string is
            also accepted and is treated as a one-element list.
        model: The loaded model for sequence classification.
        tokenizer: The loaded tokenizer.

    Returns:
        A list with one dictionary per input text, in input order. Each
        dictionary has the keys ``"input"`` (the text), ``"prediction"``
        (the label string) and ``"confidence"`` (the softmax probability
        of the predicted class, a float). An empty input yields ``[]``.
    """
    # A bare string would be tokenized as ONE sequence, yet zip() below would
    # iterate its characters and report only the first character as "input".
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    # Nothing to classify: skip the tokenizer/model round trip entirely.
    if not texts:
        return []

    # Class index produced by the model -> human-readable label.
    label_map = {0: "GPT-4o", 1: "GPT-4o mini"}

    inputs = tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    # Inference only, so no autograd bookkeeping is needed.
    with torch.no_grad():
        outputs = model(**inputs)
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
        # Highest class probability per row and the index of that class.
        confidence, predictions = torch.max(probabilities, dim=-1)

    return [
        {"input": text, "prediction": label_map[pred], "confidence": conf}
        for text, pred, conf in zip(
            texts,
            predictions.tolist(),
            confidence.tolist(),
        )
    ]
if __name__ == "__main__":
    # Smoke test: classify one sample paragraph and print the outcome.
    text = """The resignation brings a long political chapter to an end.
Trudeau has been in office since 2015, when he brought the Liberals back
to power from the political wilderness.
"""
    tokenizer, model = load_model_and_tokenizer("ductuan024/gpts-detector")
    # predict() expects a list of texts; wrap the single sample so the
    # returned "input" field is the whole text rather than one character.
    predictions = predict([text], model, tokenizer)
    print(predictions[0]["prediction"])
    print(predictions[0]["confidence"])