import json

from datasets import load_dataset, load_metric
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer

# Load the run configuration; it supplies the name of the pretrained base model.
with open('../config/config.json') as f:
    config = json.load(f)

# Load the fine-tuned classification model from disk and the tokenizer of the
# base model named in the config.
model = AutoModelForSequenceClassification.from_pretrained('../model')
tokenizer = AutoTokenizer.from_pretrained(config['model_name'])
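
# If the tokenizer was saved alongside the fine-tuned model (an assumption, not
# shown above), it could also be loaded from the same directory so it is
# guaranteed to match the checkpoint:
#
#   tokenizer = AutoTokenizer.from_pretrained('../model')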

# Load the held-out test split; the CSV is expected to contain a 'text' column
# (and a label column for the metric). Tokenize with fixed-length padding and truncation.
dataset = load_dataset('csv', data_files={'test': '../data/test.csv'})
tokenized_datasets = dataset.map(
    lambda x: tokenizer(x['text'], padding="max_length", truncation=True),
    batched=True,
)

# Accuracy computed over the model's argmax predictions.
metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    # eval_pred holds the raw logits and the reference labels for the eval set.
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)
    return metric.compute(predictions=predictions, references=labels)
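
# Note: newer releases of the datasets library deprecate load_metric in favour of
# the separate `evaluate` package; if that applies to your environment, the
# equivalent call would be:
#
#   import evaluate
#   metric = evaluate.load("accuracy")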

# Only evaluation is done here, so no TrainingArguments are passed and the
# Trainer falls back to its defaults.
trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
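
# To control evaluation settings explicitly (e.g. batch size or output directory),
# a TrainingArguments object could be passed instead; the values below are
# illustrative, not taken from the project config:
#
#   from transformers import TrainingArguments
#   args = TrainingArguments(output_dir='./eval_output', per_device_eval_batch_size=32)
#   trainer = Trainer(model=model, args=args, tokenizer=tokenizer, compute_metrics=compute_metrics)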

# Run evaluation on the test split and print the resulting metrics; keys are
# prefixed with eval_ (e.g. eval_accuracy from compute_metrics).
results = trainer.evaluate(tokenized_datasets['test'])
print(results)