Spaces:
Runtime error
Runtime error
add
Browse files
requirements.txt
CHANGED
|
@@ -1,11 +1,12 @@
|
|
| 1 |
transformers[torch]==4.36.2
|
| 2 |
numpy==1.26.3
|
| 3 |
#scikit-learn==1.3.2
|
| 4 |
-
|
| 5 |
datasets==2.16.1
|
| 6 |
evaluate==0.4.1
|
| 7 |
accelerate==0.25.0
|
| 8 |
seqeval==1.2.2
|
| 9 |
pandas==2.1.4
|
| 10 |
gradio==4.13.0
|
| 11 |
-
pydantic_settings==2.1.0
|
|
|
|
|
|
| 1 |
transformers[torch]==4.36.2
|
| 2 |
numpy==1.26.3
|
| 3 |
#scikit-learn==1.3.2
|
| 4 |
+
matplotlib==3.8.2
|
| 5 |
datasets==2.16.1
|
| 6 |
evaluate==0.4.1
|
| 7 |
accelerate==0.25.0
|
| 8 |
seqeval==1.2.2
|
| 9 |
pandas==2.1.4
|
| 10 |
gradio==4.13.0
|
| 11 |
+
pydantic_settings==2.1.0
|
| 12 |
+
sentencepiece==0.1.99
|
source/services/predicting_effective_arguments/train/02_classification copy.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
from datasets import load_dataset
|
| 5 |
+
from transformers import AutoTokenizer
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# Column names used throughout the script.
TARGET = "discourse_effectiveness"
TEXT = "discourse_text"

# NOTE: paths are relative to the current working directory, so the script
# must be launched from the directory that contains `data/`.
train_df = pd.read_csv("data/raw_data/train.csv")
test_df = pd.read_csv("data/raw_data/test.csv")


def plot_exploratory_charts(df):
    """Show the exploratory plots for *df*.

    This code used to sit inside a bare triple-quoted string to keep it from
    running.  It is now a helper that is defined but not called, so the
    script still does not plot by default; call
    ``plot_exploratory_charts(train_df)`` to see the charts.

    Args:
        df: DataFrame holding the ``TARGET``, ``TEXT`` and
            ``discourse_type`` columns.
    """
    # Class balance of the label column.
    df[TARGET].value_counts(ascending=True).plot.barh()
    plt.title("Frequency of Classes")
    plt.show()

    # Distribution of discourse types.
    df["discourse_type"].value_counts(ascending=True).plot.barh()
    plt.title("Frequency of discourse_type")
    plt.show()

    # Words per text, grouped by label.  `.str.len()` yields NaN for missing
    # text instead of raising like `.apply(len)`, and the temporary frame
    # keeps the caller's DataFrame unmodified.
    word_counts = df[[TARGET]].assign(
        **{"Words Per text": df[TEXT].str.split().str.len()}
    )
    word_counts.boxplot(
        "Words Per text",
        by=TARGET,
        grid=False,
        showfliers=False,
        color="black",
    )
    plt.suptitle("")
    plt.xlabel("")
    plt.show()


model_ckpt = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)

# A bare `tokenizer.model_max_length` expression only echoes in a notebook or
# REPL; print it so the value is visible when this runs as a script.
print(tokenizer.model_max_length)
|