Commit
·
4d9e29f
1
Parent(s):
c85d79c
Update download comments
Browse files- SentimentClassification.ipynb +12 -0
- download_data.sh +4 -2
SentimentClassification.ipynb
CHANGED
|
@@ -55,9 +55,12 @@
|
|
| 55 |
],
|
| 56 |
"source": [
|
| 57 |
"# Download the datasets\n",
|
|
|
|
|
|
|
| 58 |
"if not os.path.isfile(\"local_datasets/twitter-airline-sentiment/Tweets.csv\"):\n",
|
| 59 |
" raise ValueError(\"Please launch the `download_data.sh` script to get datasets\")\n",
|
| 60 |
"\n",
|
|
|
|
| 61 |
"train = pd.read_csv(\"local_datasets/twitter-airline-sentiment/Tweets.csv\", index_col=0)\n",
|
| 62 |
"text_X = train[\"text\"]\n",
|
| 63 |
"y = train[\"airline_sentiment\"]\n",
|
|
@@ -981,6 +984,15 @@
|
|
| 981 |
"metadata": {
|
| 982 |
"execution": {
|
| 983 |
"timeout": 10800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 984 |
}
|
| 985 |
},
|
| 986 |
"nbformat": 4,
|
|
|
|
| 55 |
],
|
| 56 |
"source": [
|
| 57 |
"# Download the datasets\n",
|
| 58 |
+
"# The dataset can be downloaded through the `download_data.sh` script, which requires setting up\n",
|
| 59 |
+
"# Kaggle's CLI, or manually at https://www.kaggle.com/datasets/crowdflower/twitter-airline-sentiment\n",
|
| 60 |
"if not os.path.isfile(\"local_datasets/twitter-airline-sentiment/Tweets.csv\"):\n",
|
| 61 |
" raise ValueError(\"Please launch the `download_data.sh` script to get datasets\")\n",
|
| 62 |
"\n",
|
| 63 |
+
"\n",
|
| 64 |
"train = pd.read_csv(\"local_datasets/twitter-airline-sentiment/Tweets.csv\", index_col=0)\n",
|
| 65 |
"text_X = train[\"text\"]\n",
|
| 66 |
"y = train[\"airline_sentiment\"]\n",
|
|
|
|
| 984 |
"metadata": {
|
| 985 |
"execution": {
|
| 986 |
"timeout": 10800
|
| 987 |
+
},
|
| 988 |
+
"kernelspec": {
|
| 989 |
+
"display_name": ".venv",
|
| 990 |
+
"language": "python",
|
| 991 |
+
"name": "python3"
|
| 992 |
+
},
|
| 993 |
+
"language_info": {
|
| 994 |
+
"name": "python",
|
| 995 |
+
"version": "3.10.11"
|
| 996 |
}
|
| 997 |
},
|
| 998 |
"nbformat": 4,
|
download_data.sh
CHANGED
|
@@ -2,8 +2,10 @@
|
|
| 2 |
|
| 3 |
set -e
|
| 4 |
|
| 5 |
-
# You need to have a valid ~/.kaggle/kaggle.json, that you can
|
| 6 |
-
# on your account page in kaggle.com
|
|
|
|
|
|
|
| 7 |
rm -rf local_datasets
|
| 8 |
mkdir local_datasets
|
| 9 |
cd local_datasets
|
|
|
|
| 2 |
|
| 3 |
set -e
|
| 4 |
|
| 5 |
+
# You need to install the kaggle package using pip and then have a valid ~/.kaggle/kaggle.json, which you can
|
| 6 |
+
# generate from "Create new API token" on your account page in kaggle.com
|
| 7 |
+
# Alternatively, the dataset can be downloaded manually at
|
| 8 |
+
# https://www.kaggle.com/datasets/crowdflower/twitter-airline-sentiment
|
| 9 |
rm -rf local_datasets
|
| 10 |
mkdir local_datasets
|
| 11 |
cd local_datasets
|