Spaces:
Build error
Build error
| import pandas as pd | |
def load_stopwords(file_path):
    """Load a stopword list from a text file holding one entry per line.

    Args:
        file_path: Path to a UTF-8 encoded text file (with or without a BOM).

    Returns:
        A set containing every non-blank line with its surrounding
        whitespace removed.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' decodes plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise stay attached to the first stopword and stop it matching.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        raw_lines = f.read().splitlines()
    # Strip each entry: a trailing space, tab or "\r" would create an entry
    # that can never equal a whitespace-split token. Blank lines are skipped.
    stripped = (line.strip() for line in raw_lines)
    return {entry for entry in stripped if entry}
# Module-level stopword set consulted by remove_stopwords_without_nltk.
stop_words = load_stopwords('stopwords.txt')
# Input table; its 'cleaned_text' column is the text that gets filtered below.
df = pd.read_csv('veriler_cleaned.csv')
# Maps the two upper-case letters that Turkish lower-cases differently from
# Python's locale-agnostic str.lower(): "I" -> "ı" and "İ" -> "i".
_TURKISH_UPPER_TO_LOWER = str.maketrans('Iİ', 'ıi')


def remove_stopwords_without_nltk(text, stopwords=None):
    """Remove stopwords from whitespace-separated text.

    Args:
        text: The text to filter. Any non-string value (for example a NaN
            cell from a DataFrame) produces an empty string.
        stopwords: Optional collection of lower-case stopwords to drop.
            Defaults to the module-level ``stop_words`` set.

    Returns:
        The remaining words, in their original casing, joined by single
        spaces; ``""`` when ``text`` is not a string.
    """
    if not isinstance(text, str):
        return ""
    if stopwords is None:
        stopwords = stop_words
    # A word is dropped when either lower-cased form is a stopword. Plain
    # str.lower() turns "İ" into "i" + U+0307 and "I" into "i", so upper-case
    # Turkish words such as "İÇİN" or "IŞIK" would never match without the
    # Turkish-aware form. Keeping the plain form as well means every word
    # removed by the plain check alone is still removed.
    kept = [
        word
        for word in text.split()
        if word.lower() not in stopwords
        and word.translate(_TURKISH_UPPER_TO_LOWER).lower() not in stopwords
    ]
    return ' '.join(kept)
# Filter every 'cleaned_text' cell and store the result in a new column.
df['stopwords_text'] = df['cleaned_text'].apply(remove_stopwords_without_nltk)
# Print the first rows of both columns side by side as a quick visual check.
print(df[['cleaned_text', 'stopwords_text']].head())
# Write the table, including the new column, without the index column.
df.to_csv('temizlenmis_veri.csv', index=False)
| """ | |
| import pandas as pd | |
| import nltk | |
| from nltk.tokenize import word_tokenize | |
| nltk.download('stopwords') | |
| nltk.download('punkt') | |
| from nltk.corpus import stopwords | |
| stop_words = set(stopwords.words('turkish')) | |
| def load_custom_stopwords(file_path): | |
| with open(file_path, 'r', encoding='utf-8') as f: | |
| custom_stopwords = f.read().splitlines() | |
| return set(custom_stopwords) | |
| custom_stopwords = load_custom_stopwords('stopwords.txt') | |
| stop_words.update(custom_stopwords) | |
| df = pd.read_csv('veriler_cleaned.csv') | |
| def remove_stopwords(text): | |
| if isinstance(text, str): | |
| words = word_tokenize(text) | |
| filtered_words = [word for word in words if word.lower() not in stop_words] # Stopwords'leri çıkar | |
| return ' '.join(filtered_words) | |
| else: | |
| return "" | |
| df['stopwords_text'] = df['cleaned_text'].apply(remove_stopwords) | |
| print(df[['cleaned_text', 'stopwords_text']].head()) | |
| df.to_csv('temizlenmis_veri.csv', index=False) | |
| """ |