# NOTE(review): extraction artifacts ("Spaces:" / "Build error" log lines) removed — they were not part of the program source.
"""Fetch prompt texts from MongoDB, then tokenize and encode them with a BERT pipeline."""

from dataPipeline import DataPipeline
from my_tokenize import Database
from yeni_tokenize import TokenizerProcessor  # NOTE(review): unused in this script — confirm before removing
from transformers import BertTokenizer  # NOTE(review): unused in this script — confirm before removing

# Initialize the tokenization pipeline with a cased BERT vocabulary.
tokenizer_name = 'bert-base-cased'
pipeline = DataPipeline(tokenizer_name=tokenizer_name, max_length=100)

# Pull input texts from MongoDB; each returned document is expected to carry
# a "Prompt" field — TODO confirm against Database.get_input_texts().
input_texts = [doc["Prompt"] for doc in Database.get_input_texts()]

# Tokenize the texts and show each original next to its token sequence.
tokenized_texts = pipeline.tokenize_texts(input_texts)
print("Tokenized Texts:")
for text, tokens in zip(input_texts, tokenized_texts):
    print(f"Original Text: {text}")
    print(f"Tokenized Text: {tokens}")

# Encode the texts and show each original next to its input-id list.
# `encoded['input_ids']` is presumably a tensor — squeeze() drops the batch
# dimension before converting to a plain list.
encoded_texts = pipeline.encode_texts(input_texts)
print("Encoded Texts:")
for text, encoded in zip(input_texts, encoded_texts):
    print(f"Original Text: {text}")
    print(f"Encoded Text: {encoded['input_ids'].squeeze().tolist()}")