# Hugging Face Spaces startup script: waits for a local Argilla server,
# then loads several demo datasets into it.
| import sys | |
| import time | |
| import pandas as pd | |
| import requests | |
| from datasets import load_dataset | |
| import argilla as rg | |
| from argilla.labeling.text_classification import Rule, add_rules | |
def _log_sentiment_dataset() -> None:
    """Log the SST sentiment text-classification dataset with explanations."""
    my_dataframe = pd.read_json(
        "https://raw.githubusercontent.com/recognai/datasets/main/sst-sentimentclassification.json"
    )
    # convert pandas dataframe to DatasetForTextClassification
    dataset_rg = rg.DatasetForTextClassification.from_pandas(my_dataframe)
    # Define labeling schema to avoid UI user modification
    settings = rg.TextClassificationSettings(label_schema={"POSITIVE", "NEGATIVE"})
    rg.configure_dataset(name="sst-sentiment-explainability", settings=settings)
    rg.log(
        dataset_rg,
        name="sst-sentiment-explainability",
        tags={
            "description": "The sst2 sentiment dataset with predictions from a pretrained pipeline and explanations "
            "from Transformers Interpret. "
        },
    )


def _log_summarization_dataset() -> None:
    """Log a 100-record news text-summarization (Text2Text) dataset."""
    dataset = load_dataset("argilla/news-summary", split="train").select(range(100))
    dataset_rg = rg.read_datasets(dataset, task="Text2Text")
    rg.log(
        dataset_rg,
        name="news-text-summarization",
        tags={
            "description": "A text summarization dataset with news pieces and their predicted summaries."
        },
    )


def _log_weak_labeling_dataset() -> None:
    """Log the AG News dataset and attach programmatic labeling rules."""
    dataset_rg = rg.read_datasets(
        load_dataset("argilla/agnews_weak_labeling", split="train"),
        task="TextClassification",
    )
    # Define labeling schema to avoid UI user modification
    settings = rg.TextClassificationSettings(
        label_schema={"World", "Sports", "Sci/Tech", "Business"}
    )
    rg.configure_dataset(name="news-programmatic-labeling", settings=settings)
    rg.log(
        dataset_rg,
        name="news-programmatic-labeling",
        tags={
            "description": "The AG News with programmatic labeling rules (see weak labeling mode in the UI)."
        },
    )
    # define queries and patterns for each category (using ES DSL)
    queries = [
        (["money", "financ*", "dollar*"], "Business"),
        (["war", "gov*", "minister*", "conflict"], "World"),
        (["*ball", "sport*", "game", "play*"], "Sports"),
        (["sci*", "techno*", "computer*", "software", "web"], "Sci/Tech"),
    ]
    # one Rule per (query term, label) pair
    rules = [
        Rule(query=term, label=label) for terms, label in queries for term in terms
    ]
    add_rules(dataset="news-programmatic-labeling", rules=rules)


def _log_ner_dataset() -> None:
    """Log the Gutenberg spaCy token-classification (NER) dataset."""
    dataset = load_dataset("argilla/gutenberg_spacy-ner", split="train")
    dataset_rg = rg.read_datasets(dataset, task="TokenClassification")
    # Define labeling schema to avoid UI user modification
    labels = {
        "CARDINAL",
        "DATE",
        "EVENT",
        "FAC",
        "GPE",
        "LANGUAGE",
        "LAW",
        "LOC",
        "MONEY",
        "NORP",
        "ORDINAL",
        "ORG",
        "PERCENT",
        "PERSON",
        "PRODUCT",
        "QUANTITY",
        "TIME",
        "WORK_OF_ART",
    }
    settings = rg.TokenClassificationSettings(label_schema=labels)
    rg.configure_dataset(name="gutenberg_spacy-ner-monitoring", settings=settings)
    rg.log(
        dataset_rg,
        name="gutenberg_spacy-ner-monitoring",
        tags={
            "description": "A dataset containing text from books with predictions from two spaCy NER pre-trained "
            "models. "
        },
    )


def load_datasets():
    """Initialize the Argilla client and log all demo datasets.

    Reads the API key from the last command-line argument and logs four
    datasets (sentiment, summarization, weak labeling, NER) into the
    "team" workspace.  Intended to run once the Argilla endpoint is up.
    """
    print("Argilla is available! Loading datasets")
    # NOTE(review): sys.argv[-1] assumes the API key is the final CLI
    # argument -- confirm the invocation contract before changing.
    api_key = sys.argv[-1]
    rg.init(api_key=api_key, workspace="team")
    _log_sentiment_dataset()
    _log_summarization_dataset()
    _log_weak_labeling_dataset()
    _log_ner_dataset()
if __name__ == "__main__":
    # Poll the local Argilla endpoint until it responds, then load the
    # demo datasets exactly once and exit the loop.
    while True:
        try:
            # A timeout keeps the probe from hanging forever on a
            # half-open connection (requests has no default timeout).
            response = requests.get("http://0.0.0.0:6900/", timeout=10)
            if response.status_code == 200:
                load_datasets()
                break
        except requests.exceptions.ConnectionError:
            # Server not up yet -- fall through and retry.
            pass
        except Exception as e:
            # Unexpected failure: report it, back off a bit longer,
            # and keep retrying rather than crashing the container.
            print(e)
            time.sleep(10)
        time.sleep(5)