Spaces:
Sleeping
Sleeping
# app.py
import csv
import random
import re
import tempfile

import gradio as gr
import pandas as pd
# Word lists
# Each vocabulary is kept as a whitespace-separated string and split into a
# plain list of lowercase words; every entry is a single token.
nouns = (
    "dog cat child teacher artist bird river mountain "
    "book city car tree flower student computer phone "
    "house garden song idea scientist engineer doctor "
    "chef musician athlete writer poet farmer pilot"
).split()

adjectives = (
    "quick lazy beautiful tall short happy sad bright "
    "dark colorful quiet loud new old young ancient "
    "modern cold warm soft hard heavy light calm "
    "stormy fresh strong weak brave"
).split()

# Generic verbs; used as the fallback when a noun has no entry of its own in
# the noun-to-verb mapping.
verbs = (
    "run jump paint read fly write sing build "
    "create discover learn teach drive grow think "
    "talk listen play see walk swim code design "
    "cook dance draw study explore invent research"
).split()

adverbs = (
    "quickly slowly gracefully happily sadly quietly loudly "
    "brightly softly carefully eagerly angrily easily hardly "
    "rarely often never always sometimes soon daily patiently "
    "politely proudly silently warmly well badly closely deeply"
).split()

prepositions = (
    "in on over under beside with without near "
    "between through against among around before after "
    "inside outside above below across behind beyond "
    "during for from into like off toward"
).split()

articles = "the a an".split()

conjunctions = "and but so because when while although if unless since".split()
# Semantic rules: mapping nouns to appropriate verbs
# Each row pairs a noun with a ", "-separated list of verbs that fit it.
# Splitting on ", " (not on whitespace) keeps multi-word verbs such as
# "take off" intact.
noun_verb_map = {
    subject: actions.split(", ")
    for subject, actions in (
        ("dog", "run, jump, bark, play, walk"),
        ("cat", "meow, sleep, jump, play, purr"),
        ("child", "play, learn, read, laugh, grow"),
        ("teacher", "teach, explain, guide, help, learn"),
        ("artist", "paint, draw, create, design, imagine"),
        ("bird", "fly, sing, chirp, nest, soar"),
        ("river", "flow, run, wind, bend, swell"),
        ("mountain", "stand, tower, rise, loom, shadow"),
        ("book", "tell, describe, illustrate, explain, reveal"),
        ("city", "grow, expand, develop, bustle, sleep"),
        ("car", "drive, speed, stop, park, honk"),
        ("tree", "grow, sway, stand, shed, bloom"),
        ("flower", "bloom, grow, wilt, open, close"),
        ("student", "study, learn, read, write, graduate"),
        ("computer", "compute, process, run, crash, boot"),
        ("phone", "ring, vibrate, charge, die, connect"),
        ("house", "stand, shelter, protect, age, burn"),
        ("garden", "grow, bloom, flourish, wilt, produce"),
        ("song", "play, sound, echo, resonate, end"),
        ("idea", "form, grow, develop, emerge, inspire"),
        ("scientist", "research, discover, experiment, study, invent"),
        ("engineer", "design, build, develop, test, solve"),
        ("doctor", "heal, diagnose, treat, operate, care"),
        ("chef", "cook, prepare, taste, create, serve"),
        ("musician", "play, compose, perform, sing, record"),
        ("athlete", "run, train, compete, win, lose"),
        ("writer", "write, create, imagine, edit, publish"),
        ("poet", "write, compose, imagine, express, rhyme"),
        ("farmer", "grow, plant, harvest, plow, raise"),
        ("pilot", "fly, navigate, land, take off, command"),
    )
}
# Sentence templates
# One str.format pattern per line. Capitalised field names ({Article},
# {Noun}, {Noun_plural}) are the sentence-initial variants of the lowercase
# fields; {noun2}/{verb2} refer to a second noun and a verb chosen for it.
templates = """\
{Article} {adjective} {noun} {adverb} {verb}s {preposition} {article} {adjective} {noun2}.
{Article} {noun} {verb}s {preposition} {article} {noun2} {conjunction} {verb2}s {adverb}.
{Noun_plural} {adverb} {verb} {preposition} {noun2}.
{Noun} {verb}s {preposition} {article} {noun2} {conjunction} {article} {noun} {verb2}s.
{Article} {adjective} {noun} {verb}s {preposition} {noun2} {conjunction} {adverb} {verb2}s.
{Noun} {verb}s {article} {noun2} {preposition} {noun}.
""".splitlines()
# A standalone "a"/"an" (any case); the lookahead captures the first letter of
# the following word so the matching form can be chosen.
_INDEFINITE_ARTICLE = re.compile(r"\b(an?)\b(?=\s+(\w))", re.IGNORECASE)

# Templates spell third-person verbs as "{verb}s" / "{verb2}s".
_NAIVE_THIRD_PERSON = re.compile(r"\{(verb2?)\}s\b")

# Word endings that take "-es" rather than "-s".
_SIBILANT_ENDINGS = ("s", "x", "z", "ch", "sh")

# Plurals that the suffix rules below cannot produce.
_IRREGULAR_PLURALS = {"child": "children"}


def _add_s_suffix(word, es_endings):
    """Append the -s / -es / -ies suffix that English spelling calls for."""
    if word.endswith(es_endings):
        return word + "es"
    # Consonant + "y" becomes "ies" (study -> studies); vowel + "y" just
    # takes "s" (play -> plays).
    if len(word) > 1 and word.endswith("y") and word[-2] not in "aeiou":
        return word[:-1] + "ies"
    return word + "s"


def _third_person(verb):
    """Return the third-person singular of *verb*, e.g. "teach" -> "teaches"."""
    # Only the first word of a phrasal verb is inflected: "take off" -> "takes off".
    head, space, rest = verb.partition(" ")
    return _add_s_suffix(head, _SIBILANT_ENDINGS + ("o",)) + space + rest


def _pluralize(noun):
    """Return the plural of *noun*, e.g. "city" -> "cities"."""
    return _IRREGULAR_PLURALS.get(noun) or _add_s_suffix(noun, _SIBILANT_ENDINGS)


def _fix_indefinite_articles(sentence):
    """Make every "a"/"an" in *sentence* agree with the word that follows it."""

    def agree(match):
        # Spelling heuristic: a vowel letter takes "an". It does not model
        # pronunciation exceptions such as "hour" or "university".
        article = "an" if match.group(2).lower() in "aeiou" else "a"
        return article.capitalize() if match.group(1)[0].isupper() else article

    return _INDEFINITE_ARTICLE.sub(agree, sentence)


def generate_sentence() -> str:
    """Build one random sentence from a template and the word lists.

    A template is picked at random and filled with a noun, a verb drawn from
    that noun's entry in ``noun_verb_map`` (falling back to the generic
    ``verbs`` list), a second noun with a verb of its own, and random
    articles, adjectives, adverbs, prepositions and conjunctions.

    Unlike plain string substitution, the result is inflected: third-person
    verbs and plural nouns get the proper suffix, and "a"/"an" is matched to
    the word that follows it.

    Returns:
        The sentence, starting with an uppercase letter and ending in ".".
    """
    # Swap the templates' naive "{verb}s" for a field holding the inflected
    # verb; plain "{verb}" (used after plural subjects) is left alone.
    template = _NAIVE_THIRD_PERSON.sub(r"{\g<1>_3sg}", random.choice(templates))

    noun = random.choice(nouns)
    # Get appropriate verbs for noun
    verb = random.choice(noun_verb_map.get(noun, verbs))

    # Ensure noun2 is different from noun. Choosing from the remaining nouns
    # cannot spin forever when the list has only one entry.
    other_nouns = [candidate for candidate in nouns if candidate != noun]
    noun2 = random.choice(other_nouns) if other_nouns else noun
    # Get appropriate verbs for noun2
    verb2 = random.choice(noun_verb_map.get(noun2, verbs))

    sentence = template.format(
        Article=random.choice(articles).capitalize(),
        article=random.choice(articles),
        adjective=random.choice(adjectives),
        noun=noun,
        noun2=noun2,
        Noun=noun.capitalize(),
        Noun_plural=_pluralize(noun).capitalize(),
        verb=verb,
        verb2=verb2,
        verb_3sg=_third_person(verb),
        verb2_3sg=_third_person(verb2),
        adverb=random.choice(adverbs),
        preposition=random.choice(prepositions),
        conjunction=random.choice(conjunctions),
    )
    sentence = _fix_indefinite_articles(sentence)

    # Uppercase only the first character; str.capitalize() would also
    # lowercase everything after it.
    sentence = sentence[:1].upper() + sentence[1:]
    if not sentence.endswith("."):
        sentence += "."
    return sentence
def generate_sentences(num_sentences):
    """Generate sentences and write them to a temporary CSV file.

    Args:
        num_sentences: How many sentences to generate. Converted with
            ``int()``; ``None`` (for example an empty numeric field) is
            treated as 0, and a non-positive count yields a CSV holding only
            the header row.

    Returns:
        Path of the CSV file, which has a single ``sentence`` column. The
        file is created with ``delete=False`` and is not removed here.
    """
    count = int(num_sentences or 0)
    sentences = [generate_sentence() for _ in range(count)]
    df = pd.DataFrame(sentences, columns=["sentence"])

    # Save to a temporary CSV file. The handle is only used to reserve a
    # unique path: it is closed before pandas reopens the file by name, so no
    # descriptor is leaked and the reopen also works on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
        csv_path = temp_file.name
    df.to_csv(csv_path, index=False)
    return csv_path
def generate_and_download(num_sentences):
    """Click handler: generate the sentences and return the CSV file path.

    Thin wrapper that forwards ``num_sentences`` to ``generate_sentences``
    and returns its result unchanged.
    """
    return generate_sentences(num_sentences)
# Gradio Interface
with gr.Blocks() as demo:
    # Page header and short usage instructions.
    gr.Markdown(
        """
        # Sentence Dataset Generator with Semantic Rules
        Enter the number of sentences you want to generate, and download a CSV file containing the sentences.
        This generator uses semantic rules to create more coherent and meaningful sentences.
        """
    )

    # Widgets appear in creation order: count input, download slot, button.
    num_sentences = gr.Number(label="Number of Sentences", value=1000, precision=0)
    output = gr.File(label="Download CSV")
    generate_button = gr.Button("Generate Sentences")

    # Clicking the button passes the requested count to the handler and
    # shows the CSV path it returns in the file component.
    generate_button.click(generate_and_download, inputs=num_sentences, outputs=output)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()