Spaces:

Vishwas1
/

SimpleLaunguageDataset

Sleeping

App Files Files Community

SimpleLaunguageDataset / app.py

Vishwas1

Create app.py

802d0ef verified about 1 year ago

raw

history blame contribute delete

6.56 kB

	# app.py

	import random
	import csv
	import gradio as gr
	import pandas as pd
	import tempfile

	# Word lists
	# Vocabulary pools, one list per part of speech. Each pool is written as
	# whitespace-separated text and split into a plain list of lowercase words.
	nouns = (
	    "dog cat child teacher artist bird river mountain "
	    "book city car tree flower student computer phone "
	    "house garden song idea scientist engineer doctor "
	    "chef musician athlete writer poet farmer pilot"
	).split()

	adjectives = (
	    "quick lazy beautiful tall short happy sad bright "
	    "dark colorful quiet loud new old young ancient "
	    "modern cold warm soft hard heavy light calm "
	    "stormy fresh strong weak brave"
	).split()

	verbs = (
	    "run jump paint read fly write sing build "
	    "create discover learn teach drive grow think "
	    "talk listen play see walk swim code design "
	    "cook dance draw study explore invent research"
	).split()

	adverbs = (
	    "quickly slowly gracefully happily sadly quietly loudly "
	    "brightly softly carefully eagerly angrily easily hardly "
	    "rarely often never always sometimes soon daily patiently "
	    "politely proudly silently warmly well badly closely deeply"
	).split()

	prepositions = (
	    "in on over under beside with without near "
	    "between through against among around before after "
	    "inside outside above below across behind beyond "
	    "during for from into like off toward"
	).split()

	articles = "the a an".split()

	conjunctions = "and but so because when while although if unless since".split()

	# Semantic rules: mapping nouns to appropriate verbs
	# Each noun is paired with five base-form verbs that make sense with it as
	# the subject. Entries are listed in the same order as the noun word list.
	noun_verb_map = dict(
	    dog=["run", "jump", "bark", "play", "walk"],
	    cat=["meow", "sleep", "jump", "play", "purr"],
	    child=["play", "learn", "read", "laugh", "grow"],
	    teacher=["teach", "explain", "guide", "help", "learn"],
	    artist=["paint", "draw", "create", "design", "imagine"],
	    bird=["fly", "sing", "chirp", "nest", "soar"],
	    river=["flow", "run", "wind", "bend", "swell"],
	    mountain=["stand", "tower", "rise", "loom", "shadow"],
	    book=["tell", "describe", "illustrate", "explain", "reveal"],
	    city=["grow", "expand", "develop", "bustle", "sleep"],
	    car=["drive", "speed", "stop", "park", "honk"],
	    tree=["grow", "sway", "stand", "shed", "bloom"],
	    flower=["bloom", "grow", "wilt", "open", "close"],
	    student=["study", "learn", "read", "write", "graduate"],
	    computer=["compute", "process", "run", "crash", "boot"],
	    phone=["ring", "vibrate", "charge", "die", "connect"],
	    house=["stand", "shelter", "protect", "age", "burn"],
	    garden=["grow", "bloom", "flourish", "wilt", "produce"],
	    song=["play", "sound", "echo", "resonate", "end"],
	    idea=["form", "grow", "develop", "emerge", "inspire"],
	    scientist=["research", "discover", "experiment", "study", "invent"],
	    engineer=["design", "build", "develop", "test", "solve"],
	    doctor=["heal", "diagnose", "treat", "operate", "care"],
	    chef=["cook", "prepare", "taste", "create", "serve"],
	    musician=["play", "compose", "perform", "sing", "record"],
	    athlete=["run", "train", "compete", "win", "lose"],
	    writer=["write", "create", "imagine", "edit", "publish"],
	    poet=["write", "compose", "imagine", "express", "rhyme"],
	    farmer=["grow", "plant", "harvest", "plow", "raise"],
	    # "take off" is the only multi-word verb in the table.
	    pilot=["fly", "navigate", "land", "take off", "command"],
	)

	# Sentence templates
	# Placeholders filled in by generate_sentence():
	#   {Article} / {article}   capitalized / lowercase article
	#   {adjective}, {adverb}, {preposition}, {conjunction}
	#   {noun}, {Noun}          first noun, lowercase / capitalized
	#   {Noun_plural}           capitalized plural of the first noun
	#   {noun2}                 a second noun
	#   {verb}, {verb2}         base-form verbs for noun / noun2
	#   {verb_s}, {verb2_s}     third-person singular forms of those verbs
	# Templates use {verb_s} rather than "{verb}s" so that spelling rules
	# (teach -> teaches, study -> studies, take off -> takes off) are applied.
	templates = [
	    "{Article} {adjective} {noun} {adverb} {verb_s} {preposition} {article} {adjective} {noun2}.",
	    "{Article} {noun} {verb_s} {preposition} {article} {noun2} {conjunction} {verb2_s} {adverb}.",
	    "{Noun_plural} {adverb} {verb} {preposition} {noun2}.",
	    "{Noun} {verb_s} {preposition} {article} {noun2} {conjunction} {article} {noun} {verb2_s}.",
	    "{Article} {adjective} {noun} {verb_s} {preposition} {noun2} {conjunction} {adverb} {verb2_s}.",
	    "{Noun} {verb_s} {article} {noun2} {preposition} {noun}.",
	]

	# Nouns whose plural is not produced by a suffix rule.
	_IRREGULAR_PLURALS = {"child": "children"}


	def _add_s(word, es_endings):
	    """Return *word* with a regular English -s, -es or -ies ending.

	    es_endings is a tuple of suffixes that take "-es" instead of "-s".
	    """
	    if word.endswith(es_endings):
	        return word + "es"
	    # consonant + y -> ies (study -> studies); vowel + y keeps the y (play -> plays)
	    if len(word) > 1 and word.endswith("y") and word[-2] not in "aeiou":
	        return word[:-1] + "ies"
	    return word + "s"


	def _third_person(verb):
	    """Return the third-person singular form of a base-form verb."""
	    # Only the first word of a phrasal verb is inflected: "take off" -> "takes off".
	    head, separator, rest = verb.partition(" ")
	    return _add_s(head, ("s", "x", "z", "ch", "sh", "o")) + separator + rest


	def _pluralize(noun):
	    """Return the plural form of a singular noun."""
	    if noun in _IRREGULAR_PLURALS:
	        return _IRREGULAR_PLURALS[noun]
	    return _add_s(noun, ("s", "x", "z", "ch", "sh"))


	def _fix_indefinite_articles(sentence):
	    """Make every "a"/"an" in *sentence* agree with the word that follows it.

	    Agreement is decided by the first letter of the next word, which is
	    sufficient for the vocabulary in this file (it has no words such as
	    "hour" or "university" where spelling and sound disagree).
	    """
	    words = sentence.split(" ")
	    for index, (word, following) in enumerate(zip(words, words[1:])):
	        if word.lower() not in ("a", "an"):
	            continue
	        starts_with_vowel = following.lower().startswith(("a", "e", "i", "o", "u"))
	        article = "an" if starts_with_vowel else "a"
	        # Keep the capitalization of a sentence-initial article.
	        words[index] = article.capitalize() if word[:1].isupper() else article
	    return " ".join(words)


	def generate_sentence():
	    """Build one random sentence from a template and the word lists.

	    A template is picked at random, two distinct nouns are drawn, and each
	    noun gets a verb from noun_verb_map (falling back to the general verb
	    list for nouns that have no entry). The remaining placeholders are
	    filled with random words from their lists.

	    Returns:
	        The sentence as a string, starting with an uppercase letter and
	        ending with a period.
	    """
	    template = random.choice(templates)

	    noun = random.choice(nouns)
	    verb = random.choice(noun_verb_map.get(noun, verbs))

	    # Draw the second noun from the remaining nouns so it always differs from
	    # the first; reuse the first noun only when there is no alternative
	    # (a retry loop would never terminate in that case).
	    other_nouns = [candidate for candidate in nouns if candidate != noun]
	    noun2 = random.choice(other_nouns) if other_nouns else noun
	    verb2 = random.choice(noun_verb_map.get(noun2, verbs))

	    sentence = template.format(
	        Article=random.choice(articles).capitalize(),
	        article=random.choice(articles),
	        adjective=random.choice(adjectives),
	        noun=noun,
	        noun2=noun2,
	        Noun=noun.capitalize(),
	        Noun_plural=_pluralize(noun).capitalize(),
	        verb=verb,
	        verb2=verb2,
	        verb_s=_third_person(verb),
	        verb2_s=_third_person(verb2),
	        adverb=random.choice(adverbs),
	        preposition=random.choice(prepositions),
	        conjunction=random.choice(conjunctions),
	    )

	    # Articles are drawn at random, so "a"/"an" must be reconciled with the
	    # word that ended up after them.
	    sentence = _fix_indefinite_articles(sentence)

	    # Uppercase only the first character; str.capitalize() would also
	    # lowercase the rest of the sentence.
	    sentence = sentence[:1].upper() + sentence[1:]
	    if not sentence.endswith("."):
	        sentence += "."
	    return sentence

	def generate_sentences(num_sentences):
	    """Generate sentences and write them to a temporary CSV file.

	    Args:
	        num_sentences: How many sentences to generate. Converted with int();
	            a missing value (None) or a negative number produces a CSV that
	            contains only the header row.

	    Returns:
	        Path of the CSV file. It has a single "sentence" column and is not
	        deleted automatically.
	    """
	    # `or 0` covers None (e.g. an empty number field); max() makes the
	    # handling of negative counts explicit.
	    count = max(int(num_sentences or 0), 0)
	    sentences = [generate_sentence() for _ in range(count)]
	    df = pd.DataFrame(sentences, columns=["sentence"])

	    # Reserve a unique path, then close the handle before pandas writes to it:
	    # leaving it open leaks a file descriptor on every call and prevents the
	    # file from being reopened by name on Windows.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
	        csv_path = temp_file.name
	    df.to_csv(csv_path, index=False)
	    return csv_path

	def generate_and_download(num_sentences):
	    """Click handler: return the path of a freshly generated sentence CSV.

	    Delegates to generate_sentences() and passes its result through unchanged.
	    """
	    return generate_sentences(num_sentences)

	# Gradio Interface
	# Components are created in display order: intro text, sentence-count input,
	# download slot, then the button that triggers generation.
	with gr.Blocks() as demo:
	    gr.Markdown(
	        """
	# Sentence Dataset Generator with Semantic Rules

	Enter the number of sentences you want to generate, and download a CSV file containing the sentences.

	This generator uses semantic rules to create more coherent and meaningful sentences.
	"""
	    )
	    # precision=0 restricts the field to whole numbers.
	    num_sentences = gr.Number(
	        label="Number of Sentences",
	        value=1000,
	        precision=0,
	    )
	    output = gr.File(label="Download CSV")
	    generate_button = gr.Button("Generate Sentences")
	    # Clicking the button runs the generator and hands the CSV path to the file slot.
	    generate_button.click(fn=generate_and_download, inputs=num_sentences, outputs=output)

	# Start the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()