Jobly / generate_data.py
Valentina9502's picture
First commit
fdf5af0 verified
"""
Generate synthetic gig economy data for testing
Creates 50 worker profiles and 50 gig posts with realistic variety
"""
import json
import random
# Skill pools, one list per worker category.  generate_workers() samples a
# worker's skills from the pool that belongs to the worker's category.
HANDYMAN_SKILLS = [
    "Plumbing", "Electrical Work", "Carpentry", "Painting",
    "Drywall", "Tile Work", "Door Installation", "Window Repair",
]
GARDENING_SKILLS = [
    "Lawn Mowing", "Tree Pruning", "Garden Design", "Plant Care",
    "Landscaping", "Hedge Trimming", "Weeding", "Irrigation",
]
PHOTO_SKILLS = [
    "Event Photography", "Portrait Photography", "Photo Editing", "Lighting",
    "Drone Photography", "Wedding Photography", "Product Photography",
]
PET_SKILLS = [
    "Dog Walking", "Pet Sitting", "Cat Care", "Basic Pet Training",
    "Pet First Aid", "Grooming", "Bird Care",
]
MOVING_SKILLS = [
    "Furniture Moving", "Packing", "Heavy Lifting", "Van Transport",
    "Assembly", "Disassembly", "Storage",
]
CLEANING_SKILLS = [
    "Deep Cleaning", "Regular Cleaning", "Organization", "Ironing",
    "Window Cleaning", "Carpet Cleaning", "Eco-friendly Products",
]
FURNITURE_SKILLS = [
    "Custom Furniture", "Furniture Repair", "Wood Working",
    "Furniture Refinishing", "Upholstery", "Cabinet Making",
]
ART_SKILLS = [
    "Mural Painting", "Portrait Art", "Interior Painting", "Canvas Art",
    "Custom Artwork", "Decorative Painting", "Restoration",
]
TECH_SKILLS = [
    "Computer Repair", "TV Installation", "Smart Home Setup",
    "Network Setup", "Printer Repair", "Data Recovery",
]
TUTORING_SKILLS = [
    "Math Tutoring", "Language Teaching", "Music Lessons",
    "Art Classes", "Homework Help", "Test Prep",
]
# European cities, written as "City, Country"; used as the location of both
# workers and gigs.
CITIES = [
    "Rome, Italy", "Milan, Italy", "Florence, Italy", "Venice, Italy", "Naples, Italy",
    "Paris, France", "Lyon, France", "Marseille, France", "Nice, France",
    "Madrid, Spain", "Barcelona, Spain", "Valencia, Spain", "Seville, Spain",
    "Berlin, Germany", "Munich, Germany", "Hamburg, Germany", "Frankfurt, Germany",
    "Amsterdam, Netherlands",
    "Vienna, Austria",
    "Brussels, Belgium",
    "Lisbon, Portugal",
]
# Given names; each row groups names of a similar origin.
FIRST_NAMES = [
    "Marco", "Sofia", "Luca", "Giulia", "Alessandro", "Francesca", "Lorenzo", "Elena",
    "Pierre", "Marie", "Jean", "Sophie", "Antoine", "Camille", "Lucas", "Emma",
    "Carlos", "Maria", "Diego", "Ana", "Pablo", "Carmen", "Miguel", "Laura",
    "Hans", "Anna", "Klaus", "Petra", "Lukas", "Julia", "Felix", "Nina",
    "Ahmed", "Fatima", "Omar", "Leila", "Hassan", "Aisha", "Thomas", "Isabella",
]

# Family names, laid out in rows the same way as FIRST_NAMES.  A worker's name
# is one random first name combined with one random last name.
LAST_NAMES = [
    "Rossi", "Ferrari", "Russo", "Bianchi", "Romano", "Conti", "Ricci", "Marino",
    "Dupont", "Martin", "Bernard", "Dubois", "Laurent", "Moreau", "Simon", "Michel",
    "Garcia", "Rodriguez", "Martinez", "Sanchez", "Lopez", "Gonzalez", "Perez", "Torres",
    "Müller", "Schmidt", "Schneider", "Fischer", "Weber", "Meyer", "Wagner", "Becker",
    "Hassan", "Ali", "Ibrahim", "Ahmed", "Khan", "Patel", "Chen", "Wang",
]
def generate_workers(n: int = 50) -> list:
    """Generate ``n`` synthetic worker profiles.

    Every worker gets one randomly chosen category and 3-6 distinct skills
    sampled from that category's pool (capped at the pool size).  About 30%
    of the workers additionally receive one skill from a *different*
    category, so a worker's ``skills`` list never contains duplicates.

    Args:
        n: Number of profiles to create.  Zero or a negative value yields an
            empty list.

    Returns:
        A list of dicts with the keys ``id`` (``"w1"``, ``"w2"``, ...),
        ``name``, ``title``, ``skills``, ``experience``, ``location``,
        ``hourly_rate``, ``availability`` and ``bio``.
    """
    # (job title, skill pool) pairs a worker can belong to.
    categories = [
        ("Handyman & Home Repairs", HANDYMAN_SKILLS),
        ("Gardener & Landscaper", GARDENING_SKILLS),
        ("Photographer", PHOTO_SKILLS),
        ("Pet Care Specialist", PET_SKILLS),
        ("Moving & Delivery", MOVING_SKILLS),
        ("House Cleaner", CLEANING_SKILLS),
        ("Furniture Specialist", FURNITURE_SKILLS),
        ("Artist & Painter", ART_SKILLS),
        ("Tech Support", TECH_SKILLS),
        ("Tutor & Teacher", TUTORING_SKILLS),
    ]
    availability_options = [
        "Full-time",
        "Part-time",
        "Weekends only",
        "Flexible",
        "Evenings & Weekends",
    ]

    workers = []
    for i in range(n):
        title, skill_pool = random.choice(categories)

        # Select 3-6 random skills from the category; sample() needs a size
        # that does not exceed the pool, hence the min().
        num_skills = random.randint(3, 6)
        skills = random.sample(skill_pool, min(num_skills, len(skill_pool)))

        # Sometimes add a skill from another category (versatile workers).
        # The worker's own category is excluded and already-held skills are
        # filtered out; otherwise the "extra" skill could be a duplicate.
        if random.random() > 0.7:
            other_pools = [
                pool for other_title, pool in categories if other_title != title
            ]
            if other_pools:
                extra_candidates = [
                    skill
                    for skill in random.choice(other_pools)
                    if skill not in skills
                ]
                if extra_candidates:
                    skills.append(random.choice(extra_candidates))

        experience_years = random.randint(2, 20)
        hourly_rate = random.randint(15, 50)

        workers.append(
            {
                "id": f"w{i+1}",
                "name": f"{random.choice(FIRST_NAMES)} {random.choice(LAST_NAMES)}",
                "title": title,
                "skills": skills,
                "experience": f"{experience_years} years",
                "location": random.choice(CITIES),
                "hourly_rate": f"€{hourly_rate}/hour",
                "availability": random.choice(availability_options),
                "bio": f"Experienced {title.lower()} with {experience_years} years in the field",
            }
        )
    return workers
def generate_gigs(n: int = 50) -> list:
    """Generate ``n`` synthetic gig posts.

    Each gig is built from a randomly chosen template; the title gets a
    random suffix (possibly empty) and the location is a random city.

    Args:
        n: Number of gig posts to create.  Zero or a negative value yields an
            empty list.

    Returns:
        A list of dicts with the keys ``id`` (``"j1"``, ``"j2"``, ...),
        ``title``, ``company``, ``required_skills``, ``experience_level``,
        ``location``, ``budget``, ``duration`` and ``description``.  Every
        gig owns its own ``required_skills`` list, so editing one gig does
        not affect other gigs created from the same template.
    """
    # Template layout:
    # (title, company, required skills, experience, min budget, max budget, duration)
    # NOTE(review): a few required skills (e.g. "Pipe Repair",
    # "Cable Management") are not part of any worker skill pool in this file —
    # presumably intentional so that not every gig has a perfect match; confirm.
    gig_templates = [
        # Handyman jobs
        ("Bathroom Plumbing Repair", "Private Homeowner", ["Plumbing", "Pipe Repair"], "3+ years", 100, 250, "Half day"),
        ("Kitchen Renovation Help", "Apartment Owner", ["Carpentry", "Tile Work", "Painting"], "5+ years", 400, 800, "3-5 days"),
        ("Electrical Outlet Installation", "Home Owner", ["Electrical Work", "Installation"], "4+ years", 80, 150, "2-3 hours"),
        ("Fence Repair & Painting", "Property Owner", ["Carpentry", "Painting"], "3+ years", 150, 300, "1 day"),
        # Gardening jobs
        ("Weekly Lawn Maintenance", "Residential Property", ["Lawn Mowing", "Weeding"], "2+ years", 60, 100, "Ongoing"),
        ("Garden Redesign Project", "Villa Owner", ["Garden Design", "Landscaping", "Plant Care"], "5+ years", 500, 1000, "1-2 weeks"),
        ("Tree Removal & Stump Grinding", "Property Manager", ["Tree Pruning", "Heavy Equipment"], "6+ years", 300, 500, "1 day"),
        ("Spring Garden Cleanup", "Homeowner", ["Weeding", "Plant Care", "Cleanup"], "2+ years", 80, 150, "Half day"),
        # Photography jobs
        ("Birthday Party Photography", "Private Family", ["Event Photography", "Photo Editing"], "3+ years", 200, 350, "3-4 hours"),
        ("Real Estate Property Photos", "Real Estate Agent", ["Product Photography", "Photo Editing"], "3+ years", 150, 300, "Half day"),
        ("Family Portrait Session", "Family", ["Portrait Photography", "Lighting"], "4+ years", 180, 300, "2 hours"),
        ("Corporate Event Coverage", "Company", ["Event Photography", "Lighting"], "5+ years", 400, 700, "Full day"),
        # Pet care jobs
        ("Weekend Dog Sitting", "Pet Owner", ["Pet Sitting", "Dog Walking"], "2+ years", 80, 150, "2 days"),
        ("Daily Cat Feeding - 1 Week", "Traveling Owner", ["Cat Care", "Pet Sitting"], "1+ years", 100, 150, "1 week"),
        ("Puppy Training Sessions", "New Dog Owner", ["Basic Pet Training", "Dog Walking"], "4+ years", 200, 350, "4 sessions"),
        ("Multiple Pet Care", "Pet Owner", ["Dog Walking", "Cat Care", "Pet Sitting"], "3+ years", 120, 200, "10 days"),
        # Moving jobs
        ("Studio Apartment Move", "Student", ["Furniture Moving", "Packing"], "2+ years", 150, 250, "Half day"),
        ("Piano Moving Service", "Homeowner", ["Heavy Lifting", "Special Equipment"], "5+ years", 200, 400, "2-3 hours"),
        ("Office Furniture Relocation", "Small Business", ["Furniture Moving", "Assembly"], "4+ years", 300, 500, "1 day"),
        ("Storage Unit to Apartment", "Individual", ["Moving", "Heavy Lifting"], "2+ years", 180, 300, "Half day"),
        # Cleaning jobs
        ("Post-Party Cleaning", "Event Host", ["Deep Cleaning", "Organization"], "2+ years", 80, 150, "3-4 hours"),
        ("Move-Out Deep Clean", "Apartment Tenant", ["Deep Cleaning", "Window Cleaning"], "3+ years", 150, 250, "Full day"),
        ("Weekly House Cleaning", "Busy Family", ["Regular Cleaning", "Organization"], "2+ years", 70, 120, "Ongoing"),
        ("Commercial Office Cleaning", "Office Manager", ["Regular Cleaning", "Eco-friendly"], "3+ years", 200, 350, "Evening shift"),
        # Furniture jobs
        ("Custom Dining Table", "Homeowner", ["Custom Furniture", "Wood Working"], "6+ years", 600, 1200, "2 weeks"),
        ("Antique Chair Restoration", "Collector", ["Furniture Repair", "Upholstery"], "8+ years", 250, 500, "1 week"),
        ("Built-in Closet System", "Apartment Owner", ["Custom Furniture", "Cabinet Making"], "5+ years", 800, 1500, "1 week"),
        ("Furniture Refinishing", "Homeowner", ["Furniture Refinishing", "Wood Working"], "4+ years", 200, 400, "3-5 days"),
        # Art jobs
        ("Living Room Feature Wall", "Homeowner", ["Mural Painting", "Interior Painting"], "4+ years", 400, 700, "2-3 days"),
        ("Restaurant Interior Mural", "Restaurant Owner", ["Mural Painting", "Custom Artwork"], "6+ years", 1000, 2000, "1-2 weeks"),
        ("Portrait Commission", "Private Client", ["Portrait Art", "Canvas Art"], "5+ years", 300, 600, "2 weeks"),
        ("Kid's Playroom Decoration", "Parents", ["Mural Painting", "Decorative Painting"], "3+ years", 250, 450, "2 days"),
        # Tech jobs
        ("Home Network Setup", "Homeowner", ["Network Setup", "Smart Home Setup"], "3+ years", 100, 200, "2-3 hours"),
        ("Computer Virus Removal", "Individual", ["Computer Repair", "Data Recovery"], "4+ years", 60, 120, "1-2 hours"),
        ("TV Wall Mounting & Setup", "Apartment Owner", ["TV Installation", "Cable Management"], "2+ years", 80, 150, "2 hours"),
        ("Smart Home Integration", "Tech Enthusiast", ["Smart Home Setup", "Network Setup"], "5+ years", 200, 400, "Half day"),
        # Tutoring jobs
        ("High School Math Tutoring", "Student Parent", ["Math Tutoring", "Homework Help"], "3+ years", 150, 300, "4 weeks"),
        ("Piano Lessons for Beginner", "Adult Learner", ["Music Lessons"], "4+ years", 200, 350, "8 sessions"),
        ("Italian Language Teaching", "Expat", ["Language Teaching"], "3+ years", 180, 300, "6 weeks"),
        ("SAT Test Preparation", "High School Senior", ["Test Prep", "Math Tutoring"], "5+ years", 300, 500, "6 weeks"),
    ]
    # Suffixes that add some variation to titles; built once, not per gig.
    variations = ["", " Needed", " Required", " - Urgent", " - Flexible Schedule"]

    gigs = []
    for i in range(n):
        title, company, skills, exp, min_budget, max_budget, duration = random.choice(
            gig_templates
        )
        title_variation = title + random.choice(variations)

        gigs.append(
            {
                "id": f"j{i+1}",
                "title": title_variation,
                "company": company,
                # Copy the template's list: gigs drawn from the same template
                # would otherwise all share (and co-mutate) one list object.
                "required_skills": list(skills),
                "experience_level": exp,
                "location": random.choice(CITIES),
                "budget": f"€{min_budget}-{max_budget}",
                "duration": duration,
                # The description uses the base title, without the suffix.
                "description": f"{title} - {', '.join(skills)} expertise needed",
            }
        )
    return gigs
def main():
    """Generate 50 workers and 50 gigs and save them as JSON.

    Writes ``workers_data.json`` and ``gigs_data.json`` to the current
    working directory (overwriting existing files) and prints a summary.
    """
    # Generate data
    workers = generate_workers(50)
    gigs = generate_gigs(50)

    # Save to JSON files.  The encoding is explicit so the result does not
    # depend on the platform's locale; json.dump() keeps its default ASCII
    # escaping, so the bytes written are the same as before.
    with open("workers_data.json", "w", encoding="utf-8") as f:
        json.dump(workers, f, indent=2)
    with open("gigs_data.json", "w", encoding="utf-8") as f:
        json.dump(gigs, f, indent=2)

    print(f"✅ Generated {len(workers)} workers and {len(gigs)} gigs")
    print("📁 Saved to workers_data.json and gigs_data.json")


if __name__ == "__main__":
    main()