i finally committed i guess
Signed-off-by: boris <boris@borishub.co.uk>
This commit is contained in:
568
Databases/generate_bulk_facilities.py
Normal file
568
Databases/generate_bulk_facilities.py
Normal file
@@ -0,0 +1,568 @@
|
||||
import sqlite3
|
||||
import random
|
||||
import csv
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
# Open the EcoBuddy SQLite database (path is relative to the working directory)
conn = sqlite3.connect('Databases/ecobuddy.sqlite')
cursor = conn.cursor()

# Highest facility id currently stored; falls back to 0 when the query
# yields no value
max_facility_id = cursor.execute("SELECT MAX(id) FROM ecoFacilities").fetchone()[0] or 0

# Every user id, used later as the pool of possible contributors
user_ids = [user_id for (user_id,) in cursor.execute("SELECT id FROM ecoUser").fetchall()]

# Category lookup table: id -> name
categories = dict(cursor.execute("SELECT id, name FROM ecoCategories").fetchall())
|
||||
|
||||
# UK towns and cities used as cluster centres for generated facilities.
# Each entry is (town/city, county, latitude, longitude, postcode area);
# the coordinates are approximate.
uk_locations = [
    ("London", "Greater London", 51.5074, -0.1278, "EC"),
    ("Birmingham", "West Midlands", 52.4862, -1.8904, "B"),
    ("Manchester", "Greater Manchester", 53.4808, -2.2426, "M"),
    ("Glasgow", "Glasgow", 55.8642, -4.2518, "G"),
    ("Liverpool", "Merseyside", 53.4084, -2.9916, "L"),
    ("Bristol", "Bristol", 51.4545, -2.5879, "BS"),
    ("Edinburgh", "Edinburgh", 55.9533, -3.1883, "EH"),
    ("Leeds", "West Yorkshire", 53.8008, -1.5491, "LS"),
    ("Sheffield", "South Yorkshire", 53.3811, -1.4701, "S"),
    ("Newcastle upon Tyne", "Tyne and Wear", 54.9783, -1.6178, "NE"),
    ("Nottingham", "Nottinghamshire", 52.9548, -1.1581, "NG"),
    ("Cardiff", "Cardiff", 51.4816, -3.1791, "CF"),
    ("Belfast", "Belfast", 54.5973, -5.9301, "BT"),
    ("Brighton", "East Sussex", 50.8225, -0.1372, "BN"),
    ("Leicester", "Leicestershire", 52.6369, -1.1398, "LE"),
    ("Aberdeen", "Aberdeen", 57.1497, -2.0943, "AB"),
    ("Portsmouth", "Hampshire", 50.8198, -1.0880, "PO"),
    ("York", "North Yorkshire", 53.9599, -1.0873, "YO"),
    ("Swansea", "Swansea", 51.6214, -3.9436, "SA"),
    ("Oxford", "Oxfordshire", 51.7520, -1.2577, "OX"),
    ("Cambridge", "Cambridgeshire", 52.2053, 0.1218, "CB"),
    ("Exeter", "Devon", 50.7184, -3.5339, "EX"),
    ("Bath", "Somerset", 51.3751, -2.3617, "BA"),
    ("Reading", "Berkshire", 51.4543, -0.9781, "RG"),
    ("Preston", "Lancashire", 53.7632, -2.7031, "PR"),
    ("Coventry", "West Midlands", 52.4068, -1.5197, "CV"),
    ("Hull", "East Yorkshire", 53.7676, -0.3274, "HU"),
    ("Stoke-on-Trent", "Staffordshire", 53.0027, -2.1794, "ST"),
    ("Wolverhampton", "West Midlands", 52.5870, -2.1288, "WV"),
    ("Plymouth", "Devon", 50.3755, -4.1427, "PL"),
    ("Derby", "Derbyshire", 52.9225, -1.4746, "DE"),
    ("Sunderland", "Tyne and Wear", 54.9069, -1.3830, "SR"),
    ("Southampton", "Hampshire", 50.9097, -1.4044, "SO"),
    ("Norwich", "Norfolk", 52.6309, 1.2974, "NR"),
    ("Bournemouth", "Dorset", 50.7192, -1.8808, "BH"),
    ("Middlesbrough", "North Yorkshire", 54.5742, -1.2350, "TS"),
    ("Blackpool", "Lancashire", 53.8175, -3.0357, "FY"),
    ("Bolton", "Greater Manchester", 53.5785, -2.4299, "BL"),
    ("Ipswich", "Suffolk", 52.0567, 1.1482, "IP"),
    ("Telford", "Shropshire", 52.6784, -2.4453, "TF"),
    ("Dundee", "Dundee", 56.4620, -2.9707, "DD"),
    ("Peterborough", "Cambridgeshire", 52.5695, -0.2405, "PE"),
    ("Huddersfield", "West Yorkshire", 53.6458, -1.7850, "HD"),
    ("Luton", "Bedfordshire", 51.8787, -0.4200, "LU"),
    ("Warrington", "Cheshire", 53.3900, -2.5970, "WA"),
    ("Southend-on-Sea", "Essex", 51.5459, 0.7077, "SS"),
    ("Swindon", "Wiltshire", 51.5557, -1.7797, "SN"),
    ("Slough", "Berkshire", 51.5105, -0.5950, "SL"),
    ("Watford", "Hertfordshire", 51.6565, -0.3903, "WD"),
    ("Carlisle", "Cumbria", 54.8952, -2.9335, "CA"),
]
|
||||
|
||||
# Street name components for generating street names.
# First word of a generated street name. Entries are unique so that
# random.choice() picks each prefix with equal probability (the original list
# contained "Mill" twice, which doubled its weight).
street_prefixes = [
    "High", "Main", "Church", "Park", "Mill", "Station", "London", "Victoria",
    "Queen", "King", "North", "South", "East", "West", "New", "Old", "Castle",
    "Bridge", "Green", "Market", "School", "Manor", "Abbey", "Priory",
    "Cathedral", "University", "College", "Hospital", "Railway", "Canal",
    "River", "Forest", "Wood", "Hill", "Mount", "Valley", "Meadow", "Field",
    "Farm", "Garden", "Orchard", "Vineyard", "Grange", "Lodge", "Court",
    "Hall", "House", "Cottage", "Barn", "Windmill", "Watermill", "Forge",
    "Quarry", "Mine", "Pit", "Well", "Spring", "Brook", "Stream", "Lake",
    "Pond", "Pool", "Reservoir", "Bay", "Cove", "Beach", "Cliff", "Rock",
    "Stone", "Granite", "Marble", "Slate", "Clay", "Sand", "Gravel", "Chalk",
    "Flint", "Coal", "Iron", "Steel", "Copper", "Silver", "Gold", "Tin",
    "Lead", "Zinc", "Brass", "Bronze", "Pewter", "Nickel", "Cobalt",
    "Chromium", "Titanium", "Aluminium", "Silicon", "Carbon", "Oxygen",
    "Hydrogen", "Nitrogen", "Helium", "Neon", "Argon", "Krypton", "Xenon",
    "Radon",
]
|
||||
# Second word of a generated street name (street type or landscape feature)
street_suffixes = [
    "Street", "Road", "Lane", "Avenue", "Drive", "Boulevard", "Way", "Place",
    "Square", "Court", "Terrace", "Close", "Crescent", "Gardens", "Grove",
    "Mews", "Alley", "Walk", "Path", "Trail", "Hill", "Rise", "View",
    "Heights", "Park", "Green", "Meadow", "Field", "Common", "Heath", "Moor",
    "Down", "Fell", "Pike", "Tor", "Crag", "Cliff", "Ridge", "Edge", "Top",
    "Bottom", "Side", "End", "Corner", "Junction", "Cross", "Gate", "Bridge",
    "Ford", "Ferry", "Wharf", "Quay", "Dock", "Harbor", "Port", "Bay", "Cove",
    "Beach", "Shore", "Bank", "Strand", "Esplanade", "Parade", "Promenade",
    "Embankment", "Causeway", "Viaduct", "Tunnel", "Passage", "Arcade",
    "Gallery", "Mall", "Market", "Bazaar", "Fair", "Exchange", "Mart",
    "Emporium", "Center", "Circle", "Oval", "Triangle", "Pentagon", "Hexagon",
    "Octagon", "Circus", "Ring", "Loop", "Bend", "Curve", "Turn", "Twist",
    "Spiral", "Coil", "Helix", "Maze", "Labyrinth",
]
|
||||
|
||||
# Candidate facility descriptions, keyed by category id.
# Each category offers five alternatives; one is picked at random per facility.
category_descriptions = {
    # Recycling Bins
    1: [
        "Public recycling point for paper, glass, plastic, and metal",
        "Community recycling station with separate bins for different materials",
        "Recycling center with facilities for household waste separation",
        "Public access recycling bins for common household recyclables",
        "Multi-material recycling point with clear instructions for proper sorting",
    ],
    # e-Scooters
    2: [
        "Dockless e-scooter rental station with multiple vehicles available",
        "E-scooter parking and charging zone for public use",
        "Designated e-scooter pickup and drop-off point",
        "E-scooter sharing station with app-based rental system",
        "Electric scooter hub with maintenance and charging facilities",
    ],
    # Bike Share Stations
    3: [
        "Public bicycle sharing station with multiple bikes available",
        "Bike rental hub with secure docking stations",
        "Community bike share point with regular and electric bicycles",
        "Cycle hire station with self-service rental system",
        "Bike sharing facility with maintenance and repair services",
    ],
    # Public EV Charging Stations
    4: [
        "Electric vehicle charging point with multiple connectors",
        "Fast-charging station for electric vehicles",
        "Public EV charging facility with covered waiting area",
        "Multi-vehicle electric charging hub with different power options",
        "EV charging station with renewable energy source",
    ],
    # Battery Recycling Points
    5: [
        "Dedicated collection point for used batteries of all sizes",
        "Battery recycling bin with separate compartments for different types",
        "Safe disposal facility for household and small electronics batteries",
        "Battery collection point with educational information about recycling",
        "Secure battery recycling station to prevent environmental contamination",
    ],
    # Community Compost Bins
    6: [
        "Neighborhood composting facility for food and garden waste",
        "Community compost bins with educational signage",
        "Public composting station with separate sections for different stages",
        "Shared compost facility managed by local volunteers",
        "Urban composting hub turning food waste into valuable soil",
    ],
    # Solar-Powered Benches
    7: [
        "Solar bench with USB charging ports and WiFi connectivity",
        "Public seating with integrated solar panels and device charging",
        "Smart bench powered by solar energy with digital information display",
        "Solar-powered rest area with phone charging capabilities",
        "Eco-friendly bench with solar panels and LED lighting",
    ],
    # Green Roofs
    8: [
        "Building with extensive green roof system visible from public areas",
        "Accessible green roof garden with native plant species",
        "Public building showcasing sustainable rooftop vegetation",
        "Green roof installation with educational tours available",
        "Biodiverse roof garden with insect habitats and rainwater collection",
    ],
    # Public Water Refill Stations
    9: [
        "Free water refill station to reduce plastic bottle usage",
        "Public drinking fountain with bottle filling capability",
        "Water refill point with filtered water options",
        "Accessible water station encouraging reusable bottles",
        "Community water dispenser with usage counter display",
    ],
    # Waste Oil Collection Points
    10: [
        "Cooking oil recycling point for residential use",
        "Used oil collection facility with secure containers",
        "Waste oil drop-off point for conversion to biodiesel",
        "Community oil recycling station with spill prevention measures",
        "Cooking oil collection facility with educational information",
    ],
    # Book Swap Stations
    11: [
        "Community book exchange point with weatherproof shelving",
        "Public book sharing library in repurposed phone box",
        "Free book swap station encouraging reading and reuse",
        "Neighborhood book exchange with rotating collection",
        "Little free library with take-one-leave-one system",
    ],
    # Pollinator Gardens
    12: [
        "Public garden designed to support bees and butterflies",
        "Pollinator-friendly planting area with native flowering species",
        "Community garden dedicated to supporting local insect populations",
        "Bee-friendly garden with educational signage about pollinators",
        "Urban wildflower meadow supporting biodiversity",
    ],
    # E-Waste Collection Bins
    13: [
        "Secure collection point for electronic waste and small appliances",
        "E-waste recycling bin for phones, computers, and small electronics",
        "Electronic waste drop-off point with data security assurance",
        "Community e-waste collection facility with regular collection schedule",
        "Dedicated bin for responsible disposal of electronic items",
    ],
    # Clothing Donation Bins
    14: [
        "Textile recycling point for clothes and household fabrics",
        "Clothing donation bin supporting local charities",
        "Secure collection point for reusable clothing and textiles",
        "Community clothing recycling bin with regular collection",
        "Textile donation point preventing landfill waste",
    ],
    # Community Tool Libraries
    15: [
        "Tool lending library for community use and sharing",
        "Shared equipment facility reducing need for individual ownership",
        "Community resource center for borrowing tools and equipment",
        "Tool sharing hub with membership system and workshops",
        "Public tool library with wide range of equipment available",
    ],
    # Urban Farms
    16: [
        "Community-run urban farm providing local produce",
        "City farming project with volunteer opportunities",
        "Urban agriculture site with educational programs",
        "Local food growing initiative in repurposed urban space",
        "Community garden with vegetable plots and fruit trees",
    ],
    # Rainwater Harvesting Systems
    17: [
        "Public demonstration of rainwater collection for irrigation",
        "Rainwater harvesting system with educational displays",
        "Community rainwater collection facility for shared gardens",
        "Visible rainwater storage and filtration system",
        "Urban water conservation project with storage tanks",
    ],
}
|
||||
|
||||
# Candidate status comments, keyed by category id.
# Each category offers five alternatives; zero to three are attached to a
# generated facility.
status_comments = {
    # Recycling Bins
    1: [
        "Recently emptied and cleaned",
        "Some bins are nearly full",
        "All bins in good condition",
        "Paper bin is currently full",
        "New signage installed to improve sorting",
    ],
    # e-Scooters
    2: [
        "All scooters fully charged",
        "Three scooters currently available",
        "Maintenance scheduled for next week",
        "New scooters added to this location",
        "High usage area, scooters frequently unavailable",
    ],
    # Bike Share Stations
    3: [
        "All docking stations operational",
        "Five bikes currently available",
        "Some bikes need maintenance",
        "New electric bikes added",
        "Popular station with high turnover",
    ],
    # Public EV Charging Stations
    4: [
        "All charging points operational",
        "Fast charger currently under repair",
        "Peak usage during business hours",
        "New charging point added last month",
        "Payment system recently upgraded",
    ],
    # Battery Recycling Points
    5: [
        "Collection bin recently emptied",
        "Secure container in good condition",
        "New signage explaining battery types",
        "High usage from local businesses",
        "Additional capacity added",
    ],
    # Community Compost Bins
    6: [
        "Compost ready for collection",
        "Needs more brown material",
        "Recently turned and aerated",
        "New bins added to increase capacity",
        "Volunteer day scheduled for maintenance",
    ],
    # Solar-Powered Benches
    7: [
        "All charging ports working",
        "Solar panels recently cleaned",
        "WiFi currently unavailable",
        "LED lights need replacement",
        "High usage during lunch hours",
    ],
    # Green Roofs
    8: [
        "Plants thriving after recent rain",
        "Maintenance scheduled next month",
        "New species added to increase biodiversity",
        "Irrigation system working well",
        "Open for public tours on weekends",
    ],
    # Public Water Refill Stations
    9: [
        "Water quality tested weekly",
        "Fountain cleaned daily",
        "Bottle filler counter shows high usage",
        "New filter installed recently",
        "Popular during summer months",
    ],
    # Waste Oil Collection Points
    10: [
        "Container recently emptied",
        "Secure lid in good condition",
        "New funnel system installed",
        "Collection schedule posted",
        "Area kept clean and tidy",
    ],
    # Book Swap Stations
    11: [
        "Good selection currently available",
        "Children's books needed",
        "Recently reorganized by volunteers",
        "Weatherproof cover working well",
        "High turnover of popular titles",
    ],
    # Pollinator Gardens
    12: [
        "Plants in full bloom",
        "Many bees and butterflies observed",
        "New native species planted",
        "Volunteer day for maintenance scheduled",
        "Educational tours available",
    ],
    # E-Waste Collection Bins
    13: [
        "Bin recently emptied",
        "Secure deposit system working",
        "Collection schedule posted",
        "New items accepted now include small appliances",
        "Data destruction guaranteed",
    ],
    # Clothing Donation Bins
    14: [
        "Bin recently emptied",
        "Clean and well-maintained",
        "High quality donations appreciated",
        "Winter clothing especially needed",
        "Please bag items before donating",
    ],
    # Community Tool Libraries
    15: [
        "New inventory system implemented",
        "Popular tools often unavailable on weekends",
        "Tool maintenance workshop scheduled",
        "New donations recently added to collection",
        "Extended hours during summer",
    ],
    # Urban Farms
    16: [
        "Seasonal produce currently available",
        "Volunteer opportunities posted",
        "Educational workshops on weekends",
        "New growing area being developed",
        "Composting system recently expanded",
    ],
    # Rainwater Harvesting Systems
    17: [
        "System working efficiently after recent rainfall",
        "Water quality monitoring in place",
        "Educational tours available by appointment",
        "System capacity recently expanded",
        "Used for irrigation of nearby community garden",
    ],
}
|
||||
|
||||
# Generate a UK-style postcode based on area code
def generate_postcode(area_code):
    """Return a random UK-style postcode string for ``area_code``.

    The result has the form ``<area><district> <sector><unit>``, for example
    ``M1 1AA``. The district is a random integer from 1 to 99, the sector a
    random digit from 1 to 9, and the unit two random letters drawn from an
    alphabet that omits I and O. The postcode is synthetic; it is not checked
    against real postcode data.

    Args:
        area_code: Postcode area prefix such as ``"M"`` or ``"EC"``.

    Returns:
        The generated postcode as a string.
    """
    district = random.randint(1, 99)
    sector = random.randint(1, 9)
    # I and O are deliberately left out of the unit alphabet
    unit = ''.join(random.choices('ABCDEFGHJKLMNPQRSTUVWXYZ', k=2))

    # One- and two-letter area codes are formatted the same way, so no
    # branching on len(area_code) is needed (the original had two identical
    # branches here).
    return f"{area_code}{district} {sector}{unit}"
|
||||
|
||||
# Generate a street name from the prefix and suffix word lists
def generate_street_name():
    """Build a street name by pairing a random prefix with a random suffix.

    One entry is drawn from ``street_prefixes`` and then one from
    ``street_suffixes``; the two are joined with a single space.

    Returns:
        The generated street name, e.g. ``"High Street"``.
    """
    parts = (random.choice(street_prefixes), random.choice(street_suffixes))
    return " ".join(parts)
|
||||
|
||||
# Generate a house number or unit designation
def generate_house_number():
    """Return a random house-number string.

    With probability 0.8 the result is a plain number from 1 to 200.
    Otherwise one of four alternatives is chosen uniformly: a number with a
    letter suffix A-G (e.g. ``42A``), ``Unit N`` (1-20), ``Flat N`` (1-50) or
    ``Suite N`` (1-10).

    Returns:
        The house number as a string.
    """
    # Common case first: a bare number
    if random.random() < 0.8:
        return str(random.randint(1, 200))

    # All four candidates are built, in this order, before one is picked
    lettered = f"{random.randint(1, 200)}{random.choice('ABCDEFG')}"  # e.g., 42A
    unit = f"Unit {random.randint(1, 20)}"
    flat = f"Flat {random.randint(1, 50)}"
    suite = f"Suite {random.randint(1, 10)}"
    return random.choice([lettered, unit, flat, suite])
|
||||
|
||||
# Jitter coordinates so facilities in one town do not share an exact location
def vary_coordinates(lat, lng):
    """Return ``(lat, lng)`` shifted by a small random offset.

    The latitude moves by up to 0.004 degrees and the longitude by up to
    0.006 degrees in either direction (the original note describes this as a
    variation of up to ~500 meters).

    Args:
        lat: Base latitude in degrees.
        lng: Base longitude in degrees.

    Returns:
        A ``(latitude, longitude)`` tuple.
    """
    # Draw the latitude offset first, then the longitude offset
    lat_offset = random.uniform(-0.004, 0.004)
    lng_offset = random.uniform(-0.006, 0.006)
    jittered = (lat + lat_offset, lng + lng_offset)
    return jittered
|
||||
|
||||
# Generate a facility title from its category, town and street
def generate_title(category_name, location_name, street_name):
    """Return a facility title built from one of six fixed patterns.

    A pattern is chosen uniformly at random and filled in with the given
    category, location and street names.

    Args:
        category_name: Name of the facility category.
        location_name: Town or city name.
        street_name: Street the facility is on.

    Returns:
        The generated title string.
    """
    patterns = (
        "{location} {category}",
        "{category} at {street}",
        "{street} {category}",
        "Community {category} {location}",
        "{location} Central {category}",
        "{location} {street} {category}",
    )
    chosen = random.choice(patterns)
    return chosen.format(
        category=category_name,
        location=location_name,
        street=street_name,
    )
|
||||
|
||||
# Main script body: generate the facilities in memory, insert them (and a few
# status comments each) into the database, and record progress in a log file
# and a CSV export.

# Number of facilities to generate and rows per database commit
num_facilities = 1000
batch_size = 50

# Rows accumulated in memory before being written to the database
facilities_to_insert = []
status_comments_to_insert = []

facility_insert_sql = """
    INSERT INTO ecoFacilities
    (title, category, description, houseNumber, streetName, county, town, postcode, lng, lat, contributor)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

try:
    # Fail early with a clear message instead of an IndexError from
    # random.choice() further down.
    if not user_ids:
        raise SystemExit("No users found in ecoUser; cannot assign contributors.")
    if not categories:
        raise SystemExit("No categories found in ecoCategories; nothing to generate.")

    # The context managers guarantee both files are flushed and closed even if
    # generation or insertion fails part-way through.
    with open("facility_generation_log.txt", "w", encoding="utf-8") as log_file, \
            open("generated_facilities.csv", "w", newline='', encoding="utf-8") as csv_file:
        log_file.write(f"Starting facility generation at {datetime.now()}\n")
        log_file.write(f"Target: {num_facilities} new facilities\n\n")

        # CSV export of every generated facility, for reference
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(["ID", "Title", "Category", "Description", "Address", "Postcode", "Latitude", "Longitude", "Contributor"])

        # Track titles already in use so generated titles stay unique
        cursor.execute("SELECT title FROM ecoFacilities")
        existing_titles = {row[0] for row in cursor.fetchall()}

        # Built once; the set of categories does not change during generation
        category_ids = list(categories)

        facilities_created = 0
        log_file.write("Generating facilities...\n")

        while facilities_created < num_facilities:
            # Select a random location
            location_name, county, base_lat, base_lng, postcode_area = random.choice(uk_locations)

            # Generate 5-25 facilities per location to create clusters; the
            # min() cap means the inner loop can never overshoot the target.
            facilities_per_location = min(random.randint(5, 25), num_facilities - facilities_created)

            for _ in range(facilities_per_location):
                category_id = random.choice(category_ids)
                category_name = categories[category_id]

                # Generate address components
                street_name = generate_street_name()
                house_number = generate_house_number()
                lat, lng = vary_coordinates(base_lat, base_lng)
                postcode = generate_postcode(postcode_area)

                # Ensure the title is unique by appending a numeric suffix if needed
                title_base = generate_title(category_name, location_name, street_name)
                title = title_base
                suffix = 2
                while title in existing_titles:
                    title = f"{title_base} {suffix}"
                    suffix += 1
                existing_titles.add(title)

                # Categories without their own descriptions fall back to the
                # recycling-bin texts, mirroring the status-comment fallback.
                description = random.choice(
                    category_descriptions.get(category_id, category_descriptions[1])
                )
                contributor_id = random.choice(user_ids)

                # Tuple order matches the column list in facility_insert_sql
                facilities_to_insert.append((
                    title,
                    category_id,
                    description,
                    house_number,
                    street_name,
                    county,
                    location_name,
                    postcode,
                    lng,
                    lat,
                    contributor_id,
                ))

                # Log progress periodically
                facilities_created += 1
                if facilities_created % 100 == 0:
                    log_message = f"Generated {facilities_created} facilities so far..."
                    print(log_message)
                    log_file.write(log_message + "\n")

        # Insert facilities, committing once per batch
        log_file.write("\nInserting facilities into database...\n")
        print("Inserting facilities into database...")

        total_batches = (len(facilities_to_insert) - 1) // batch_size + 1
        for i in range(0, len(facilities_to_insert), batch_size):
            batch = facilities_to_insert[i:i + batch_size]

            for facility in batch:
                # Insert row by row so the real id of each new facility can be
                # read from cursor.lastrowid instead of being inferred from the
                # last id of a bulk insert.
                cursor.execute(facility_insert_sql, facility)
                facility_id = cursor.lastrowid

                (row_title, row_category_id, row_description, row_house_number,
                 row_street, row_county, row_town, row_postcode,
                 row_lng, row_lat, row_contributor) = facility

                # Write to CSV for reference
                csv_writer.writerow([
                    facility_id,
                    row_title,
                    categories[row_category_id],
                    row_description,
                    f"{row_house_number} {row_street}, {row_town}, {row_county}",
                    row_postcode,
                    row_lat,
                    row_lng,
                    row_contributor,
                ])

                # Decide how many status comments to add (0-3)
                num_comments = random.choices([0, 1, 2, 3], weights=[30, 40, 20, 10])[0]
                if num_comments > 0:
                    # Default to recycling bin comments for unknown categories
                    relevant_comments = status_comments.get(row_category_id, status_comments[1])
                    # Select random comments without repetition
                    selected_comments = random.sample(
                        relevant_comments, min(num_comments, len(relevant_comments))
                    )
                    status_comments_to_insert.extend(
                        (facility_id, comment) for comment in selected_comments
                    )

            # Commit after each batch
            conn.commit()

            log_message = f"Inserted batch {i // batch_size + 1}/{total_batches}"
            print(log_message)
            log_file.write(log_message + "\n")

        # Insert status comments in batches
        if status_comments_to_insert:
            log_file.write("\nInserting status comments...\n")
            print("Inserting status comments...")

            total_comment_batches = (len(status_comments_to_insert) - 1) // batch_size + 1
            for i in range(0, len(status_comments_to_insert), batch_size):
                batch = status_comments_to_insert[i:i + batch_size]
                cursor.executemany("""
                    INSERT INTO ecoFacilityStatus (facilityId, statusComment)
                    VALUES (?, ?)
                """, batch)
                conn.commit()

                log_message = f"Inserted comment batch {i // batch_size + 1}/{total_comment_batches}"
                print(log_message)
                log_file.write(log_message + "\n")

        # Get final counts
        cursor.execute("SELECT COUNT(*) FROM ecoFacilities")
        total_facilities = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM ecoFacilityStatus")
        total_comments = cursor.fetchone()[0]

        # Log completion
        completion_message = f"\nGeneration complete at {datetime.now()}"
        print(completion_message)
        log_file.write(completion_message + "\n")

        summary = f"Total facilities in database: {total_facilities}\n"
        summary += f"Total status comments in database: {total_comments}\n"
        summary += "Generated facilities saved to generated_facilities.csv for reference"

        print(summary)
        log_file.write(summary)

    conn.commit()
finally:
    # Always release the database connection, including on failure
    conn.close()

print(f"\nSuccessfully added {len(facilities_to_insert)} new ecological facilities to the database.")
print("A detailed log and CSV export have been created for reference.")
|
Reference in New Issue
Block a user