# Listing metadata from the code viewer: 568 lines, 25 KiB, Python
import sqlite3
|
|
import random
|
|
import csv
|
|
import os
|
|
from datetime import datetime
|
|
|
|
# Open the EcoBuddy SQLite database. The connection and cursor created here
# are module-level and reused by every later step of the script.
conn = sqlite3.connect('ecobuddy.sqlite')
cursor = conn.cursor()

# Highest facility id currently stored; MAX() yields NULL (None) on an empty
# table, in which case 0 is used instead.
cursor.execute("SELECT MAX(id) FROM ecoFacilities")
max_facility_id = cursor.fetchone()[0]
if not max_facility_id:
    max_facility_id = 0

# Every user id in the database; contributors are drawn from this pool.
cursor.execute("SELECT id FROM ecoUser")
user_ids = []
for user_row in cursor.fetchall():
    user_ids.append(user_row[0])

# Mapping of category id -> category name as stored in ecoCategories.
cursor.execute("SELECT id, name FROM ecoCategories")
categories = {}
for category_row in cursor.fetchall():
    categories[category_row[0]] = category_row[1]
|
|
|
|
# UK cities and towns used as cluster centres for generated facilities.
# Each entry is a 5-tuple that the generation loop unpacks as
# (location_name, county, base_lat, base_lng, postcode_area):
#   - location_name / county are stored in the facility's town / county columns
#   - base_lat / base_lng are approximate coordinates that get a small random
#     offset per facility
#   - postcode_area is the leading letters handed to generate_postcode()
uk_locations = [
    # Format: Town/City, County, Latitude, Longitude, Postcode Area
    ("London", "Greater London", 51.5074, -0.1278, "EC"),
    ("Birmingham", "West Midlands", 52.4862, -1.8904, "B"),
    ("Manchester", "Greater Manchester", 53.4808, -2.2426, "M"),
    ("Glasgow", "Glasgow", 55.8642, -4.2518, "G"),
    ("Liverpool", "Merseyside", 53.4084, -2.9916, "L"),
    ("Bristol", "Bristol", 51.4545, -2.5879, "BS"),
    ("Edinburgh", "Edinburgh", 55.9533, -3.1883, "EH"),
    ("Leeds", "West Yorkshire", 53.8008, -1.5491, "LS"),
    ("Sheffield", "South Yorkshire", 53.3811, -1.4701, "S"),
    ("Newcastle upon Tyne", "Tyne and Wear", 54.9783, -1.6178, "NE"),
    ("Nottingham", "Nottinghamshire", 52.9548, -1.1581, "NG"),
    ("Cardiff", "Cardiff", 51.4816, -3.1791, "CF"),
    ("Belfast", "Belfast", 54.5973, -5.9301, "BT"),
    ("Brighton", "East Sussex", 50.8225, -0.1372, "BN"),
    ("Leicester", "Leicestershire", 52.6369, -1.1398, "LE"),
    ("Aberdeen", "Aberdeen", 57.1497, -2.0943, "AB"),
    ("Portsmouth", "Hampshire", 50.8198, -1.0880, "PO"),
    ("York", "North Yorkshire", 53.9599, -1.0873, "YO"),
    ("Swansea", "Swansea", 51.6214, -3.9436, "SA"),
    ("Oxford", "Oxfordshire", 51.7520, -1.2577, "OX"),
    ("Cambridge", "Cambridgeshire", 52.2053, 0.1218, "CB"),
    ("Exeter", "Devon", 50.7184, -3.5339, "EX"),
    ("Bath", "Somerset", 51.3751, -2.3617, "BA"),
    ("Reading", "Berkshire", 51.4543, -0.9781, "RG"),
    ("Preston", "Lancashire", 53.7632, -2.7031, "PR"),
    ("Coventry", "West Midlands", 52.4068, -1.5197, "CV"),
    ("Hull", "East Yorkshire", 53.7676, -0.3274, "HU"),
    ("Stoke-on-Trent", "Staffordshire", 53.0027, -2.1794, "ST"),
    ("Wolverhampton", "West Midlands", 52.5870, -2.1288, "WV"),
    ("Plymouth", "Devon", 50.3755, -4.1427, "PL"),
    ("Derby", "Derbyshire", 52.9225, -1.4746, "DE"),
    ("Sunderland", "Tyne and Wear", 54.9069, -1.3830, "SR"),
    ("Southampton", "Hampshire", 50.9097, -1.4044, "SO"),
    ("Norwich", "Norfolk", 52.6309, 1.2974, "NR"),
    ("Bournemouth", "Dorset", 50.7192, -1.8808, "BH"),
    ("Middlesbrough", "North Yorkshire", 54.5742, -1.2350, "TS"),
    ("Blackpool", "Lancashire", 53.8175, -3.0357, "FY"),
    ("Bolton", "Greater Manchester", 53.5785, -2.4299, "BL"),
    ("Ipswich", "Suffolk", 52.0567, 1.1482, "IP"),
    ("Telford", "Shropshire", 52.6784, -2.4453, "TF"),
    ("Dundee", "Dundee", 56.4620, -2.9707, "DD"),
    ("Peterborough", "Cambridgeshire", 52.5695, -0.2405, "PE"),
    ("Huddersfield", "West Yorkshire", 53.6458, -1.7850, "HD"),
    ("Luton", "Bedfordshire", 51.8787, -0.4200, "LU"),
    ("Warrington", "Cheshire", 53.3900, -2.5970, "WA"),
    ("Southend-on-Sea", "Essex", 51.5459, 0.7077, "SS"),
    ("Swindon", "Wiltshire", 51.5557, -1.7797, "SN"),
    ("Slough", "Berkshire", 51.5105, -0.5950, "SL"),
    ("Watford", "Hertfordshire", 51.6565, -0.3903, "WD"),
    ("Carlisle", "Cumbria", 54.8952, -2.9335, "CA")
]
|
|
|
|
# Word pools for synthetic street names. generate_street_name() picks one
# entry from each list and joins them with a space ("<prefix> <suffix>").
# NOTE(review): "Mill" appears twice in street_prefixes, so it is twice as
# likely to be drawn as any other prefix — confirm whether that is intended.
street_prefixes = ["High", "Main", "Church", "Park", "Mill", "Station", "London", "Victoria", "Queen", "King", "North", "South", "East", "West", "New", "Old", "Castle", "Bridge", "Green", "Market", "School", "Manor", "Abbey", "Priory", "Cathedral", "University", "College", "Hospital", "Railway", "Canal", "River", "Forest", "Wood", "Hill", "Mount", "Valley", "Meadow", "Field", "Farm", "Garden", "Orchard", "Vineyard", "Grange", "Lodge", "Court", "Hall", "House", "Cottage", "Barn", "Mill", "Windmill", "Watermill", "Forge", "Quarry", "Mine", "Pit", "Well", "Spring", "Brook", "Stream", "Lake", "Pond", "Pool", "Reservoir", "Bay", "Cove", "Beach", "Cliff", "Rock", "Stone", "Granite", "Marble", "Slate", "Clay", "Sand", "Gravel", "Chalk", "Flint", "Coal", "Iron", "Steel", "Copper", "Silver", "Gold", "Tin", "Lead", "Zinc", "Brass", "Bronze", "Pewter", "Nickel", "Cobalt", "Chromium", "Titanium", "Aluminium", "Silicon", "Carbon", "Oxygen", "Hydrogen", "Nitrogen", "Helium", "Neon", "Argon", "Krypton", "Xenon", "Radon"]
street_suffixes = ["Street", "Road", "Lane", "Avenue", "Drive", "Boulevard", "Way", "Place", "Square", "Court", "Terrace", "Close", "Crescent", "Gardens", "Grove", "Mews", "Alley", "Walk", "Path", "Trail", "Hill", "Rise", "View", "Heights", "Park", "Green", "Meadow", "Field", "Common", "Heath", "Moor", "Down", "Fell", "Pike", "Tor", "Crag", "Cliff", "Ridge", "Edge", "Top", "Bottom", "Side", "End", "Corner", "Junction", "Cross", "Gate", "Bridge", "Ford", "Ferry", "Wharf", "Quay", "Dock", "Harbor", "Port", "Bay", "Cove", "Beach", "Shore", "Bank", "Strand", "Esplanade", "Parade", "Promenade", "Embankment", "Causeway", "Viaduct", "Tunnel", "Passage", "Arcade", "Gallery", "Mall", "Market", "Bazaar", "Fair", "Exchange", "Mart", "Emporium", "Center", "Circle", "Oval", "Triangle", "Pentagon", "Hexagon", "Octagon", "Circus", "Ring", "Loop", "Bend", "Curve", "Turn", "Twist", "Spiral", "Coil", "Helix", "Maze", "Labyrinth"]
|
|
|
|
# Canned facility descriptions, keyed by category id. The generation loop
# looks up the list for a facility's category id and picks one entry at
# random as the facility description.
# NOTE(review): the keys 1-17 and the names in the trailing comments are
# presumably the ids/names stored in ecoCategories — verify against the
# database, since the lookup uses ids read from that table.
category_descriptions = {
    1: [  # Recycling Bins
        "Public recycling point for paper, glass, plastic, and metal",
        "Community recycling station with separate bins for different materials",
        "Recycling center with facilities for household waste separation",
        "Public access recycling bins for common household recyclables",
        "Multi-material recycling point with clear instructions for proper sorting"
    ],
    2: [  # e-Scooters
        "Dockless e-scooter rental station with multiple vehicles available",
        "E-scooter parking and charging zone for public use",
        "Designated e-scooter pickup and drop-off point",
        "E-scooter sharing station with app-based rental system",
        "Electric scooter hub with maintenance and charging facilities"
    ],
    3: [  # Bike Share Stations
        "Public bicycle sharing station with multiple bikes available",
        "Bike rental hub with secure docking stations",
        "Community bike share point with regular and electric bicycles",
        "Cycle hire station with self-service rental system",
        "Bike sharing facility with maintenance and repair services"
    ],
    4: [  # Public EV Charging Stations
        "Electric vehicle charging point with multiple connectors",
        "Fast-charging station for electric vehicles",
        "Public EV charging facility with covered waiting area",
        "Multi-vehicle electric charging hub with different power options",
        "EV charging station with renewable energy source"
    ],
    5: [  # Battery Recycling Points
        "Dedicated collection point for used batteries of all sizes",
        "Battery recycling bin with separate compartments for different types",
        "Safe disposal facility for household and small electronics batteries",
        "Battery collection point with educational information about recycling",
        "Secure battery recycling station to prevent environmental contamination"
    ],
    6: [  # Community Compost Bins
        "Neighborhood composting facility for food and garden waste",
        "Community compost bins with educational signage",
        "Public composting station with separate sections for different stages",
        "Shared compost facility managed by local volunteers",
        "Urban composting hub turning food waste into valuable soil"
    ],
    7: [  # Solar-Powered Benches
        "Solar bench with USB charging ports and WiFi connectivity",
        "Public seating with integrated solar panels and device charging",
        "Smart bench powered by solar energy with digital information display",
        "Solar-powered rest area with phone charging capabilities",
        "Eco-friendly bench with solar panels and LED lighting"
    ],
    8: [  # Green Roofs
        "Building with extensive green roof system visible from public areas",
        "Accessible green roof garden with native plant species",
        "Public building showcasing sustainable rooftop vegetation",
        "Green roof installation with educational tours available",
        "Biodiverse roof garden with insect habitats and rainwater collection"
    ],
    9: [  # Public Water Refill Stations
        "Free water refill station to reduce plastic bottle usage",
        "Public drinking fountain with bottle filling capability",
        "Water refill point with filtered water options",
        "Accessible water station encouraging reusable bottles",
        "Community water dispenser with usage counter display"
    ],
    10: [  # Waste Oil Collection Points
        "Cooking oil recycling point for residential use",
        "Used oil collection facility with secure containers",
        "Waste oil drop-off point for conversion to biodiesel",
        "Community oil recycling station with spill prevention measures",
        "Cooking oil collection facility with educational information"
    ],
    11: [  # Book Swap Stations
        "Community book exchange point with weatherproof shelving",
        "Public book sharing library in repurposed phone box",
        "Free book swap station encouraging reading and reuse",
        "Neighborhood book exchange with rotating collection",
        "Little free library with take-one-leave-one system"
    ],
    12: [  # Pollinator Gardens
        "Public garden designed to support bees and butterflies",
        "Pollinator-friendly planting area with native flowering species",
        "Community garden dedicated to supporting local insect populations",
        "Bee-friendly garden with educational signage about pollinators",
        "Urban wildflower meadow supporting biodiversity"
    ],
    13: [  # E-Waste Collection Bins
        "Secure collection point for electronic waste and small appliances",
        "E-waste recycling bin for phones, computers, and small electronics",
        "Electronic waste drop-off point with data security assurance",
        "Community e-waste collection facility with regular collection schedule",
        "Dedicated bin for responsible disposal of electronic items"
    ],
    14: [  # Clothing Donation Bins
        "Textile recycling point for clothes and household fabrics",
        "Clothing donation bin supporting local charities",
        "Secure collection point for reusable clothing and textiles",
        "Community clothing recycling bin with regular collection",
        "Textile donation point preventing landfill waste"
    ],
    15: [  # Community Tool Libraries
        "Tool lending library for community use and sharing",
        "Shared equipment facility reducing need for individual ownership",
        "Community resource center for borrowing tools and equipment",
        "Tool sharing hub with membership system and workshops",
        "Public tool library with wide range of equipment available"
    ],
    16: [  # Urban Farms
        "Community-run urban farm providing local produce",
        "City farming project with volunteer opportunities",
        "Urban agriculture site with educational programs",
        "Local food growing initiative in repurposed urban space",
        "Community garden with vegetable plots and fruit trees"
    ],
    17: [  # Rainwater Harvesting Systems
        "Public demonstration of rainwater collection for irrigation",
        "Rainwater harvesting system with educational displays",
        "Community rainwater collection facility for shared gardens",
        "Visible rainwater storage and filtration system",
        "Urban water conservation project with storage tanks"
    ]
}
|
|
|
|
# Canned status comments, keyed by category id. For each inserted facility
# the script draws 0-3 distinct comments from the list for its category;
# a category id missing from this dict falls back to the list under key 1.
# NOTE(review): the keys 1-17 and the names in the trailing comments are
# presumably the ids/names stored in ecoCategories — verify against the
# database.
status_comments = {
    1: [  # Recycling Bins
        "Recently emptied and cleaned",
        "Some bins are nearly full",
        "All bins in good condition",
        "Paper bin is currently full",
        "New signage installed to improve sorting"
    ],
    2: [  # e-Scooters
        "All scooters fully charged",
        "Three scooters currently available",
        "Maintenance scheduled for next week",
        "New scooters added to this location",
        "High usage area, scooters frequently unavailable"
    ],
    3: [  # Bike Share Stations
        "All docking stations operational",
        "Five bikes currently available",
        "Some bikes need maintenance",
        "New electric bikes added",
        "Popular station with high turnover"
    ],
    4: [  # Public EV Charging Stations
        "All charging points operational",
        "Fast charger currently under repair",
        "Peak usage during business hours",
        "New charging point added last month",
        "Payment system recently upgraded"
    ],
    5: [  # Battery Recycling Points
        "Collection bin recently emptied",
        "Secure container in good condition",
        "New signage explaining battery types",
        "High usage from local businesses",
        "Additional capacity added"
    ],
    6: [  # Community Compost Bins
        "Compost ready for collection",
        "Needs more brown material",
        "Recently turned and aerated",
        "New bins added to increase capacity",
        "Volunteer day scheduled for maintenance"
    ],
    7: [  # Solar-Powered Benches
        "All charging ports working",
        "Solar panels recently cleaned",
        "WiFi currently unavailable",
        "LED lights need replacement",
        "High usage during lunch hours"
    ],
    8: [  # Green Roofs
        "Plants thriving after recent rain",
        "Maintenance scheduled next month",
        "New species added to increase biodiversity",
        "Irrigation system working well",
        "Open for public tours on weekends"
    ],
    9: [  # Public Water Refill Stations
        "Water quality tested weekly",
        "Fountain cleaned daily",
        "Bottle filler counter shows high usage",
        "New filter installed recently",
        "Popular during summer months"
    ],
    10: [  # Waste Oil Collection Points
        "Container recently emptied",
        "Secure lid in good condition",
        "New funnel system installed",
        "Collection schedule posted",
        "Area kept clean and tidy"
    ],
    11: [  # Book Swap Stations
        "Good selection currently available",
        "Children's books needed",
        "Recently reorganized by volunteers",
        "Weatherproof cover working well",
        "High turnover of popular titles"
    ],
    12: [  # Pollinator Gardens
        "Plants in full bloom",
        "Many bees and butterflies observed",
        "New native species planted",
        "Volunteer day for maintenance scheduled",
        "Educational tours available"
    ],
    13: [  # E-Waste Collection Bins
        "Bin recently emptied",
        "Secure deposit system working",
        "Collection schedule posted",
        "New items accepted now include small appliances",
        "Data destruction guaranteed"
    ],
    14: [  # Clothing Donation Bins
        "Bin recently emptied",
        "Clean and well-maintained",
        "High quality donations appreciated",
        "Winter clothing especially needed",
        "Please bag items before donating"
    ],
    15: [  # Community Tool Libraries
        "New inventory system implemented",
        "Popular tools often unavailable on weekends",
        "Tool maintenance workshop scheduled",
        "New donations recently added to collection",
        "Extended hours during summer"
    ],
    16: [  # Urban Farms
        "Seasonal produce currently available",
        "Volunteer opportunities posted",
        "Educational workshops on weekends",
        "New growing area being developed",
        "Composting system recently expanded"
    ],
    17: [  # Rainwater Harvesting Systems
        "System working efficiently after recent rainfall",
        "Water quality monitoring in place",
        "Educational tours available by appointment",
        "System capacity recently expanded",
        "Used for irrigation of nearby community garden"
    ]
}
|
|
|
|
def generate_postcode(area_code):
    """Return a random postcode-shaped string for the given postcode area.

    The result has the layout ``<area><district> <sector><unit>``, for
    example ``M1 1AA``:

    * ``area`` is ``area_code`` exactly as passed in,
    * ``district`` is a random integer from 1 to 99,
    * ``sector`` is a random digit from 1 to 9,
    * ``unit`` is two random capital letters; I and O are left out of the
      alphabet the letters are drawn from.

    The string is not checked against real postcodes; it only follows the
    layout above.

    Args:
        area_code: Leading letters of the postcode (e.g. ``"M"`` or ``"EC"``).

    Returns:
        The generated postcode as a ``str``.
    """
    district = random.randint(1, 99)
    sector = random.randint(1, 9)
    unit = ''.join(random.choices('ABCDEFGHJKLMNPQRSTUVWXYZ', k=2))

    # One- and two-letter areas are formatted identically, so no branching on
    # len(area_code) is needed (the previous if/else had two equal branches).
    return f"{area_code}{district} {sector}{unit}"
|
|
|
|
def generate_street_name():
    """Return a random street name of the form ``"<prefix> <suffix>"``.

    The first word is drawn from ``street_prefixes`` and the second from
    ``street_suffixes``; both are module-level lists.
    """
    first_word = random.choice(street_prefixes)
    second_word = random.choice(street_suffixes)
    return "{} {}".format(first_word, second_word)
|
|
|
|
def generate_house_number():
    """Return a random house-number string for an address.

    About 80% of calls return a plain number from 1 to 200. The remaining
    calls return one of four equally likely alternatives: a number with a
    letter A-G appended (e.g. ``42A``), ``Unit 1-20``, ``Flat 1-50`` or
    ``Suite 1-10``.
    """
    if random.random() >= 0.8:
        # All four candidates are built before one is chosen, so the random
        # draws happen in a fixed order regardless of the final pick.
        lettered = "{}{}".format(random.randint(1, 200), random.choice('ABCDEFG'))
        unit_label = "Unit {}".format(random.randint(1, 20))
        flat_label = "Flat {}".format(random.randint(1, 50))
        suite_label = "Suite {}".format(random.randint(1, 10))
        return random.choice([lettered, unit_label, flat_label, suite_label])

    return str(random.randint(1, 200))
|
|
|
|
def vary_coordinates(lat, lng):
    """Return ``(lat, lng)`` nudged by a small random offset.

    The latitude moves by at most 0.004 degrees and the longitude by at most
    0.006 degrees in either direction (intended as a spread of up to roughly
    500 metres), so facilities generated for the same town do not sit on
    exactly the same point.

    Args:
        lat: Base latitude in decimal degrees.
        lng: Base longitude in decimal degrees.

    Returns:
        A ``(latitude, longitude)`` tuple with the offsets applied.
    """
    shifted_lat = lat + random.uniform(-0.004, 0.004)
    shifted_lng = lng + random.uniform(-0.006, 0.006)
    return shifted_lat, shifted_lng
|
|
|
|
def generate_title(category_name, location_name, street_name):
    """Return a facility title built from one of six fixed patterns.

    The patterns combine the category name with the town name, the street
    name, or both; one pattern is picked uniformly at random.

    Args:
        category_name: Name of the facility's category.
        location_name: Town or city the facility belongs to.
        street_name: Street the facility is placed on.

    Returns:
        The chosen title as a ``str``.
    """
    candidates = []
    candidates.append("{} {}".format(location_name, category_name))
    candidates.append("{} at {}".format(category_name, street_name))
    candidates.append("{} {}".format(street_name, category_name))
    candidates.append("Community {} {}".format(category_name, location_name))
    candidates.append("{} Central {}".format(location_name, category_name))
    candidates.append("{} {} {}".format(location_name, street_name, category_name))
    return random.choice(candidates)
|
|
|
|
# ---------------------------------------------------------------------------
# Main generation run.
#
# Builds `num_facilities` synthetic facilities clustered around the towns in
# uk_locations, inserts them into ecoFacilities, attaches 0-3 status comments
# to each in ecoFacilityStatus, and mirrors every inserted facility to
# generated_facilities.csv. Progress is echoed to stdout and written to
# facility_generation_log.txt.
# ---------------------------------------------------------------------------

# Number of facilities to create, and number of rows per commit/log step.
num_facilities = 1000
batch_size = 50

facility_insert_sql = """
INSERT INTO ecoFacilities
(title, category, description, houseNumber, streetName, county, town, postcode, lng, lat, contributor)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

try:
    # Context managers close both output files even when generation or
    # insertion fails. The explicit encoding keeps non-ASCII titles or
    # category names read from the database writable on every platform.
    with open("facility_generation_log.txt", "w", encoding="utf-8") as log_file, \
            open("generated_facilities.csv", "w", newline='', encoding="utf-8") as csv_file:
        log_file.write(f"Starting facility generation at {datetime.now()}\n")
        log_file.write(f"Target: {num_facilities} new facilities\n\n")

        # CSV copy of everything that gets inserted, for later reference.
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(["ID", "Title", "Category", "Description", "Address", "Postcode", "Latitude", "Longitude", "Contributor"])

        # random.choice() raises IndexError on an empty sequence; stop early
        # with a readable message instead.
        if not categories or not user_ids:
            problem = "Cannot generate facilities: ecoCategories or ecoUser is empty"
            log_file.write(problem + "\n")
            raise SystemExit(problem)

        # Rows collected here are inserted after generation has finished.
        facilities_to_insert = []
        status_comments_to_insert = []

        # Titles already in use (existing rows plus the ones generated below),
        # so that every new title is unique.
        existing_titles = set()
        cursor.execute("SELECT title FROM ecoFacilities")
        for row in cursor.fetchall():
            existing_titles.add(row[0])

        facilities_created = 0
        # Built once; the set of categories does not change during the run.
        category_ids = list(categories)

        log_file.write("Generating facilities...\n")

        while facilities_created < num_facilities:
            # Pick a town and place a cluster of 5-25 facilities around it,
            # capped so the overall target is never exceeded.
            location_name, county, base_lat, base_lng, postcode_area = random.choice(uk_locations)
            facilities_per_location = min(random.randint(5, 25), num_facilities - facilities_created)

            for _ in range(facilities_per_location):
                category_id = random.choice(category_ids)
                category_name = categories[category_id]

                # Address components
                street_name = generate_street_name()
                house_number = generate_house_number()
                lat, lng = vary_coordinates(base_lat, base_lng)
                postcode = generate_postcode(postcode_area)

                # Title, made unique with a numeric suffix (" 2", " 3", ...)
                title_base = generate_title(category_name, location_name, street_name)
                title = title_base
                suffix = 2
                while title in existing_titles:
                    title = f"{title_base} {suffix}"
                    suffix += 1
                existing_titles.add(title)

                # A category present in the database but without canned text
                # gets a generic description instead of raising KeyError.
                descriptions = category_descriptions.get(category_id)
                if descriptions:
                    description = random.choice(descriptions)
                else:
                    description = f"{category_name} available for public use"

                contributor_id = random.choice(user_ids)

                # Tuple order matches the column list in facility_insert_sql
                # (note: lng comes before lat).
                facilities_to_insert.append((
                    title,
                    category_id,
                    description,
                    house_number,
                    street_name,
                    county,
                    location_name,
                    postcode,
                    lng,
                    lat,
                    contributor_id
                ))

                facilities_created += 1
                if facilities_created % 100 == 0:
                    log_message = f"Generated {facilities_created} facilities so far..."
                    print(log_message)
                    log_file.write(log_message + "\n")

        # Insert facilities, committing once per batch.
        log_file.write("\nInserting facilities into database...\n")
        print("Inserting facilities into database...")

        total_batches = (len(facilities_to_insert) - 1) // batch_size + 1
        for i in range(0, len(facilities_to_insert), batch_size):
            batch = facilities_to_insert[i:i + batch_size]

            for facility in batch:
                # Rows are inserted one at a time so that cursor.lastrowid
                # reports the id of exactly this row; this avoids assuming
                # that the ids of a bulk insert are contiguous.
                cursor.execute(facility_insert_sql, facility)
                facility_id = cursor.lastrowid
                category_id = facility[1]  # Category id is the second element

                # Write to CSV for reference
                csv_writer.writerow([
                    facility_id,
                    facility[0],  # title
                    categories[category_id],  # category name
                    facility[2],  # description
                    f"{facility[3]} {facility[4]}, {facility[6]}, {facility[5]}",  # address
                    facility[7],  # postcode
                    facility[9],  # lat
                    facility[8],  # lng
                    facility[10]  # contributor
                ])

                # 0-3 status comments per facility, weighted 30/40/20/10.
                num_comments = random.choices([0, 1, 2, 3], weights=[30, 40, 20, 10])[0]
                if num_comments > 0:
                    # Unknown categories fall back to the recycling-bin comments.
                    relevant_comments = status_comments.get(category_id, status_comments[1])
                    # random.sample picks without repetition.
                    selected_comments = random.sample(relevant_comments, min(num_comments, len(relevant_comments)))
                    for comment in selected_comments:
                        status_comments_to_insert.append((facility_id, comment))

            # Commit after each batch
            conn.commit()

            log_message = f"Inserted batch {i//batch_size + 1}/{total_batches}"
            print(log_message)
            log_file.write(log_message + "\n")

        # Insert status comments in batches
        if status_comments_to_insert:
            log_file.write("\nInserting status comments...\n")
            print("Inserting status comments...")

            total_comment_batches = (len(status_comments_to_insert) - 1) // batch_size + 1
            for i in range(0, len(status_comments_to_insert), batch_size):
                batch = status_comments_to_insert[i:i + batch_size]
                cursor.executemany("""
                    INSERT INTO ecoFacilityStatus (facilityId, statusComment)
                    VALUES (?, ?)
                """, batch)
                conn.commit()

                log_message = f"Inserted comment batch {i//batch_size + 1}/{total_comment_batches}"
                print(log_message)
                log_file.write(log_message + "\n")

        # Final table sizes for the summary
        cursor.execute("SELECT COUNT(*) FROM ecoFacilities")
        total_facilities = cursor.fetchone()[0]

        cursor.execute("SELECT COUNT(*) FROM ecoFacilityStatus")
        total_comments = cursor.fetchone()[0]

        # Log completion
        completion_message = f"\nGeneration complete at {datetime.now()}"
        print(completion_message)
        log_file.write(completion_message + "\n")

        summary = f"Total facilities in database: {total_facilities}\n"
        summary += f"Total status comments in database: {total_comments}\n"
        summary += "Generated facilities saved to generated_facilities.csv for reference"

        print(summary)
        log_file.write(summary)

        conn.commit()
finally:
    # Always release the database handle; anything not yet committed is
    # discarded when the connection closes.
    conn.close()

print(f"\nSuccessfully added {facilities_created} new ecological facilities to the database.")
print("A detailed log and CSV export have been created for reference.")