Add game data seeding from PokeAPI with level ranges
Seed the database with Pokemon game data for 5 games (FireRed, LeafGreen, Emerald, HeartGold, SoulSilver) using pokebase. Includes Alembic migrations for route unique constraints and encounter level ranges, a two-phase seed system (offline fetch to JSON, then idempotent upserts), and Dockerfile updates for the seed runner. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
329
backend/src/app/seeds/fetch_pokeapi.py
Normal file
329
backend/src/app/seeds/fetch_pokeapi.py
Normal file
@@ -0,0 +1,329 @@
|
||||
"""Fetch game data from PokeAPI and write static JSON seed files.
|
||||
|
||||
Uses pokebase which provides built-in file caching — first run fetches
|
||||
from the API, subsequent runs are instant from disk cache.
|
||||
|
||||
Usage:
|
||||
# Against public PokeAPI (cached after first run):
|
||||
podman compose exec -w /app/src api python -m app.seeds.fetch_pokeapi
|
||||
|
||||
# Against local PokeAPI (no rate limits):
|
||||
podman compose exec -w /app/src api python -m app.seeds.fetch_pokeapi --base-url http://pokeapi-app:8000
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pokebase as pb
|
||||
import pokebase.common as pb_common
|
||||
|
||||
# Directory that receives the generated JSON seed files; it sits in a "data"
# folder next to this module and is created on demand by main().
DATA_DIR = Path(__file__).parent / "data"
|
||||
|
||||
# Game definitions
#
# Keyed by version-group slug. For each group:
#   versions    - version slugs handed to process_version(); each one is
#                 written to its own "<version>.json" file by main()
#   generation  - copied verbatim into every games.json entry of the group
#   region      - region name copied verbatim into games.json
#   region_id   - numeric id passed to pb.region() to list the locations
#   games       - per-version display metadata (name, slug, release_year)
#                 that main() emits into games.json
VERSION_GROUPS = {
    "firered-leafgreen": {
        "versions": ["firered", "leafgreen"],
        "generation": 3,
        "region": "kanto",
        "region_id": 1,
        "games": {
            "firered": {
                "name": "Pokemon FireRed",
                "slug": "firered",
                "release_year": 2004,
            },
            "leafgreen": {
                "name": "Pokemon LeafGreen",
                "slug": "leafgreen",
                "release_year": 2004,
            },
        },
    },
    "emerald": {
        "versions": ["emerald"],
        "generation": 3,
        "region": "hoenn",
        "region_id": 3,
        "games": {
            "emerald": {
                "name": "Pokemon Emerald",
                "slug": "emerald",
                "release_year": 2005,
            },
        },
    },
    "heartgold-soulsilver": {
        "versions": ["heartgold", "soulsilver"],
        "generation": 4,
        "region": "johto",
        # process_version() additionally pulls the locations of region 1 for
        # these two versions, on top of this region.
        "region_id": 2,
        "games": {
            "heartgold": {
                "name": "Pokemon HeartGold",
                "slug": "heartgold",
                "release_year": 2010,
            },
            "soulsilver": {
                "name": "Pokemon SoulSilver",
                "slug": "soulsilver",
                "release_year": 2010,
            },
        },
    },
}
|
||||
|
||||
# Encounter methods to include (excludes gift, legendary-only, etc.)
# get_encounters_for_area() drops every encounter detail whose method name
# is not a member of this set.
INCLUDED_METHODS = {
    "walk",
    "surf",
    "old-rod",
    "good-rod",
    "super-rod",
    "rock-smash",
    "headbutt",
}
|
||||
|
||||
# Collect all pokemon dex numbers across games
# Module-level accumulator: process_version() adds to it as a side effect for
# every route it emits, and main() reads it afterwards to decide which
# Pokemon to fetch for pokemon.json.
all_pokemon_dex: set[int] = set()
|
||||
|
||||
|
||||
def clean_location_name(name: str) -> str:
    """Convert a PokeAPI location slug to a clean display name.

    At most one leading region prefix (``kanto-``, ``johto-``, ...) is
    removed, the remaining hyphens become spaces, and the result is
    title-cased.

    e.g. 'kanto-route-1' -> 'Route 1'
         'pallet-town' -> 'Pallet Town'

    Args:
        name: Location slug as returned by PokeAPI.

    Returns:
        The human-readable location name.
    """
    region_prefixes = (
        "kanto-", "johto-", "hoenn-", "sinnoh-",
        "unova-", "kalos-", "alola-", "galar-",
    )

    # Strip only the first matching prefix; a name such as
    # 'kanto-johto-gate' keeps its second region word.
    for prefix in region_prefixes:
        if name.startswith(prefix):
            name = name[len(prefix):]
            break

    # The previous implementation followed this with a regex that rewrote
    # "Route <n>" to itself; it never changed the string, so it was removed.
    return name.replace("-", " ").title()
|
||||
|
||||
|
||||
def clean_area_name(area_name: str, location_name: str) -> str | None:
|
||||
"""Extract meaningful area suffix, or None if it's the default area."""
|
||||
if area_name.startswith(location_name):
|
||||
suffix = area_name[len(location_name):].strip("-").strip()
|
||||
if not suffix or suffix == "area":
|
||||
return None
|
||||
return suffix.replace("-", " ").title()
|
||||
return area_name.replace("-", " ").title()
|
||||
|
||||
|
||||
def get_encounters_for_area(area_id: int, version_name: str) -> list[dict]:
    """Collect the raw encounter rows of one location area for one version.

    The area is loaded through ``pb.location_area``. Only version details
    whose version name equals ``version_name`` are considered, and only
    encounter details whose method is listed in ``INCLUDED_METHODS`` are
    kept.

    Returns:
        One dict per retained encounter detail with the keys
        ``pokemon_name``, ``national_dex``, ``method``, ``chance``,
        ``min_level`` and ``max_level``.
    """
    location_area = pb.location_area(area_id)
    collected: list[dict] = []

    for entry in location_area.pokemon_encounters:
        # The numeric id is the last path segment of the pokemon resource URL.
        national_dex = int(entry.pokemon.url.rstrip("/").split("/")[-1])
        species_name = entry.pokemon.name

        # Flatten the encounter details belonging to the requested version.
        details_for_version = (
            detail
            for version_detail in entry.version_details
            if version_detail.version.name == version_name
            for detail in version_detail.encounter_details
        )

        for detail in details_for_version:
            method_name = detail.method.name
            if method_name in INCLUDED_METHODS:
                collected.append({
                    "pokemon_name": species_name,
                    "national_dex": national_dex,
                    "method": method_name,
                    "chance": detail.chance,
                    "min_level": detail.min_level,
                    "max_level": detail.max_level,
                })

    return collected
|
||||
|
||||
|
||||
def aggregate_encounters(raw_encounters: list[dict]) -> list[dict]:
    """Merge raw encounter rows that share the same pokemon and method.

    For each (national_dex, method) pair the chances are summed and the
    level range is widened to cover every contributing row. The summed rate
    is capped at 100. The result is ordered by descending encounter rate,
    then by pokemon name.
    """
    merged: dict[tuple[int, str], dict] = {}

    for row in raw_encounters:
        group_key = (row["national_dex"], row["method"])
        bucket = merged.get(group_key)
        if bucket is None:
            # First row for this pair seeds the bucket with its own range.
            bucket = {
                "national_dex": row["national_dex"],
                "pokemon_name": row["pokemon_name"],
                "method": row["method"],
                "encounter_rate": 0,
                "min_level": row["min_level"],
                "max_level": row["max_level"],
            }
            merged[group_key] = bucket

        bucket["encounter_rate"] += row["chance"]
        bucket["min_level"] = min(bucket["min_level"], row["min_level"])
        bucket["max_level"] = max(bucket["max_level"], row["max_level"])

    ordered = list(merged.values())
    for bucket in ordered:
        # Summed chances can overshoot; clamp to a sane percentage.
        bucket["encounter_rate"] = min(bucket["encounter_rate"], 100)

    ordered.sort(key=lambda item: (-item["encounter_rate"], item["pokemon_name"]))
    return ordered
|
||||
|
||||
|
||||
def process_version(version_name: str, vg_info: dict) -> list[dict]:
    """Build the ordered route list, with encounters, for one game version.

    Locations come from ``pb.region(vg_info["region_id"])``; for heartgold
    and soulsilver the locations of region 1 (Kanto) are appended as well.
    Each location's areas are queried through ``get_encounters_for_area``.
    When a location has more than one area, every area with a meaningful
    suffix becomes its own route named ``"<location> (<suffix>)"``; all
    other encounters are pooled into a route named after the location.

    Side effects:
        Prints progress information and adds the dex number of every emitted
        encounter to the module-level ``all_pokemon_dex`` set.

    Returns:
        A list of ``{"name", "order", "encounters"}`` dicts where ``order``
        counts up from 1 in emission order.
    """
    print(f"\n--- Processing {version_name} ---")

    location_refs = list(pb.region(vg_info["region_id"]).locations)

    # For HGSS, also include Kanto locations
    if version_name in ("heartgold", "soulsilver"):
        location_refs.extend(pb.region(1).locations)

    print(f" Found {len(location_refs)} locations")

    routes: list[dict] = []

    for loc_ref in location_refs:
        loc_slug = loc_ref.name
        loc_id = int(loc_ref.url.rstrip("/").split("/")[-1])
        display_name = clean_location_name(loc_slug)

        areas = pb.location(loc_id).areas
        if not areas:
            continue

        has_multiple_areas = len(areas) > 1
        pooled: list[dict] = []
        per_area: dict[str, list[dict]] = {}

        for area_ref in areas:
            area_id = int(area_ref.url.rstrip("/").split("/")[-1])
            suffix = clean_area_name(area_ref.name, loc_slug)

            found = get_encounters_for_area(area_id, version_name)
            if not found:
                continue

            if suffix and has_multiple_areas:
                per_area[suffix] = found
            else:
                pooled.extend(found)

        # Area-specific encounters become separate routes and are emitted
        # ahead of the location's pooled route.
        pending = [
            (f"{display_name} ({suffix})", area_rows)
            for suffix, area_rows in per_area.items()
        ]
        if pooled:
            pending.append((display_name, pooled))

        for route_name, rows in pending:
            aggregated = aggregate_encounters(rows)
            if not aggregated:
                continue
            all_pokemon_dex.update(item["national_dex"] for item in aggregated)
            routes.append({
                "name": route_name,
                # Routes are numbered consecutively from 1 as they are added.
                "order": len(routes) + 1,
                "encounters": aggregated,
            })

    print(f" Routes with encounters: {len(routes)}")
    total_enc = sum(len(route["encounters"]) for route in routes)
    print(f" Total encounter entries: {total_enc}")

    return routes
|
||||
|
||||
|
||||
def fetch_pokemon_data(dex_numbers: set[int]) -> list[dict]:
    """Look up name and type data for each collected dex number.

    Every number is resolved through ``pb.pokemon`` in ascending order. A
    progress line is printed after every 50th lookup and after the last one.

    Returns:
        Dicts with ``national_dex``, ``name`` (title-cased, hyphens replaced
        by spaces), ``types`` and ``sprite_url``, sorted by ``national_dex``.
    """
    total = len(dex_numbers)
    print(f"\n--- Fetching {total} Pokemon ---")

    entries: list[dict] = []

    for position, dex in enumerate(sorted(dex_numbers), start=1):
        record = pb.pokemon(dex)
        type_names = [slot.type.name for slot in record.types]
        entries.append({
            "national_dex": dex,
            "name": record.name.title().replace("-", " "),
            "types": type_names,
            "sprite_url": f"https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/{dex}.png",
        })

        if position == total or position % 50 == 0:
            print(f" Fetched {position}/{total}")

    entries.sort(key=lambda item: item["national_dex"])
    return entries
|
||||
|
||||
|
||||
def write_json(filename: str, data):
    """Serialize ``data`` as 2-space-indented JSON into ``DATA_DIR/filename``.

    The target file is overwritten if it exists, and the written path is
    printed afterwards.
    """
    target = DATA_DIR / filename
    with target.open("w") as handle:
        json.dump(data, handle, indent=2)
    print(f" -> {target}")
|
||||
|
||||
|
||||
def main():
    """Generate all JSON seed files under ``DATA_DIR``.

    Steps:
        1. Optionally redirect pokebase to a custom PokeAPI instance when
           ``--base-url <url>`` is present on the command line.
        2. Write ``games.json`` from ``VERSION_GROUPS``.
        3. Write one ``<version>.json`` route file per configured version.
        4. Write ``pokemon.json`` for every dex number collected in step 3.

    Exits with an error message when ``--base-url`` is given without a value.
    """
    # Check for custom base URL
    if "--base-url" in sys.argv:
        idx = sys.argv.index("--base-url")
        if idx + 1 >= len(sys.argv):
            # Previously this surfaced as a bare IndexError traceback.
            sys.exit("error: --base-url requires a value, e.g. --base-url http://pokeapi-app:8000")
        # Drop trailing slashes so the joined URL does not contain "//api/v2".
        base_url = sys.argv[idx + 1].rstrip("/")
        pb_common.BASE_URL = base_url + "/api/v2"
        print(f"Using custom PokeAPI: {base_url}")
    else:
        print("Using public PokeAPI (pokebase caches to disk after first fetch)")

    DATA_DIR.mkdir(parents=True, exist_ok=True)

    # Build games.json
    games = [
        {
            "name": game_info["name"],
            "slug": game_info["slug"],
            "generation": vg_info["generation"],
            "region": vg_info["region"],
            "release_year": game_info["release_year"],
        }
        for vg_info in VERSION_GROUPS.values()
        for game_info in vg_info["games"].values()
    ]

    write_json("games.json", games)
    print(f"Wrote {len(games)} games to games.json")

    # Process each version; this also fills all_pokemon_dex as a side effect.
    for vg_info in VERSION_GROUPS.values():
        for ver_name in vg_info["versions"]:
            routes = process_version(ver_name, vg_info)
            write_json(f"{ver_name}.json", routes)

    # Fetch all Pokemon data
    pokemon = fetch_pokemon_data(all_pokemon_dex)
    write_json("pokemon.json", pokemon)
    print(f"\nWrote {len(pokemon)} Pokemon to pokemon.json")

    print("\nDone! JSON files written to seeds/data/")
    print("Review route ordering and curate as needed.")
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user