"""Fetch game data from local PokeAPI submodule and write static JSON seed files. Reads from the PokeAPI/api-data git submodule at data/pokeapi/ — no network access or container needed. Only uses Python stdlib. Usage: python -m app.seeds.fetch_pokeapi Requires the submodule to be initialized: git submodule update --init """ import json import re import sys from pathlib import Path REPO_ROOT = Path(__file__).parents[4] # backend/src/app/seeds -> repo root POKEAPI_DIR = REPO_ROOT / "data" / "pokeapi" / "data" / "api" / "v2" DATA_DIR = Path(__file__).parent / "data" def load_resource(endpoint: str, resource_id: int) -> dict: """Load a PokeAPI resource from the local submodule data.""" path = POKEAPI_DIR / endpoint / str(resource_id) / "index.json" with open(path) as f: return json.load(f) def extract_id(url: str) -> int: """Extract the numeric ID from a PokeAPI URL (absolute or relative).""" return int(url.rstrip("/").split("/")[-1]) # Game definitions VERSION_GROUPS = { "firered-leafgreen": { "versions": ["firered", "leafgreen"], "generation": 3, "region": "kanto", "region_id": 1, "games": { "firered": { "name": "Pokemon FireRed", "slug": "firered", "release_year": 2004, }, "leafgreen": { "name": "Pokemon LeafGreen", "slug": "leafgreen", "release_year": 2004, }, }, }, "emerald": { "versions": ["emerald"], "generation": 3, "region": "hoenn", "region_id": 3, "games": { "emerald": { "name": "Pokemon Emerald", "slug": "emerald", "release_year": 2005, }, }, }, "heartgold-soulsilver": { "versions": ["heartgold", "soulsilver"], "generation": 4, "region": "johto", "region_id": 2, "games": { "heartgold": { "name": "Pokemon HeartGold", "slug": "heartgold", "release_year": 2010, }, "soulsilver": { "name": "Pokemon SoulSilver", "slug": "soulsilver", "release_year": 2010, }, }, }, } # Encounter methods to include (excludes gift, legendary-only, etc.) INCLUDED_METHODS = { "walk", "surf", "old-rod", "good-rod", "super-rod", "rock-smash", "headbutt", } # Collect all pokemon dex numbers across games all_pokemon_dex: set[int] = set() def clean_location_name(name: str) -> str: """Convert PokeAPI location slug to a clean display name. e.g. 'kanto-route-1' -> 'Route 1' 'pallet-town' -> 'Pallet Town' """ for prefix in [ "kanto-", "johto-", "hoenn-", "sinnoh-", "unova-", "kalos-", "alola-", "galar-", ]: if name.startswith(prefix): name = name[len(prefix):] break name = name.replace("-", " ").title() name = re.sub(r"Route (\d+)", r"Route \1", name) return name def clean_area_name(area_name: str, location_name: str) -> str | None: """Extract meaningful area suffix, or None if it's the default area.""" if area_name.startswith(location_name): suffix = area_name[len(location_name):].strip("-").strip() if not suffix or suffix == "area": return None return suffix.replace("-", " ").title() return area_name.replace("-", " ").title() def get_encounters_for_area(area_id: int, version_name: str) -> list[dict]: """Get encounter data for a location area, filtered by version.""" area = load_resource("location-area", area_id) encounters = [] for pe in area["pokemon_encounters"]: pokemon_url = pe["pokemon"]["url"] dex_num = extract_id(pokemon_url) pokemon_name = pe["pokemon"]["name"] for vd in pe["version_details"]: if vd["version"]["name"] != version_name: continue for enc in vd["encounter_details"]: method = enc["method"]["name"] if method not in INCLUDED_METHODS: continue encounters.append({ "pokemon_name": pokemon_name, "national_dex": dex_num, "method": method, "chance": enc["chance"], "min_level": enc["min_level"], "max_level": enc["max_level"], }) return encounters def aggregate_encounters(raw_encounters: list[dict]) -> list[dict]: """Aggregate encounter rates by pokemon + method (sum chances across level ranges).""" agg: dict[tuple[int, str], dict] = {} for enc in raw_encounters: key = (enc["national_dex"], enc["method"]) if key not in agg: agg[key] = { "national_dex": enc["national_dex"], "pokemon_name": enc["pokemon_name"], "method": enc["method"], "encounter_rate": 0, "min_level": enc["min_level"], "max_level": enc["max_level"], } agg[key]["encounter_rate"] += enc["chance"] agg[key]["min_level"] = min(agg[key]["min_level"], enc["min_level"]) agg[key]["max_level"] = max(agg[key]["max_level"], enc["max_level"]) result = list(agg.values()) for r in result: r["encounter_rate"] = min(r["encounter_rate"], 100) return sorted(result, key=lambda x: (-x["encounter_rate"], x["pokemon_name"])) def process_version(version_name: str, vg_info: dict) -> list[dict]: """Process all locations for a specific game version.""" print(f"\n--- Processing {version_name} ---") region = load_resource("region", vg_info["region_id"]) location_refs = list(region["locations"]) # For HGSS, also include Kanto locations if version_name in ("heartgold", "soulsilver"): kanto = load_resource("region", 1) location_refs = location_refs + list(kanto["locations"]) print(f" Found {len(location_refs)} locations") routes = [] order = 1 for loc_ref in location_refs: loc_name = loc_ref["name"] loc_id = extract_id(loc_ref["url"]) display_name = clean_location_name(loc_name) location = load_resource("location", loc_id) areas = location["areas"] if not areas: continue all_encounters: list[dict] = [] area_specific: dict[str, list[dict]] = {} for area_ref in areas: area_id = extract_id(area_ref["url"]) area_slug = area_ref["name"] area_suffix = clean_area_name(area_slug, loc_name) encounters = get_encounters_for_area(area_id, version_name) if not encounters: continue if area_suffix and len(areas) > 1: area_specific[area_suffix] = encounters else: all_encounters.extend(encounters) # Area-specific encounters become separate routes if area_specific: for area_suffix, area_encs in area_specific.items(): aggregated = aggregate_encounters(area_encs) if aggregated: route_name = f"{display_name} ({area_suffix})" for enc in aggregated: all_pokemon_dex.add(enc["national_dex"]) routes.append({ "name": route_name, "order": order, "encounters": aggregated, }) order += 1 if all_encounters: aggregated = aggregate_encounters(all_encounters) if aggregated: for enc in aggregated: all_pokemon_dex.add(enc["national_dex"]) routes.append({ "name": display_name, "order": order, "encounters": aggregated, }) order += 1 print(f" Routes with encounters: {len(routes)}") total_enc = sum(len(r["encounters"]) for r in routes) print(f" Total encounter entries: {total_enc}") return routes def fetch_pokemon_data(dex_numbers: set[int]) -> list[dict]: """Fetch Pokemon name/type data for all collected dex numbers.""" print(f"\n--- Fetching {len(dex_numbers)} Pokemon ---") pokemon_list = [] dex_sorted = sorted(dex_numbers) for i, dex in enumerate(dex_sorted, 1): poke = load_resource("pokemon", dex) types = [t["type"]["name"] for t in poke["types"]] pokemon_list.append({ "national_dex": dex, "name": poke["name"].title().replace("-", " "), "types": types, "sprite_url": f"https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/{dex}.png", }) if i % 50 == 0 or i == len(dex_sorted): print(f" Fetched {i}/{len(dex_sorted)}") return sorted(pokemon_list, key=lambda x: x["national_dex"]) def flatten_evolution_chain(chain: dict, seeded_dex: set[int]) -> list[dict]: """Recursively flatten a PokeAPI evolution chain into (from, to) pairs.""" pairs = [] from_dex = int(chain["species"]["url"].rstrip("/").split("/")[-1]) for evo in chain.get("evolves_to", []): to_dex = int(evo["species"]["url"].rstrip("/").split("/")[-1]) for detail in evo["evolution_details"]: trigger = detail["trigger"]["name"] min_level = detail.get("min_level") item = detail.get("item") if item: item = item["name"] held_item = detail.get("held_item") if held_item: held_item = held_item["name"] # Collect other conditions as a string conditions = [] if detail.get("min_happiness"): conditions.append(f"happiness >= {detail['min_happiness']}") if detail.get("min_affection"): conditions.append(f"affection >= {detail['min_affection']}") if detail.get("min_beauty"): conditions.append(f"beauty >= {detail['min_beauty']}") if detail.get("time_of_day"): conditions.append(detail["time_of_day"]) if detail.get("known_move"): conditions.append(f"knows {detail['known_move']['name']}") if detail.get("known_move_type"): conditions.append(f"knows {detail['known_move_type']['name']}-type move") if detail.get("location"): conditions.append(f"at {detail['location']['name']}") if detail.get("party_species"): conditions.append(f"with {detail['party_species']['name']} in party") if detail.get("party_type"): conditions.append(f"with {detail['party_type']['name']}-type in party") if detail.get("gender") is not None: conditions.append("female" if detail["gender"] == 1 else "male") if detail.get("needs_overworld_rain"): conditions.append("raining") if detail.get("turn_upside_down"): conditions.append("turn upside down") if detail.get("trade_species"): conditions.append(f"trade for {detail['trade_species']['name']}") if detail.get("relative_physical_stats") is not None: stat_map = {1: "atk > def", -1: "atk < def", 0: "atk = def"} conditions.append(stat_map.get(detail["relative_physical_stats"], "")) condition = ", ".join(conditions) if conditions else None if from_dex in seeded_dex and to_dex in seeded_dex: pairs.append({ "from_national_dex": from_dex, "to_national_dex": to_dex, "trigger": trigger, "min_level": min_level, "item": item, "held_item": held_item, "condition": condition, }) # Recurse into further evolutions pairs.extend(flatten_evolution_chain(evo, seeded_dex)) return pairs def fetch_evolution_data(seeded_dex: set[int]) -> list[dict]: """Fetch evolution chains from local PokeAPI data for all seeded pokemon.""" print(f"\n--- Fetching evolution chains ---") # First, get the evolution chain URL for each pokemon species chain_ids: set[int] = set() dex_sorted = sorted(seeded_dex) for i, dex in enumerate(dex_sorted, 1): species = load_resource("pokemon-species", dex) chain_url = species["evolution_chain"]["url"] chain_id = extract_id(chain_url) chain_ids.add(chain_id) if i % 50 == 0 or i == len(dex_sorted): print(f" Species fetched: {i}/{len(dex_sorted)}") print(f" Found {len(chain_ids)} unique evolution chains") # Fetch each chain and flatten all_pairs: list[dict] = [] seen: set[tuple[int, int, str]] = set() for chain_id in sorted(chain_ids): chain = load_resource("evolution-chain", chain_id) pairs = flatten_evolution_chain(chain["chain"], seeded_dex) for p in pairs: key = (p["from_national_dex"], p["to_national_dex"], p["trigger"]) if key not in seen: seen.add(key) all_pairs.append(p) print(f" Total evolution pairs: {len(all_pairs)}") return sorted(all_pairs, key=lambda x: (x["from_national_dex"], x["to_national_dex"])) def apply_evolution_overrides(evolutions: list[dict]) -> None: """Apply overrides from evolution_overrides.json if it exists.""" overrides_path = DATA_DIR / "evolution_overrides.json" if not overrides_path.exists(): return with open(overrides_path) as f: overrides = json.load(f) # Remove entries for removal in overrides.get("remove", []): evolutions[:] = [ e for e in evolutions if not (e["from_national_dex"] == removal["from_dex"] and e["to_national_dex"] == removal["to_dex"]) ] # Add entries for addition in overrides.get("add", []): evolutions.append({ "from_national_dex": addition["from_dex"], "to_national_dex": addition["to_dex"], "trigger": addition.get("trigger", "level-up"), "min_level": addition.get("min_level"), "item": addition.get("item"), "held_item": addition.get("held_item"), "condition": addition.get("condition"), }) # Modify entries for mod in overrides.get("modify", []): for e in evolutions: if (e["from_national_dex"] == mod["from_dex"] and e["to_national_dex"] == mod["to_dex"]): for key, value in mod.get("set", {}).items(): e[key] = value # Re-sort evolutions.sort(key=lambda x: (x["from_national_dex"], x["to_national_dex"])) print(f" Applied overrides: {len(evolutions)} pairs after overrides") def write_json(filename: str, data): path = DATA_DIR / filename with open(path, "w") as f: json.dump(data, f, indent=2) print(f" -> {path}") def main(): if not POKEAPI_DIR.is_dir(): print( f"Error: PokeAPI data not found at {POKEAPI_DIR}\n" "Initialize the submodule with: git submodule update --init", file=sys.stderr, ) sys.exit(1) DATA_DIR.mkdir(parents=True, exist_ok=True) # Build games.json games = [] for vg_info in VERSION_GROUPS.values(): for game_info in vg_info["games"].values(): games.append({ "name": game_info["name"], "slug": game_info["slug"], "generation": vg_info["generation"], "region": vg_info["region"], "release_year": game_info["release_year"], }) write_json("games.json", games) print(f"Wrote {len(games)} games to games.json") # Process each version for vg_info in VERSION_GROUPS.values(): for ver_name in vg_info["versions"]: routes = process_version(ver_name, vg_info) write_json(f"{ver_name}.json", routes) # Fetch all Pokemon data pokemon = fetch_pokemon_data(all_pokemon_dex) write_json("pokemon.json", pokemon) print(f"\nWrote {len(pokemon)} Pokemon to pokemon.json") # Fetch evolution chains evolutions = fetch_evolution_data(all_pokemon_dex) apply_evolution_overrides(evolutions) write_json("evolutions.json", evolutions) print(f"\nWrote {len(evolutions)} evolution pairs to evolutions.json") print("\nDone! JSON files written to seeds/data/") print("Review route ordering and curate as needed.") if __name__ == "__main__": main()