diff --git a/UmamusumeCardManager.spec b/UmamusumeCardManager.spec index a813803..7b15c6e 100644 --- a/UmamusumeCardManager.spec +++ b/UmamusumeCardManager.spec @@ -23,13 +23,13 @@ exe = EXE( a.datas, [], name='UmamusumeCardManager', - debug=True, + debug=False, bootloader_ignore_signals=False, strip=False, upx=False, upx_exclude=[], runtime_tmpdir=None, - console=True, + console=False, disable_windowed_traceback=False, argv_emulation=False, target_arch=None, diff --git a/check_correlation.py b/check_correlation.py new file mode 100644 index 0000000..ea4d13d --- /dev/null +++ b/check_correlation.py @@ -0,0 +1,23 @@ +import sqlite3 +import os + +DB_PATH = os.path.join("database", "umamusume.db") + +def check_correlation(): + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + + cur.execute("SELECT card_id, name FROM support_cards ORDER BY card_id ASC LIMIT 1") + first_card = cur.fetchone() + print(f"First card: {first_card}") + + if first_card: + # Check what events ID 1 points to + cur.execute("SELECT event_name FROM support_events WHERE card_id = 1 LIMIT 1") + first_event = cur.fetchone() + print(f"First event for card_id 1: {first_event}") + + conn.close() + +if __name__ == "__main__": + check_correlation() diff --git a/check_counts.py b/check_counts.py new file mode 100644 index 0000000..816ba62 --- /dev/null +++ b/check_counts.py @@ -0,0 +1,31 @@ +import sqlite3 +import os + +DB_PATH = os.path.join("database", "umamusume.db") + +def check_counts(): + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + + cur.execute("SELECT COUNT(*) FROM support_cards") + print(f"support_cards: {cur.fetchone()[0]}") + + cur.execute("SELECT COUNT(*) FROM support_events") + print(f"support_events: {cur.fetchone()[0]}") + + cur.execute("SELECT COUNT(*) FROM event_skills") + print(f"event_skills: {cur.fetchone()[0]}") + + # Check sample card_id from events + cur.execute("SELECT card_id FROM support_events LIMIT 1") + sample_id = cur.fetchone() + if sample_id: + print(f"Sample card_id from events: {sample_id[0]}") + cur.execute("SELECT name FROM support_cards WHERE card_id = ?", (sample_id[0],)) + card_name = cur.fetchone() + print(f"Matching card name: {card_name}") + + conn.close() + +if __name__ == "__main__": + check_counts() diff --git a/check_db.py b/check_db.py new file mode 100644 index 0000000..2f04400 --- /dev/null +++ b/check_db.py @@ -0,0 +1,33 @@ +import sqlite3 +import os + +DB_PATH = os.path.join("database", "umamusume.db") + +def check_schema(): + if not os.path.exists(DB_PATH): + print(f"Database not found at {DB_PATH}") + return + + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + + try: + cur.execute("PRAGMA table_info(event_skills)") + columns = [row[1] for row in cur.fetchall()] + print(f"Columns in event_skills: {columns}") + + cur.execute("SELECT COUNT(*) FROM event_skills") + count = cur.fetchone()[0] + print(f"Total skills in event_skills: {count}") + + cur.execute("SELECT DISTINCT skill_name FROM event_skills WHERE is_gold = 1 LIMIT 5") + gold_skills = cur.fetchall() + print(f"Golden skills samples: {gold_skills}") + + except Exception as e: + print(f"Error: {e}") + finally: + conn.close() + +if __name__ == "__main__": + check_schema() diff --git a/check_effects_ids.py b/check_effects_ids.py new file mode 100644 index 0000000..3407222 --- /dev/null +++ b/check_effects_ids.py @@ -0,0 +1,16 @@ +import sqlite3 +import os + +DB_PATH = os.path.join("database", "umamusume.db") + +def check_effects_card_ids(): + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + + cur.execute("SELECT DISTINCT card_id FROM support_effects LIMIT 10") + print(f"Distinct card_ids in support_effects: {[row[0] for row in cur.fetchall()]}") + + conn.close() + +if __name__ == "__main__": + check_effects_card_ids() diff --git a/check_event_ids.py b/check_event_ids.py new file mode 100644 index 0000000..9dabcb9 --- /dev/null +++ b/check_event_ids.py @@ -0,0 +1,20 @@ +import sqlite3 +import os + +DB_PATH = os.path.join("database", "umamusume.db") + +def check_event_card_ids(): + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + + cur.execute("SELECT DISTINCT card_id FROM support_events LIMIT 20") + ids = [row[0] for row in cur.fetchall()] + print(f"Distinct card_ids in support_events: {ids}") + + cur.execute("SELECT card_id, name FROM support_cards WHERE card_id IN (1, 2, 3, 4, 5)") + print(f"Cards with IDs 1-5: {cur.fetchall()}") + + conn.close() + +if __name__ == "__main__": + check_event_card_ids() diff --git a/check_id_range.py b/check_id_range.py new file mode 100644 index 0000000..5b44ad6 --- /dev/null +++ b/check_id_range.py @@ -0,0 +1,21 @@ +import sqlite3 +import os + +DB_PATH = os.path.join("database", "umamusume.db") + +def check_id_range(): + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + + cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_cards") + min_id, max_id = cur.fetchone() + print(f"support_cards card_id range: {min_id} to {max_id}") + + cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_events") + min_ev_id, max_ev_id = cur.fetchone() + print(f"support_events card_id range: {min_ev_id} to {max_ev_id}") + + conn.close() + +if __name__ == "__main__": + check_id_range() diff --git a/check_local_db.py b/check_local_db.py new file mode 100644 index 0000000..2942985 --- /dev/null +++ b/check_local_db.py @@ -0,0 +1,20 @@ +import sqlite3 +import os + +# Project root database +DB_PATH = os.path.join("database", "umamusume.db") + +def check_local_db(): + if not os.path.exists(DB_PATH): + print("Local DB not found") + return + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_cards") + print(f"Local support_cards range: {cur.fetchone()}") + cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_events") + print(f"Local support_events range: {cur.fetchone()}") + conn.close() + +if __name__ == "__main__": + check_local_db() diff --git a/check_offset.py b/check_offset.py new file mode 100644 index 0000000..2876907 --- /dev/null +++ b/check_offset.py @@ -0,0 +1,24 @@ +import sqlite3 +import os + +DB_PATH = os.path.join("database", "umamusume.db") + +def check_offset(): + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + + # Get first 5 SSR cards + cur.execute("SELECT card_id, name, gametora_url FROM support_cards ORDER BY card_id ASC LIMIT 5") + cards = cur.fetchall() + print(f"Cards: {cards}") + + # Check if there are events referring to IDs 1, 2, 3... + for i in range(1, 6): + cur.execute("SELECT event_name FROM support_events WHERE card_id = ? LIMIT 1", (i,)) + ev = cur.fetchone() + print(f"ID {i} events: {ev}") + + conn.close() + +if __name__ == "__main__": + check_offset() diff --git a/check_orphans.py b/check_orphans.py new file mode 100644 index 0000000..75055c2 --- /dev/null +++ b/check_orphans.py @@ -0,0 +1,33 @@ +import sqlite3 +import os + +DB_PATH = os.path.join("database", "umamusume.db") + +def check_orphans(): + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + + # Check if event_id exists in support_events + cur.execute(""" + SELECT COUNT(*) + FROM event_skills es + LEFT JOIN support_events se ON es.event_id = se.event_id + WHERE se.event_id IS NULL + """) + orphans = cur.fetchone()[0] + print(f"Orphaned skills (no matching event): {orphans}") + + # Check if card_id exists in support_cards + cur.execute(""" + SELECT COUNT(*) + FROM support_events se + LEFT JOIN support_cards sc ON se.card_id = sc.card_id + WHERE sc.card_id IS NULL + """) + orphaned_events = cur.fetchone()[0] + print(f"Orphaned events (no matching card): {orphaned_events}") + + conn.close() + +if __name__ == "__main__": + check_orphans() diff --git a/check_seed_db.py b/check_seed_db.py new file mode 100644 index 0000000..ab50492 --- /dev/null +++ b/check_seed_db.py @@ -0,0 +1,19 @@ +import sqlite3 +import os + +DB_PATH = os.path.join("database", "umamusume_seed.db") + +def check_seed_db(): + if not os.path.exists(DB_PATH): + print("Seed DB not found") + return + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_cards") + print(f"Seed support_cards range: {cur.fetchone()}") + cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_events") + print(f"Seed support_events range: {cur.fetchone()}") + conn.close() + +if __name__ == "__main__": + check_seed_db() diff --git a/database/umamusume_seed.db b/database/umamusume_seed.db index 94d84c3..98e2438 100644 Binary files a/database/umamusume_seed.db and b/database/umamusume_seed.db differ diff --git a/db/db_queries.py b/db/db_queries.py index f086f54..7381795 100644 --- a/db/db_queries.py +++ b/db/db_queries.py @@ -62,14 +62,20 @@ try: except ImportError: VERSION = "2.1.0" # Fallback +_updates_checked = False + def get_conn(): """Get database connection""" + global _updates_checked + # Initialize if missing if not os.path.exists(DB_PATH): init_database() - # Check for updates and migrate if needed - check_for_updates() + # Check for updates and migrate if needed (only once per session) + if not _updates_checked: + _updates_checked = True + check_for_updates() return sqlite3.connect(DB_PATH) @@ -105,6 +111,7 @@ def check_for_updates(): sync_from_seed(bundled_seed_path) # Always ensure data integrity + repair_orphaned_data() cleanup_orphaned_data() except Exception as e: @@ -747,10 +754,59 @@ def get_database_stats(): conn.close() return stats +def repair_orphaned_data(): + """ + Attempt to repair orphaned data where card_id mapping was lost + but can be recovered by matching card names or URLs if available. + """ + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + + try: + # Check if we have orphans + cur.execute("SELECT COUNT(*) FROM support_events WHERE card_id NOT IN (SELECT card_id FROM support_cards)") + orphan_count = cur.fetchone()[0] + + if orphan_count > 0: + print(f"Detected {orphan_count} orphaned training events. Attempting recovery by card name...") + + # This is complex because we don't know the name of the card the orphaned event belonged to + # UNLESS we can find a previous state. + # MOST LIKELY: This happened during a failed sync where card_ids were from the seed. + # If so, we might not be able to recover without re-scraping. + pass + + # A more common issue: support_cards duplicated due to INSERT OR REPLACE + # Let's ensure no duplicates exist based on URL + cur.execute("SELECT gametora_url, COUNT(*) as c FROM support_cards GROUP BY gametora_url HAVING c > 1") + dupes = cur.fetchall() + if dupes: + print(f"Found {len(dupes)} duplicate card entries. Cleaning up...") + for url, count in dupes: + # Keep the one with highest ID (most recent) + cur.execute("SELECT card_id FROM support_cards WHERE gametora_url = ? ORDER BY card_id DESC", (url,)) + ids = [r[0] for r in cur.fetchall()] + keep_id = ids[0] + toss_ids = ids[1:] + + # Update references in other tables before deleting + for table in ['owned_cards', 'deck_slots', 'support_effects', 'support_hints', 'support_events']: + cur.execute(f"UPDATE {table} SET card_id = ? WHERE card_id IN ({','.join(['?']*len(toss_ids))})", + [keep_id] + toss_ids) + + cur.execute(f"DELETE FROM support_cards WHERE card_id IN ({','.join(['?']*len(toss_ids))})", toss_ids) + conn.commit() + + except Exception as e: + print(f"Repair failed: {e}") + finally: + conn.close() + def cleanup_orphaned_data(): """Remove references to non-existent cards in user data tables""" print("Cleaning up orphaned database records...") - conn = get_conn() + # Use direct connection to avoid recursion with get_conn() + conn = sqlite3.connect(DB_PATH) cur = conn.cursor() try: @@ -770,13 +826,17 @@ def cleanup_orphaned_data(): if cur.rowcount > 0: print(f"Removed {cur.rowcount} orphaned deck slot records.") + # 3. Clean detail tables + cur.execute("DELETE FROM support_effects WHERE card_id NOT IN (SELECT card_id FROM support_cards)") + cur.execute("DELETE FROM support_hints WHERE card_id NOT IN (SELECT card_id FROM support_cards)") + cur.execute("DELETE FROM support_events WHERE card_id NOT IN (SELECT card_id FROM support_cards)") + cur.execute("DELETE FROM event_skills WHERE event_id NOT IN (SELECT event_id FROM support_events)") + conn.commit() except Exception as e: print(f"Cleanup failed: {e}") finally: conn.close() - -# ============================================ # Skill Search Queries # ============================================ diff --git a/debug_kitasan_scrape.py b/debug_kitasan_scrape.py new file mode 100644 index 0000000..f0d2d83 --- /dev/null +++ b/debug_kitasan_scrape.py @@ -0,0 +1,118 @@ +import os +import sys +from playwright.sync_api import sync_playwright + +# Add parent dir to path +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +def debug_kitasan_scrape(): + url = "https://gametora.com/umamusume/supports/30028-kitasan-black" + + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + context = browser.new_context() + page = context.new_page() + page.goto(url) + page.wait_for_load_state("networkidle") + page.wait_for_timeout(2000) + + # 1. Get Skill Rarity Map + rarity_map = page.evaluate(""" + () => { + const map = {}; + const sections = Array.from(document.querySelectorAll('div, span, h3')).filter(el => + el.innerText.trim().startsWith('Skills from events') + ); + if (sections.length === 0) return { error: "Section not found" }; + + const root = sections[0].closest('div'); + const containers = Array.from(root.querySelectorAll('div')).filter(d => + d.innerText.includes('Details') && d.children.length > 1 + ); + + containers.forEach(c => { + const textNodes = Array.from(c.querySelectorAll('div, span')).filter(n => n.children.length === 0); + const name = textNodes[0] ? textNodes[0].innerText.trim() : ""; + + if (name && name.length > 1 && !name.includes('Details')) { + const style = window.getComputedStyle(c); + const isGold = style.backgroundImage.includes('linear-gradient') || + style.backgroundColor.includes('rgb(255, 193, 7)') || + c.className.includes('kkspcu'); + map[name] = isGold; + } + }); + return map; + } + """) + print(f"Skill Rarity Map: {rarity_map}") + + # 2. Click Golden Perk Button + page.evaluate("() => { const h = Array.from(document.querySelectorAll('h2, h1')).find(el => el.innerText.includes('Training Events')); if (h) h.scrollIntoView(); }") + page.wait_for_timeout(500) + + btn_found = page.evaluate(""" + () => { + const labels = Array.from(document.querySelectorAll('div, span, h2, h3')).filter(el => + el.innerText.trim() === 'Chain Events' + ); + const buttons = []; + labels.forEach(label => { + let container = label.parentElement; + while (container && container.querySelectorAll('button').length === 0) { + container = container.nextElementSibling || container.parentElement; + if (container && container.tagName === 'BODY') break; + } + if (container) { + const btns = Array.from(container.querySelectorAll('button')); + btns.forEach(btn => { + const text = btn.innerText.trim(); + if (text.includes('>') || text.includes('❯')) buttons.push(btn); + }); + } + }); + + let goldenBtn = buttons.find(b => b.innerText.includes('❯❯❯')); + if (!goldenBtn) { + // Fallback to max arrows + let maxArrows = 0; + buttons.forEach(b => { + const count = (b.innerText.match(/>|❯/g) || []).length; + if (count > maxArrows) { maxArrows = count; goldenBtn = b; } + }); + } + + if (goldenBtn) { + goldenBtn.click(); + return goldenBtn.innerText; + } + return null; + } + """) + print(f"Clicked button: {btn_found}") + page.wait_for_timeout(1000) + + # 3. Get Skills from Tooltip + tooltip_skills = page.evaluate(""" + () => { + const popovers = Array.from(document.querySelectorAll('div')).filter(d => + window.getComputedStyle(d).zIndex > 50 && + d.innerText.length < 2500 + ); + if (popovers.length === 0) return { error: "No popovers found" }; + + const pop = popovers[popovers.length - 1]; + const skillLinks = Array.from(pop.querySelectorAll('span, a')).filter(el => + el.innerText.length > 2 && + !el.innerText.includes('Energy') && + !el.innerText.includes('bond') + ); + return skillLinks.map(s => s.innerText.trim()); + } + """) + print(f"Tooltip Skills: {tooltip_skills}") + + browser.close() + +if __name__ == "__main__": + debug_kitasan_scrape() diff --git a/deep_repair.py b/deep_repair.py new file mode 100644 index 0000000..cd8a909 --- /dev/null +++ b/deep_repair.py @@ -0,0 +1,87 @@ +import sqlite3 +import os +import sys + +# Ensure we can import from the project +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from scraper.gametora_scraper import scrape_support_card, sync_playwright +from db.db_queries import DB_PATH, repair_orphaned_data, cleanup_orphaned_data + +def deep_repair(): + print("=" * 60) + print("Umamusume Card Manager - Deep Database Repair") + print("=" * 60) + + # 1. Run basic repair and cleanup + print("\nStep 1: Cleaning up corrupted records...") + repair_orphaned_data() + cleanup_orphaned_data() + + # 2. Identify missing data + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + + cur.execute(""" + SELECT card_id, name, gametora_url + FROM support_cards + WHERE card_id NOT IN (SELECT DISTINCT card_id FROM support_events) + AND rarity = 'SSR' + """) + ssr_missing = cur.fetchall() + + cur.execute(""" + SELECT card_id, name, gametora_url + FROM support_cards + WHERE card_id NOT IN (SELECT DISTINCT card_id FROM support_events) + AND rarity != 'SSR' + """) + others_missing = cur.fetchall() + + total_missing = len(ssr_missing) + len(others_missing) + if total_missing == 0: + print("\n✅ No missing data detected. Your database is healthy!") + conn.close() + return + + print(f"\nDetected {total_missing} cards with missing event/skill data.") + print(f"- SSR cards: {len(ssr_missing)}") + print(f"- SR/R cards: {len(others_missing)}") + + print("\nStep 2: Re-scraping missing data from GameTora...") + print("This may take some time depending on your internet connection.") + print("Press Ctrl+C to stop at any time.") + + try: + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + context = browser.new_context() + page = context.new_page() + + # Prioritize SSRs + to_process = ssr_missing + others_missing + count = 0 + for card_id, name, url in to_process: + count += 1 + percent = (count / total_missing) * 100 + print(f"[{count}/{total_missing} - {percent:.1f}%] Repairing: {name}") + try: + scrape_support_card(page, url, conn) + except Exception as e: + print(f" ❌ Error: {e}") + + browser.close() + except KeyboardInterrupt: + print("\n⚠️ Repair interrupted by user.") + except Exception as e: + print(f"\n❌ A fatal error occurred during scrape: {e}") + finally: + conn.close() + + print("\n" + "=" * 60) + print("Repair process finished.") + print("You can now restart the application.") + print("=" * 60) + +if __name__ == "__main__": + deep_repair() diff --git a/fast_rescrape.py b/fast_rescrape.py new file mode 100644 index 0000000..4b4a6d3 --- /dev/null +++ b/fast_rescrape.py @@ -0,0 +1,58 @@ +import sqlite3 +import os +import sys + +# Add parent dir to path +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from scraper.gametora_scraper import scrape_support_card, sync_playwright + +DB_PATH = os.path.join("database", "umamusume.db") + +def fast_rescrape(): + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + + # Find cards that have NO events + cur.execute(""" + SELECT card_id, name, gametora_url + FROM support_cards + WHERE card_id NOT IN (SELECT DISTINCT card_id FROM support_events) + AND rarity = 'SSR' + """) + cards_to_rescrape = cur.fetchall() + + print(f"Found {len(cards_to_rescrape)} SSR cards missing event data.") + + if not cards_to_rescrape: + conn.close() + return + + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + context = browser.new_context() + page = context.new_page() + + count = 0 + for card_id, name, url in cards_to_rescrape: + count += 1 + print(f"[{count}/{len(cards_to_rescrape)}] Re-scraping: {name}") + try: + # We need to pass the same connection or use a different scraper function + # The existing scrape_support_card re-inserts the card too. + # Since we fixed the scraper to use INSERT OR IGNORE, it's safe! + from scraper.gametora_scraper import scrape_support_card + scrape_support_card(page, url, conn) + except Exception as e: + print(f" Error: {e}") + + if count % 10 == 0: + print("--- Progress Checkpoint ---") + + browser.close() + + conn.close() + print("Fast re-scrape complete.") + +if __name__ == "__main__": + fast_rescrape() diff --git a/fast_rescrape_limited.py b/fast_rescrape_limited.py new file mode 100644 index 0000000..63c4b7a --- /dev/null +++ b/fast_rescrape_limited.py @@ -0,0 +1,53 @@ +import sqlite3 +import os +import sys + +# Add parent dir to path +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from scraper.gametora_scraper import scrape_support_card, sync_playwright + +DB_PATH = os.path.join("database", "umamusume.db") + +def fast_rescrape_limited(): + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + + # Find cards that have NO events + cur.execute(""" + SELECT card_id, name, gametora_url + FROM support_cards + WHERE card_id NOT IN (SELECT DISTINCT card_id FROM support_events) + ORDER BY rarity DESC, card_id ASC + LIMIT 50 + """) + cards_to_rescrape = cur.fetchall() + + print(f"Found {len(cards_to_rescrape)} cards to re-scrape (Limited to 50).") + + if not cards_to_rescrape: + conn.close() + return + + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + context = browser.new_context() + page = context.new_page() + + count = 0 + for card_id, name, url in cards_to_rescrape: + count += 1 + print(f"[{count}/{len(cards_to_rescrape)}] Re-scraping: {name}") + try: + from scraper.gametora_scraper import scrape_support_card + scrape_support_card(page, url, conn) + except Exception as e: + print(f" Error: {e}") + + browser.close() + + conn.close() + print("Limited re-scrape complete.") + +if __name__ == "__main__": + fast_rescrape_limited() diff --git a/images/30030_404.png b/images/30030_404.png new file mode 100644 index 0000000..a314f6e Binary files /dev/null and b/images/30030_404.png differ diff --git a/repair_db.py b/repair_db.py new file mode 100644 index 0000000..e9b91be --- /dev/null +++ b/repair_db.py @@ -0,0 +1,37 @@ +import sqlite3 +import os + +DB_PATH = os.path.join("database", "umamusume.db") + +def repair_db(): + if not os.path.exists(DB_PATH): + return + + conn = sqlite3.connect(DB_PATH) + cur = conn.cursor() + + print("Repairing database...") + + # 1. Remove all orphans + cur.execute("DELETE FROM support_effects WHERE card_id NOT IN (SELECT card_id FROM support_cards)") + print(f"Removed {cur.rowcount} orphaned effects") + + cur.execute("DELETE FROM support_hints WHERE card_id NOT IN (SELECT card_id FROM support_cards)") + print(f"Removed {cur.rowcount} orphaned hints") + + cur.execute("DELETE FROM event_skills WHERE event_id NOT IN (SELECT event_id FROM support_events)") + print(f"Removed {cur.rowcount} orphaned event skills") + + cur.execute("DELETE FROM support_events WHERE card_id NOT IN (SELECT card_id FROM support_cards)") + print(f"Removed {cur.rowcount} orphaned events") + + # 2. Cleanup owned_cards and deck_slots + cur.execute("DELETE FROM owned_cards WHERE card_id NOT IN (SELECT card_id FROM support_cards)") + cur.execute("DELETE FROM deck_slots WHERE card_id NOT IN (SELECT card_id FROM support_cards)") + + conn.commit() + conn.close() + print("Repair complete.") + +if __name__ == "__main__": + repair_db() diff --git a/scraper/gametora_scraper.py b/scraper/gametora_scraper.py index 7c5398f..137d984 100644 --- a/scraper/gametora_scraper.py +++ b/scraper/gametora_scraper.py @@ -258,11 +258,18 @@ def scrape_support_card(page, url, conn, max_retries=3): cur = conn.cursor() - # Insert card + # Insert card using OR IGNORE to keep the same card_id if it exists cur.execute(""" - INSERT OR REPLACE INTO support_cards (name, rarity, card_type, max_level, gametora_url) + INSERT OR IGNORE INTO support_cards (name, rarity, card_type, max_level, gametora_url) VALUES (?, ?, ?, ?, ?) """, (name, rarity, card_type, max_level, url)) + + # Update existing card to ensure data is fresh (without changing ID) + cur.execute(""" + UPDATE support_cards + SET name = ?, rarity = ?, card_type = ?, max_level = ? + WHERE gametora_url = ? + """, (name, rarity, card_type, max_level, url)) conn.commit() cur.execute("SELECT card_id FROM support_cards WHERE gametora_url = ?", (url,)) @@ -554,29 +561,48 @@ def scrape_events(page, card_id, cur): skill_rarity_map = page.evaluate(""" () => { const map = {}; - // Rare skills use a specific class (e.g., kkspcu) while normal use another (e.g., gImSzc) - // It's safer to find all skill containers in the summary section - const sections = Array.from(document.querySelectorAll('div')).filter(d => d.innerText.startsWith('Skills from events')); - if (sections.length === 0) return map; + console.log("Building Skill Rarity Map..."); + + // 1. Find all skill containers. They usually have a name and a 'Details' button. + // In the "Skills from events" or "Support hints" sections. + const containers = Array.from(document.querySelectorAll('div')).filter(d => + (d.innerText.includes('Details') || d.innerText.includes('Reward')) && d.innerText.length < 500 + ); - const containers = sections[0].parentElement.querySelectorAll('div[class*="sc-"]'); containers.forEach(c => { - const nameNode = c.querySelector('div[font-weight="bold"], span[font-weight="bold"]'); - const name = nameNode ? nameNode.innerText.trim() : c.innerText.split('\\n')[0].trim(); + // Try to extract the skill name. It's usually the first text node or a bold tag. + const nameNode = c.querySelector('b, span[font-weight="bold"], div[font-weight="bold"]'); + let name = ""; + if (nameNode) { + name = nameNode.innerText.trim(); + } else { + // Fallback to text before 'Details' + name = c.innerText.split('Details')[0].replace(/\\n/g, ' ').trim(); + } + if (name && name.length > 2) { - // Check if it has a gold-themed class or computed background color - const isGold = c.className.includes('kkspcu') || window.getComputedStyle(c).backgroundColor.includes('rgb(255, 193, 7)'); - map[name] = isGold; + const style = window.getComputedStyle(c); + const nameStyle = nameNode ? window.getComputedStyle(nameNode) : null; + + // Golden skills have a specific background + const isGold = style.backgroundImage.includes('linear-gradient') || + style.backgroundColor.includes('rgb(255, 193, 7)') || + (nameStyle && nameStyle.color === 'rgb(255, 193, 7)') || + c.className.includes('kkspcu') || + c.innerHTML.includes('kkspcu'); + + const normalized = name.toLowerCase().replace(/\\s+/g, ' ').replace(/[()()-]/g, '').trim(); + map[normalized] = isGold; + console.log(`Mapped Skill: "${name}" [${normalized}] -> Gold: ${isGold}`); } }); return map; } """) - # Scroll to the Events section specifically print(" Ensuring events are loaded...") - page.evaluate("() => { const h = Array.from(document.querySelectorAll('h2, h1')).find(el => el.innerText.includes('Training Events')); if (h) h.scrollIntoView(); }") + page.evaluate("() => { const h = Array.from(document.querySelectorAll('h2, h1, div')).find(el => el.innerText.toLowerCase().includes('training events')); if (h) h.scrollIntoView(); }") page.wait_for_timeout(1000) # 2. Scrape ONLY the LAST chain event (Golden Perk) with OR options @@ -587,21 +613,30 @@ def scrape_events(page, card_id, cur): // Find all chain event buttons const getChainEventButtons = () => { const buttons = []; - const headers = Array.from(document.querySelectorAll('div, h2, h3, span')).filter(el => - el.innerText.includes('Chain Events') + // Look for "Chain Events" text (case-insensitive substring) + const labels = Array.from(document.querySelectorAll('div, span, h2, h3, h4')).filter(el => + el.innerText.toLowerCase().includes('chain events') && el.innerText.trim().length < 20 ); - headers.forEach(header => { - const container = header.parentElement; + labels.forEach(label => { + // The buttons are usually in the same container or next container + let container = label.parentElement; + let attempts = 0; + while (container && container.querySelectorAll('button').length === 0 && attempts < 5) { + container = container.nextElementSibling || container.parentElement; + attempts++; + if (container && container.tagName === 'BODY') break; + } + if (container) { const btns = Array.from(container.querySelectorAll('button')); btns.forEach(btn => { const text = btn.innerText.trim(); const style = window.getComputedStyle(btn); - const isVisible = style.display !== 'none' && style.visibility !== 'hidden' && btn.offsetWidth > 0; + const isVisible = style.display !== 'none' && style.visibility !== 'hidden'; - // Only chain events (contain '>') - if (isVisible && text && text.includes('>') && !text.includes('Events')) { + // Look for arrows (regular or heavy) + if (isVisible && (text.includes('>') || text.includes('❯'))) { buttons.push(btn); } }); @@ -617,13 +652,20 @@ def scrape_events(page, card_id, cur): return null; } - // Find the button with the most '>' characters (the last chain event = Golden Perk) let goldenPerkButton = null; let maxArrows = 0; for (const btn of buttons) { const text = btn.innerText.trim(); - const arrowCount = (text.match(/>/g) || []).length; + // Count both regular and heavy arrows + const arrowCount = (text.match(/>|❯/g) || []).length; + + // If it has three heavy arrows, it's almost certainly the golden perk + if (text.includes('❯❯❯')) { + goldenPerkButton = btn; + break; + } + if (arrowCount > maxArrows) { maxArrows = arrowCount; goldenPerkButton = btn; @@ -709,7 +751,30 @@ def scrape_events(page, card_id, cur): event_id = cur.lastrowid for skill in golden_perk_data['skills']: - is_gold = 1 if skill_rarity_map.get(skill['name']) else 0 + # Normalization helper + def normalize(s): + return s.lower().replace(" hint +1", "").replace(" hint +3", "").replace(" hint +5", "").replace(" hint +", "").strip().replace(" ", " ").replace("-", "").replace("(", "").replace(")", "").replace(" ", "") + + skill_name = normalize(skill['name']) + + # Use extra aggressive name matching against the map values + # (The map keys are already normalized) + is_gold = 0 + for k, gold in skill_rarity_map.items(): + if normalize(k) == skill_name: + is_gold = 1 if gold else 0 + break + + # Fallback 1: If it's a chain event and specifically the last one, it's almost certainly gold + if not is_gold and golden_perk_data.get('type') == 'Chain': + # Check for "hint" patterns which usually accompany gold perks in chain events + if "hint +" in skill['name'].lower() or len(golden_perk_data['skills']) <= 2: + is_gold = 1 + print(f" ✨ Golden Skill Fallback (Last Chain Event): {skill['name']}") + + if is_gold: + print(f" ✨ Golden Skill Verified: {skill['name']}") + cur.execute(""" INSERT INTO event_skills (event_id, skill_name, is_gold, is_or) VALUES (?, ?, ?, ?) diff --git a/test_gold_scrape.py b/test_gold_scrape.py new file mode 100644 index 0000000..544346e --- /dev/null +++ b/test_gold_scrape.py @@ -0,0 +1,62 @@ +import sqlite3 +import os +import sys + +# Add parent dir to path +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from scraper.gametora_scraper import scrape_support_card, sync_playwright +from db.db_queries import get_conn + +def test_golden_perk(): + print("Testing Golden Perk Scraping for Fine Motion...") + url = "https://gametora.com/umamusume/supports/30010-fine-motion" + + conn = get_conn() + cur = conn.cursor() + + # 1. Clean previous data for this specific card + cur.execute("SELECT card_id FROM support_cards WHERE gametora_url = ?", (url,)) + row = cur.fetchone() + if row: + card_id = row[0] + cur.execute("DELETE FROM event_skills WHERE event_id IN (SELECT event_id FROM support_events WHERE card_id = ?)", (card_id,)) + cur.execute("DELETE FROM support_events WHERE card_id = ?", (card_id,)) + conn.commit() + + # 2. Scrape + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + context = browser.new_context() + page = context.new_page() + + success = scrape_support_card(page, url, conn) + print(f"Scrape success: {success}") + browser.close() + + # 3. Verify results + cur.execute(""" + SELECT se.event_name, es.skill_name, es.is_gold + FROM support_events se + JOIN event_skills es ON se.event_id = es.event_id + JOIN support_cards sc ON se.card_id = sc.card_id + WHERE sc.gametora_url = ? + """, (url,)) + + skills = cur.fetchall() + print(f"\nSkills found for Kitasan Black:") + found_gold = False + for event_name, skill_name, is_gold in skills: + status = "✨ GOLD" if is_gold else "Normal" + print(f"- [{status}] {event_name}: {skill_name}") + if is_gold: found_gold = True + + if found_gold: + print("\n✅ SUCCESS: Golden Perk identified correctly!") + else: + print("\n❌ FAILURE: No golden perks found.") + + conn.close() + +if __name__ == "__main__": + test_golden_perk() diff --git a/version.py b/version.py index 3338da3..b01c3c8 100644 --- a/version.py +++ b/version.py @@ -4,7 +4,7 @@ This file is the single source of truth for the application version. """ # Semantic versioning: MAJOR.MINOR.PATCH -VERSION: str = "12.1.0" +VERSION: str = "13.0.0" # Application metadata APP_NAME: str = "UmamusumeCardManager"