feat: Implement new database management and scraping utilities, and update application version to 13.0.0.

This commit is contained in:
kiyreload27
2025-12-31 19:50:14 +00:00
parent d7d1318a55
commit a2ac99e8b6
22 changed files with 812 additions and 32 deletions

View File

@@ -23,13 +23,13 @@ exe = EXE(
a.datas, a.datas,
[], [],
name='UmamusumeCardManager', name='UmamusumeCardManager',
debug=True, debug=False,
bootloader_ignore_signals=False, bootloader_ignore_signals=False,
strip=False, strip=False,
upx=False, upx=False,
upx_exclude=[], upx_exclude=[],
runtime_tmpdir=None, runtime_tmpdir=None,
console=True, console=False,
disable_windowed_traceback=False, disable_windowed_traceback=False,
argv_emulation=False, argv_emulation=False,
target_arch=None, target_arch=None,

23
check_correlation.py Normal file
View File

@@ -0,0 +1,23 @@
import sqlite3
import os
DB_PATH = os.path.join("database", "umamusume.db")
def check_correlation(db_path=None):
    """Print the lowest-id support card and the first event stored for card_id 1.

    Args:
        db_path: Optional path to the SQLite file. Defaults to the
            module-level ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    # sqlite3.connect() silently creates an empty file for a missing path;
    # bail out instead of leaving a stray, table-less database behind.
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()
        cur.execute("SELECT card_id, name FROM support_cards ORDER BY card_id ASC LIMIT 1")
        first_card = cur.fetchone()
        print(f"First card: {first_card}")

        if first_card:
            # The literal id 1 is intentional: the question is whether events
            # still reference id 1, regardless of what the first card's id is.
            cur.execute("SELECT event_name FROM support_events WHERE card_id = 1 LIMIT 1")
            first_event = cur.fetchone()
            print(f"First event for card_id 1: {first_event}")
    finally:
        # Close even when a query raises (e.g. a missing table).
        conn.close()
if __name__ == "__main__":
check_correlation()

31
check_counts.py Normal file
View File

@@ -0,0 +1,31 @@
import sqlite3
import os
DB_PATH = os.path.join("database", "umamusume.db")
def check_counts(db_path=None):
    """Print row counts for the card/event/skill tables and one sample join.

    Args:
        db_path: Optional path to the SQLite file. Defaults to the
            module-level ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    # Avoid sqlite3.connect() creating an empty database for a missing path.
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        for table in ("support_cards", "support_events", "event_skills"):
            # Table names come from the fixed tuple above, never from input.
            cur.execute(f"SELECT COUNT(*) FROM {table}")
            print(f"{table}: {cur.fetchone()[0]}")

        # Take one card_id from the events table and look up its card.
        cur.execute("SELECT card_id FROM support_events LIMIT 1")
        sample_id = cur.fetchone()
        if sample_id:
            print(f"Sample card_id from events: {sample_id[0]}")
            cur.execute("SELECT name FROM support_cards WHERE card_id = ?", (sample_id[0],))
            card_name = cur.fetchone()
            print(f"Matching card name: {card_name}")
    finally:
        conn.close()
if __name__ == "__main__":
check_counts()

33
check_db.py Normal file
View File

@@ -0,0 +1,33 @@
import sqlite3
import os
DB_PATH = os.path.join("database", "umamusume.db")
def check_schema(db_path=None):
    """Print the columns, row count and a few gold skills of ``event_skills``.

    Args:
        db_path: Optional path to the SQLite file. Defaults to the
            module-level ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        # Row index 1 of each PRAGMA table_info row is the column name.
        cur.execute("PRAGMA table_info(event_skills)")
        columns = [row[1] for row in cur.fetchall()]
        print(f"Columns in event_skills: {columns}")

        cur.execute("SELECT COUNT(*) FROM event_skills")
        count = cur.fetchone()[0]
        print(f"Total skills in event_skills: {count}")

        cur.execute("SELECT DISTINCT skill_name FROM event_skills WHERE is_gold = 1 LIMIT 5")
        gold_skills = cur.fetchall()
        print(f"Golden skills samples: {gold_skills}")
    except sqlite3.Error as e:
        # Only database problems (missing table/column, corrupt file) are
        # expected here; anything else is a programming error and should surface.
        print(f"Error: {e}")
    finally:
        conn.close()
if __name__ == "__main__":
check_schema()

16
check_effects_ids.py Normal file
View File

@@ -0,0 +1,16 @@
import sqlite3
import os
DB_PATH = os.path.join("database", "umamusume.db")
def check_effects_card_ids(db_path=None):
    """Print up to ten distinct ``card_id`` values found in ``support_effects``.

    Args:
        db_path: Optional path to the SQLite file. Defaults to the
            module-level ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    # Avoid sqlite3.connect() creating an empty database for a missing path.
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()
        cur.execute("SELECT DISTINCT card_id FROM support_effects LIMIT 10")
        print(f"Distinct card_ids in support_effects: {[row[0] for row in cur.fetchall()]}")
    finally:
        conn.close()
if __name__ == "__main__":
check_effects_card_ids()

20
check_event_ids.py Normal file
View File

@@ -0,0 +1,20 @@
import sqlite3
import os
DB_PATH = os.path.join("database", "umamusume.db")
def check_event_card_ids(db_path=None):
    """Print distinct event ``card_id`` values and the cards whose id is 1-5.

    Args:
        db_path: Optional path to the SQLite file. Defaults to the
            module-level ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    # Avoid sqlite3.connect() creating an empty database for a missing path.
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        cur.execute("SELECT DISTINCT card_id FROM support_events LIMIT 20")
        ids = [row[0] for row in cur.fetchall()]
        print(f"Distinct card_ids in support_events: {ids}")

        cur.execute("SELECT card_id, name FROM support_cards WHERE card_id IN (1, 2, 3, 4, 5)")
        print(f"Cards with IDs 1-5: {cur.fetchall()}")
    finally:
        conn.close()
if __name__ == "__main__":
check_event_card_ids()

21
check_id_range.py Normal file
View File

@@ -0,0 +1,21 @@
import sqlite3
import os
DB_PATH = os.path.join("database", "umamusume.db")
def check_id_range(db_path=None):
    """Print the min/max ``card_id`` of ``support_cards`` and ``support_events``.

    Args:
        db_path: Optional path to the SQLite file. Defaults to the
            module-level ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    # Avoid sqlite3.connect() creating an empty database for a missing path.
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_cards")
        min_id, max_id = cur.fetchone()
        print(f"support_cards card_id range: {min_id} to {max_id}")

        cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_events")
        min_ev_id, max_ev_id = cur.fetchone()
        print(f"support_events card_id range: {min_ev_id} to {max_ev_id}")
    finally:
        conn.close()
if __name__ == "__main__":
check_id_range()

20
check_local_db.py Normal file
View File

@@ -0,0 +1,20 @@
import sqlite3
import os
# Project root database
DB_PATH = os.path.join("database", "umamusume.db")
def check_local_db(db_path=None):
    """Print the ``card_id`` ranges stored in the local database.

    Args:
        db_path: Optional path to the SQLite file. Defaults to the
            module-level ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    if not os.path.exists(path):
        print("Local DB not found")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_cards")
        print(f"Local support_cards range: {cur.fetchone()}")

        cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_events")
        print(f"Local support_events range: {cur.fetchone()}")
    finally:
        # Close even when a table is missing and the query raises.
        conn.close()
if __name__ == "__main__":
check_local_db()

24
check_offset.py Normal file
View File

@@ -0,0 +1,24 @@
import sqlite3
import os
DB_PATH = os.path.join("database", "umamusume.db")
def check_offset(db_path=None):
    """Print the five lowest-id cards and whether ids 1-5 have any events.

    Args:
        db_path: Optional path to the SQLite file. Defaults to the
            module-level ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    # Avoid sqlite3.connect() creating an empty database for a missing path.
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        # First five cards by card_id (the query applies no rarity filter).
        cur.execute("SELECT card_id, name, gametora_url FROM support_cards ORDER BY card_id ASC LIMIT 5")
        cards = cur.fetchall()
        print(f"Cards: {cards}")

        # Check whether any events refer to the literal ids 1..5.
        for i in range(1, 6):
            cur.execute("SELECT event_name FROM support_events WHERE card_id = ? LIMIT 1", (i,))
            ev = cur.fetchone()
            print(f"ID {i} events: {ev}")
    finally:
        conn.close()
if __name__ == "__main__":
check_offset()

33
check_orphans.py Normal file
View File

@@ -0,0 +1,33 @@
import sqlite3
import os
DB_PATH = os.path.join("database", "umamusume.db")
def check_orphans(db_path=None):
    """Print how many event skills and events point at a missing parent row.

    Args:
        db_path: Optional path to the SQLite file. Defaults to the
            module-level ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    # Avoid sqlite3.connect() creating an empty database for a missing path.
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        # Skills whose event_id has no row in support_events.
        cur.execute("""
            SELECT COUNT(*)
            FROM event_skills es
            LEFT JOIN support_events se ON es.event_id = se.event_id
            WHERE se.event_id IS NULL
        """)
        orphans = cur.fetchone()[0]
        print(f"Orphaned skills (no matching event): {orphans}")

        # Events whose card_id has no row in support_cards.
        cur.execute("""
            SELECT COUNT(*)
            FROM support_events se
            LEFT JOIN support_cards sc ON se.card_id = sc.card_id
            WHERE sc.card_id IS NULL
        """)
        orphaned_events = cur.fetchone()[0]
        print(f"Orphaned events (no matching card): {orphaned_events}")
    finally:
        conn.close()
if __name__ == "__main__":
check_orphans()

19
check_seed_db.py Normal file
View File

@@ -0,0 +1,19 @@
import sqlite3
import os
DB_PATH = os.path.join("database", "umamusume_seed.db")
def check_seed_db(db_path=None):
    """Print the ``card_id`` ranges stored in the seed database.

    Args:
        db_path: Optional path to the SQLite file. Defaults to the
            module-level ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    if not os.path.exists(path):
        print("Seed DB not found")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_cards")
        print(f"Seed support_cards range: {cur.fetchone()}")

        cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_events")
        print(f"Seed support_events range: {cur.fetchone()}")
    finally:
        # Close even when a table is missing and the query raises.
        conn.close()
if __name__ == "__main__":
check_seed_db()

Binary file not shown.

View File

@@ -62,13 +62,19 @@ try:
except ImportError: except ImportError:
VERSION = "2.1.0" # Fallback VERSION = "2.1.0" # Fallback
_updates_checked = False
def get_conn(): def get_conn():
"""Get database connection""" """Get database connection"""
global _updates_checked
# Initialize if missing # Initialize if missing
if not os.path.exists(DB_PATH): if not os.path.exists(DB_PATH):
init_database() init_database()
# Check for updates and migrate if needed # Check for updates and migrate if needed (only once per session)
if not _updates_checked:
_updates_checked = True
check_for_updates() check_for_updates()
return sqlite3.connect(DB_PATH) return sqlite3.connect(DB_PATH)
@@ -105,6 +111,7 @@ def check_for_updates():
sync_from_seed(bundled_seed_path) sync_from_seed(bundled_seed_path)
# Always ensure data integrity # Always ensure data integrity
repair_orphaned_data()
cleanup_orphaned_data() cleanup_orphaned_data()
except Exception as e: except Exception as e:
@@ -747,10 +754,59 @@ def get_database_stats():
conn.close() conn.close()
return stats return stats
def repair_orphaned_data():
    """
    Report orphaned training events and merge duplicate support cards.

    Orphaned events (rows in support_events whose card_id has no card) are
    only counted and reported; nothing in this function re-links them.

    Cards that share the same gametora_url are collapsed onto the row with
    the highest card_id: references in the dependent tables are re-pointed
    to it and the older rows are deleted. Any failure is printed and the
    uncommitted changes are discarded when the connection closes.
    """
    # Direct connection (not get_conn()) because this runs from the update check.
    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()

    try:
        cur.execute("SELECT COUNT(*) FROM support_events WHERE card_id NOT IN (SELECT card_id FROM support_cards)")
        orphan_count = cur.fetchone()[0]

        if orphan_count > 0:
            print(f"Detected {orphan_count} orphaned training events. Attempting recovery by card name...")
            # NOTE(review): no recovery is actually performed here. The
            # orphaned rows carry no card name to match on, so restoring
            # them requires a re-scrape.

        # Duplicates are detected by URL. Rows with a NULL URL are excluded:
        # they would all fall into one group, and "gametora_url = ?" can never
        # match NULL, which previously left an empty id list and aborted the
        # whole repair with an IndexError.
        cur.execute(
            "SELECT gametora_url, COUNT(*) as c FROM support_cards "
            "WHERE gametora_url IS NOT NULL "
            "GROUP BY gametora_url HAVING c > 1"
        )
        dupes = cur.fetchall()

        if dupes:
            print(f"Found {len(dupes)} duplicate card entries. Cleaning up...")
            for url, _count in dupes:
                # Keep the one with highest ID (most recent)
                cur.execute("SELECT card_id FROM support_cards WHERE gametora_url = ? ORDER BY card_id DESC", (url,))
                ids = [r[0] for r in cur.fetchall()]
                keep_id, toss_ids = ids[0], ids[1:]
                if not toss_ids:
                    continue

                placeholders = ",".join("?" * len(toss_ids))

                # Re-point references before deleting. Table names come from
                # this fixed tuple, never from external input.
                for table in ('owned_cards', 'deck_slots', 'support_effects', 'support_hints', 'support_events'):
                    cur.execute(
                        f"UPDATE {table} SET card_id = ? WHERE card_id IN ({placeholders})",
                        [keep_id] + toss_ids,
                    )

                cur.execute(f"DELETE FROM support_cards WHERE card_id IN ({placeholders})", toss_ids)

        conn.commit()
    except Exception as e:
        # Best-effort: this runs during startup and must not stop the app.
        print(f"Repair failed: {e}")
    finally:
        conn.close()
def cleanup_orphaned_data(): def cleanup_orphaned_data():
"""Remove references to non-existent cards in user data tables""" """Remove references to non-existent cards in user data tables"""
print("Cleaning up orphaned database records...") print("Cleaning up orphaned database records...")
conn = get_conn() # Use direct connection to avoid recursion with get_conn()
conn = sqlite3.connect(DB_PATH)
cur = conn.cursor() cur = conn.cursor()
try: try:
@@ -770,13 +826,17 @@ def cleanup_orphaned_data():
if cur.rowcount > 0: if cur.rowcount > 0:
print(f"Removed {cur.rowcount} orphaned deck slot records.") print(f"Removed {cur.rowcount} orphaned deck slot records.")
# 3. Clean detail tables
cur.execute("DELETE FROM support_effects WHERE card_id NOT IN (SELECT card_id FROM support_cards)")
cur.execute("DELETE FROM support_hints WHERE card_id NOT IN (SELECT card_id FROM support_cards)")
cur.execute("DELETE FROM support_events WHERE card_id NOT IN (SELECT card_id FROM support_cards)")
cur.execute("DELETE FROM event_skills WHERE event_id NOT IN (SELECT event_id FROM support_events)")
conn.commit() conn.commit()
except Exception as e: except Exception as e:
print(f"Cleanup failed: {e}") print(f"Cleanup failed: {e}")
finally: finally:
conn.close() conn.close()
# ============================================
# Skill Search Queries # Skill Search Queries
# ============================================ # ============================================

118
debug_kitasan_scrape.py Normal file
View File

@@ -0,0 +1,118 @@
import os
import sys
from playwright.sync_api import sync_playwright
# Add parent dir to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
def debug_kitasan_scrape():
    """Open the Kitasan Black support page and print what the scraper would see.

    Three things are printed: the skill-name -> is-gold map built from the
    "Skills from events" section, the text of the chain-event button that was
    clicked, and the texts collected from the resulting popover.
    """
    url = "https://gametora.com/umamusume/supports/30028-kitasan-black"

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context()
            page = context.new_page()
            page.goto(url)
            page.wait_for_load_state("networkidle")
            page.wait_for_timeout(2000)

            # 1. Get Skill Rarity Map
            rarity_map = page.evaluate("""
                () => {
                    const map = {};
                    const sections = Array.from(document.querySelectorAll('div, span, h3')).filter(el =>
                        el.innerText.trim().startsWith('Skills from events')
                    );
                    if (sections.length === 0) return { error: "Section not found" };
                    const root = sections[0].closest('div');
                    const containers = Array.from(root.querySelectorAll('div')).filter(d =>
                        d.innerText.includes('Details') && d.children.length > 1
                    );
                    containers.forEach(c => {
                        const textNodes = Array.from(c.querySelectorAll('div, span')).filter(n => n.children.length === 0);
                        const name = textNodes[0] ? textNodes[0].innerText.trim() : "";
                        if (name && name.length > 1 && !name.includes('Details')) {
                            const style = window.getComputedStyle(c);
                            const isGold = style.backgroundImage.includes('linear-gradient') ||
                                style.backgroundColor.includes('rgb(255, 193, 7)') ||
                                c.className.includes('kkspcu');
                            map[name] = isGold;
                        }
                    });
                    return map;
                }
            """)
            print(f"Skill Rarity Map: {rarity_map}")

            # 2. Click Golden Perk Button
            page.evaluate("() => { const h = Array.from(document.querySelectorAll('h2, h1')).find(el => el.innerText.includes('Training Events')); if (h) h.scrollIntoView(); }")
            page.wait_for_timeout(500)

            # NOTE(review): the script below contains includes('') and the
            # regex />|/g. An empty-string includes() is true for every
            # button, so the first button is always chosen. This looks like a
            # lost "heavy arrow" glyph; confirm the intended character against
            # the original file before relying on this selection.
            btn_found = page.evaluate("""
                () => {
                    const labels = Array.from(document.querySelectorAll('div, span, h2, h3')).filter(el =>
                        el.innerText.trim() === 'Chain Events'
                    );
                    const buttons = [];
                    labels.forEach(label => {
                        let container = label.parentElement;
                        while (container && container.querySelectorAll('button').length === 0) {
                            container = container.nextElementSibling || container.parentElement;
                            if (container && container.tagName === 'BODY') break;
                        }
                        if (container) {
                            const btns = Array.from(container.querySelectorAll('button'));
                            btns.forEach(btn => {
                                const text = btn.innerText.trim();
                                if (text.includes('>') || text.includes('')) buttons.push(btn);
                            });
                        }
                    });
                    let goldenBtn = buttons.find(b => b.innerText.includes(''));
                    if (!goldenBtn) {
                        // Fallback to max arrows
                        let maxArrows = 0;
                        buttons.forEach(b => {
                            const count = (b.innerText.match(/>|/g) || []).length;
                            if (count > maxArrows) { maxArrows = count; goldenBtn = b; }
                        });
                    }
                    if (goldenBtn) {
                        goldenBtn.click();
                        return goldenBtn.innerText;
                    }
                    return null;
                }
            """)
            print(f"Clicked button: {btn_found}")
            page.wait_for_timeout(1000)

            # 3. Get Skills from Tooltip
            tooltip_skills = page.evaluate("""
                () => {
                    const popovers = Array.from(document.querySelectorAll('div')).filter(d =>
                        window.getComputedStyle(d).zIndex > 50 &&
                        d.innerText.length < 2500
                    );
                    if (popovers.length === 0) return { error: "No popovers found" };
                    const pop = popovers[popovers.length - 1];
                    const skillLinks = Array.from(pop.querySelectorAll('span, a')).filter(el =>
                        el.innerText.length > 2 &&
                        !el.innerText.includes('Energy') &&
                        !el.innerText.includes('bond')
                    );
                    return skillLinks.map(s => s.innerText.trim());
                }
            """)
            print(f"Tooltip Skills: {tooltip_skills}")
        finally:
            # Close the browser even when navigation or a script evaluation fails.
            browser.close()
if __name__ == "__main__":
debug_kitasan_scrape()

87
deep_repair.py Normal file
View File

@@ -0,0 +1,87 @@
import sqlite3
import os
import sys
# Ensure we can import from the project
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from scraper.gametora_scraper import scrape_support_card, sync_playwright
from db.db_queries import DB_PATH, repair_orphaned_data, cleanup_orphaned_data
def deep_repair():
    """Clean the database, then re-scrape every card that has no events.

    Step 1 runs ``repair_orphaned_data`` and ``cleanup_orphaned_data``.
    Step 2 selects cards without any row in ``support_events`` (SSR first)
    and calls ``scrape_support_card`` for each. A failure on one card is
    printed and the loop continues; Ctrl+C stops the run.
    """
    print("=" * 60)
    print("Umamusume Card Manager - Deep Database Repair")
    print("=" * 60)

    # 1. Run basic repair and cleanup
    print("\nStep 1: Cleaning up corrupted records...")
    repair_orphaned_data()
    cleanup_orphaned_data()

    # 2. Identify missing data
    conn = sqlite3.connect(DB_PATH)
    try:
        cur = conn.cursor()

        cur.execute("""
            SELECT card_id, name, gametora_url
            FROM support_cards
            WHERE card_id NOT IN (SELECT DISTINCT card_id FROM support_events)
            AND rarity = 'SSR'
        """)
        ssr_missing = cur.fetchall()

        cur.execute("""
            SELECT card_id, name, gametora_url
            FROM support_cards
            WHERE card_id NOT IN (SELECT DISTINCT card_id FROM support_events)
            AND rarity != 'SSR'
        """)
        others_missing = cur.fetchall()

        # Prioritize SSRs
        to_process = ssr_missing + others_missing
        total_missing = len(to_process)

        if total_missing == 0:
            print("\n✅ No missing data detected. Your database is healthy!")
            return

        print(f"\nDetected {total_missing} cards with missing event/skill data.")
        print(f"- SSR cards: {len(ssr_missing)}")
        print(f"- SR/R cards: {len(others_missing)}")

        print("\nStep 2: Re-scraping missing data from GameTora...")
        print("This may take some time depending on your internet connection.")
        print("Press Ctrl+C to stop at any time.")

        try:
            with sync_playwright() as p:
                browser = p.chromium.launch(headless=True)
                try:
                    context = browser.new_context()
                    page = context.new_page()

                    for count, (_card_id, name, url) in enumerate(to_process, start=1):
                        percent = (count / total_missing) * 100
                        print(f"[{count}/{total_missing} - {percent:.1f}%] Repairing: {name}")

                        try:
                            scrape_support_card(page, url, conn)
                        except Exception as e:
                            # One bad card must not abort the whole run.
                            print(f" ❌ Error: {e}")
                finally:
                    browser.close()
        except KeyboardInterrupt:
            print("\n⚠️ Repair interrupted by user.")
        except Exception as e:
            print(f"\n❌ A fatal error occurred during scrape: {e}")
    finally:
        # Single close point: covers the early return, query failures and the scrape.
        conn.close()

    print("\n" + "=" * 60)
    print("Repair process finished.")
    print("You can now restart the application.")
    print("=" * 60)
if __name__ == "__main__":
deep_repair()

58
fast_rescrape.py Normal file
View File

@@ -0,0 +1,58 @@
import sqlite3
import os
import sys
# Add parent dir to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from scraper.gametora_scraper import scrape_support_card, sync_playwright
DB_PATH = os.path.join("database", "umamusume.db")
def fast_rescrape(db_path=None):
    """Re-scrape every SSR card that has no rows in ``support_events``.

    Args:
        db_path: Optional path to the SQLite file. Defaults to the
            module-level ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    # Avoid sqlite3.connect() creating an empty database for a missing path.
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        # Find cards that have NO events
        cur.execute("""
            SELECT card_id, name, gametora_url
            FROM support_cards
            WHERE card_id NOT IN (SELECT DISTINCT card_id FROM support_events)
            AND rarity = 'SSR'
        """)
        cards_to_rescrape = cur.fetchall()
        print(f"Found {len(cards_to_rescrape)} SSR cards missing event data.")

        if not cards_to_rescrape:
            return

        total = len(cards_to_rescrape)
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context()
                page = context.new_page()

                for count, (_card_id, name, url) in enumerate(cards_to_rescrape, start=1):
                    print(f"[{count}/{total}] Re-scraping: {name}")
                    try:
                        # The scraper receives this connection and writes through it.
                        scrape_support_card(page, url, conn)
                    except Exception as e:
                        # Keep going: one failing card should not stop the batch.
                        print(f" Error: {e}")

                    if count % 10 == 0:
                        print("--- Progress Checkpoint ---")
            finally:
                browser.close()
    finally:
        conn.close()

    print("Fast re-scrape complete.")
if __name__ == "__main__":
fast_rescrape()

53
fast_rescrape_limited.py Normal file
View File

@@ -0,0 +1,53 @@
import sqlite3
import os
import sys
# Add parent dir to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from scraper.gametora_scraper import scrape_support_card, sync_playwright
DB_PATH = os.path.join("database", "umamusume.db")
def fast_rescrape_limited(db_path=None):
    """Re-scrape at most 50 cards that have no rows in ``support_events``.

    Cards are taken in ``rarity DESC, card_id ASC`` order.

    Args:
        db_path: Optional path to the SQLite file. Defaults to the
            module-level ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    # Avoid sqlite3.connect() creating an empty database for a missing path.
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        # Find cards that have NO events
        cur.execute("""
            SELECT card_id, name, gametora_url
            FROM support_cards
            WHERE card_id NOT IN (SELECT DISTINCT card_id FROM support_events)
            ORDER BY rarity DESC, card_id ASC
            LIMIT 50
        """)
        cards_to_rescrape = cur.fetchall()
        print(f"Found {len(cards_to_rescrape)} cards to re-scrape (Limited to 50).")

        if not cards_to_rescrape:
            return

        total = len(cards_to_rescrape)
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context()
                page = context.new_page()

                for count, (_card_id, name, url) in enumerate(cards_to_rescrape, start=1):
                    print(f"[{count}/{total}] Re-scraping: {name}")
                    try:
                        scrape_support_card(page, url, conn)
                    except Exception as e:
                        # Keep going: one failing card should not stop the batch.
                        print(f" Error: {e}")
            finally:
                browser.close()
    finally:
        conn.close()

    print("Limited re-scrape complete.")
if __name__ == "__main__":
fast_rescrape_limited()

BIN
images/30030_404.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.3 KiB

37
repair_db.py Normal file
View File

@@ -0,0 +1,37 @@
import sqlite3
import os
DB_PATH = os.path.join("database", "umamusume.db")
def repair_db(db_path=None):
    """Delete every row that references a card or event that no longer exists.

    Does nothing when the database file is absent. All deletions are
    committed together at the end.

    Args:
        db_path: Optional path to the SQLite file. Defaults to the
            module-level ``DB_PATH``.
    """
    path = DB_PATH if db_path is None else db_path
    if not os.path.exists(path):
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        print("Repairing database...")

        # 1. Remove all orphans
        cur.execute("DELETE FROM support_effects WHERE card_id NOT IN (SELECT card_id FROM support_cards)")
        print(f"Removed {cur.rowcount} orphaned effects")

        cur.execute("DELETE FROM support_hints WHERE card_id NOT IN (SELECT card_id FROM support_cards)")
        print(f"Removed {cur.rowcount} orphaned hints")

        # Events must go before their skills: purging skills first would leave
        # behind the skills of the events deleted by this statement.
        cur.execute("DELETE FROM support_events WHERE card_id NOT IN (SELECT card_id FROM support_cards)")
        print(f"Removed {cur.rowcount} orphaned events")

        cur.execute("DELETE FROM event_skills WHERE event_id NOT IN (SELECT event_id FROM support_events)")
        print(f"Removed {cur.rowcount} orphaned event skills")

        # 2. Cleanup owned_cards and deck_slots
        cur.execute("DELETE FROM owned_cards WHERE card_id NOT IN (SELECT card_id FROM support_cards)")
        cur.execute("DELETE FROM deck_slots WHERE card_id NOT IN (SELECT card_id FROM support_cards)")

        conn.commit()
    finally:
        # On failure nothing has been committed; closing discards the partial work.
        conn.close()

    print("Repair complete.")
if __name__ == "__main__":
repair_db()

View File

@@ -258,11 +258,18 @@ def scrape_support_card(page, url, conn, max_retries=3):
cur = conn.cursor() cur = conn.cursor()
# Insert card # Insert card using OR IGNORE to keep the same card_id if it exists
cur.execute(""" cur.execute("""
INSERT OR REPLACE INTO support_cards (name, rarity, card_type, max_level, gametora_url) INSERT OR IGNORE INTO support_cards (name, rarity, card_type, max_level, gametora_url)
VALUES (?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?)
""", (name, rarity, card_type, max_level, url)) """, (name, rarity, card_type, max_level, url))
# Update existing card to ensure data is fresh (without changing ID)
cur.execute("""
UPDATE support_cards
SET name = ?, rarity = ?, card_type = ?, max_level = ?
WHERE gametora_url = ?
""", (name, rarity, card_type, max_level, url))
conn.commit() conn.commit()
cur.execute("SELECT card_id FROM support_cards WHERE gametora_url = ?", (url,)) cur.execute("SELECT card_id FROM support_cards WHERE gametora_url = ?", (url,))
@@ -554,29 +561,48 @@ def scrape_events(page, card_id, cur):
skill_rarity_map = page.evaluate(""" skill_rarity_map = page.evaluate("""
() => { () => {
const map = {}; const map = {};
// Rare skills use a specific class (e.g., kkspcu) while normal use another (e.g., gImSzc) console.log("Building Skill Rarity Map...");
// It's safer to find all skill containers in the summary section
const sections = Array.from(document.querySelectorAll('div')).filter(d => d.innerText.startsWith('Skills from events')); // 1. Find all skill containers. They usually have a name and a 'Details' button.
if (sections.length === 0) return map; // In the "Skills from events" or "Support hints" sections.
const containers = Array.from(document.querySelectorAll('div')).filter(d =>
(d.innerText.includes('Details') || d.innerText.includes('Reward')) && d.innerText.length < 500
);
const containers = sections[0].parentElement.querySelectorAll('div[class*="sc-"]');
containers.forEach(c => { containers.forEach(c => {
const nameNode = c.querySelector('div[font-weight="bold"], span[font-weight="bold"]'); // Try to extract the skill name. It's usually the first text node or a bold tag.
const name = nameNode ? nameNode.innerText.trim() : c.innerText.split('\\n')[0].trim(); const nameNode = c.querySelector('b, span[font-weight="bold"], div[font-weight="bold"]');
let name = "";
if (nameNode) {
name = nameNode.innerText.trim();
} else {
// Fallback to text before 'Details'
name = c.innerText.split('Details')[0].replace(/\\n/g, ' ').trim();
}
if (name && name.length > 2) { if (name && name.length > 2) {
// Check if it has a gold-themed class or computed background color const style = window.getComputedStyle(c);
const isGold = c.className.includes('kkspcu') || window.getComputedStyle(c).backgroundColor.includes('rgb(255, 193, 7)'); const nameStyle = nameNode ? window.getComputedStyle(nameNode) : null;
map[name] = isGold;
// Golden skills have a specific background
const isGold = style.backgroundImage.includes('linear-gradient') ||
style.backgroundColor.includes('rgb(255, 193, 7)') ||
(nameStyle && nameStyle.color === 'rgb(255, 193, 7)') ||
c.className.includes('kkspcu') ||
c.innerHTML.includes('kkspcu');
const normalized = name.toLowerCase().replace(/\\s+/g, ' ').replace(/[()-]/g, '').trim();
map[normalized] = isGold;
console.log(`Mapped Skill: "${name}" [${normalized}] -> Gold: ${isGold}`);
} }
}); });
return map; return map;
} }
""") """)
# Scroll to the Events section specifically # Scroll to the Events section specifically
print(" Ensuring events are loaded...") print(" Ensuring events are loaded...")
page.evaluate("() => { const h = Array.from(document.querySelectorAll('h2, h1')).find(el => el.innerText.includes('Training Events')); if (h) h.scrollIntoView(); }") page.evaluate("() => { const h = Array.from(document.querySelectorAll('h2, h1, div')).find(el => el.innerText.toLowerCase().includes('training events')); if (h) h.scrollIntoView(); }")
page.wait_for_timeout(1000) page.wait_for_timeout(1000)
# 2. Scrape ONLY the LAST chain event (Golden Perk) with OR options # 2. Scrape ONLY the LAST chain event (Golden Perk) with OR options
@@ -587,21 +613,30 @@ def scrape_events(page, card_id, cur):
// Find all chain event buttons // Find all chain event buttons
const getChainEventButtons = () => { const getChainEventButtons = () => {
const buttons = []; const buttons = [];
const headers = Array.from(document.querySelectorAll('div, h2, h3, span')).filter(el => // Look for "Chain Events" text (case-insensitive substring)
el.innerText.includes('Chain Events') const labels = Array.from(document.querySelectorAll('div, span, h2, h3, h4')).filter(el =>
el.innerText.toLowerCase().includes('chain events') && el.innerText.trim().length < 20
); );
headers.forEach(header => { labels.forEach(label => {
const container = header.parentElement; // The buttons are usually in the same container or next container
let container = label.parentElement;
let attempts = 0;
while (container && container.querySelectorAll('button').length === 0 && attempts < 5) {
container = container.nextElementSibling || container.parentElement;
attempts++;
if (container && container.tagName === 'BODY') break;
}
if (container) { if (container) {
const btns = Array.from(container.querySelectorAll('button')); const btns = Array.from(container.querySelectorAll('button'));
btns.forEach(btn => { btns.forEach(btn => {
const text = btn.innerText.trim(); const text = btn.innerText.trim();
const style = window.getComputedStyle(btn); const style = window.getComputedStyle(btn);
const isVisible = style.display !== 'none' && style.visibility !== 'hidden' && btn.offsetWidth > 0; const isVisible = style.display !== 'none' && style.visibility !== 'hidden';
// Only chain events (contain '>') // Look for arrows (regular or heavy)
if (isVisible && text && text.includes('>') && !text.includes('Events')) { if (isVisible && (text.includes('>') || text.includes(''))) {
buttons.push(btn); buttons.push(btn);
} }
}); });
@@ -617,13 +652,20 @@ def scrape_events(page, card_id, cur):
return null; return null;
} }
// Find the button with the most '>' characters (the last chain event = Golden Perk)
let goldenPerkButton = null; let goldenPerkButton = null;
let maxArrows = 0; let maxArrows = 0;
for (const btn of buttons) { for (const btn of buttons) {
const text = btn.innerText.trim(); const text = btn.innerText.trim();
const arrowCount = (text.match(/>/g) || []).length; // Count both regular and heavy arrows
const arrowCount = (text.match(/>|/g) || []).length;
// If it has three heavy arrows, it's almost certainly the golden perk
if (text.includes('')) {
goldenPerkButton = btn;
break;
}
if (arrowCount > maxArrows) { if (arrowCount > maxArrows) {
maxArrows = arrowCount; maxArrows = arrowCount;
goldenPerkButton = btn; goldenPerkButton = btn;
@@ -709,7 +751,30 @@ def scrape_events(page, card_id, cur):
event_id = cur.lastrowid event_id = cur.lastrowid
for skill in golden_perk_data['skills']: for skill in golden_perk_data['skills']:
is_gold = 1 if skill_rarity_map.get(skill['name']) else 0 # Normalization helper
def normalize(s):
    """Return a lowercase comparison key for a skill name.

    Removes the " hint +1", " hint +3", " hint +5" and " hint +" fragments,
    trims surrounding whitespace, then deletes every space, hyphen and
    parenthesis.
    """
    key = s.lower()
    # The specific "+N" forms go first so the bare " hint +" fallback cannot
    # strip the prefix and leave their digit behind.
    for fragment in (" hint +1", " hint +3", " hint +5", " hint +"):
        key = key.replace(fragment, "")
    # One pass deletes all four characters; since every space is removed here,
    # no separate space-collapsing step is needed.
    return key.strip().translate(str.maketrans("", "", " -()"))
skill_name = normalize(skill['name'])
# Use extra aggressive name matching against the map values
# (The map keys are already normalized)
is_gold = 0
for k, gold in skill_rarity_map.items():
if normalize(k) == skill_name:
is_gold = 1 if gold else 0
break
# Fallback 1: If it's a chain event and specifically the last one, it's almost certainly gold
if not is_gold and golden_perk_data.get('type') == 'Chain':
# Check for "hint" patterns which usually accompany gold perks in chain events
if "hint +" in skill['name'].lower() or len(golden_perk_data['skills']) <= 2:
is_gold = 1
print(f" ✨ Golden Skill Fallback (Last Chain Event): {skill['name']}")
if is_gold:
print(f" ✨ Golden Skill Verified: {skill['name']}")
cur.execute(""" cur.execute("""
INSERT INTO event_skills (event_id, skill_name, is_gold, is_or) INSERT INTO event_skills (event_id, skill_name, is_gold, is_or)
VALUES (?, ?, ?, ?) VALUES (?, ?, ?, ?)

62
test_gold_scrape.py Normal file
View File

@@ -0,0 +1,62 @@
import sqlite3
import os
import sys
# Add parent dir to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from scraper.gametora_scraper import scrape_support_card, sync_playwright
from db.db_queries import get_conn
def test_golden_perk():
    """Re-scrape the Fine Motion card and report which event skills are gold.

    Existing events and event skills for the card are deleted first, the
    card is scraped again through ``scrape_support_card``, and the stored
    skills are printed with a final success/failure line. This performs a
    live scrape against the site and writes to the application database.
    """
    card_label = "Fine Motion"
    print("Testing Golden Perk Scraping for Fine Motion...")
    url = "https://gametora.com/umamusume/supports/30010-fine-motion"

    conn = get_conn()
    try:
        cur = conn.cursor()

        # 1. Clean previous data for this specific card
        cur.execute("SELECT card_id FROM support_cards WHERE gametora_url = ?", (url,))
        row = cur.fetchone()
        if row:
            card_id = row[0]
            # Skills first: the subquery needs the events to still exist.
            cur.execute("DELETE FROM event_skills WHERE event_id IN (SELECT event_id FROM support_events WHERE card_id = ?)", (card_id,))
            cur.execute("DELETE FROM support_events WHERE card_id = ?", (card_id,))
            conn.commit()

        # 2. Scrape
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context()
                page = context.new_page()

                success = scrape_support_card(page, url, conn)
                print(f"Scrape success: {success}")
            finally:
                browser.close()

        # 3. Verify results
        cur.execute("""
            SELECT se.event_name, es.skill_name, es.is_gold
            FROM support_events se
            JOIN event_skills es ON se.event_id = es.event_id
            JOIN support_cards sc ON se.card_id = sc.card_id
            WHERE sc.gametora_url = ?
        """, (url,))

        skills = cur.fetchall()
        # The header names the card that was actually scraped above.
        print(f"\nSkills found for {card_label}:")
        found_gold = False
        for event_name, skill_name, is_gold in skills:
            status = "✨ GOLD" if is_gold else "Normal"
            print(f"- [{status}] {event_name}: {skill_name}")
            if is_gold:
                found_gold = True

        if found_gold:
            print("\n✅ SUCCESS: Golden Perk identified correctly!")
        else:
            print("\n❌ FAILURE: No golden perks found.")
    finally:
        conn.close()
if __name__ == "__main__":
test_golden_perk()

View File

@@ -4,7 +4,7 @@ This file is the single source of truth for the application version.
""" """
# Semantic versioning: MAJOR.MINOR.PATCH # Semantic versioning: MAJOR.MINOR.PATCH
VERSION: str = "12.1.0" VERSION: str = "13.0.0"
# Application metadata # Application metadata
APP_NAME: str = "UmamusumeCardManager" APP_NAME: str = "UmamusumeCardManager"