feat: Implement new database management and scraping utilities, and update application version to 13.0.0.
This commit is contained in:
@@ -23,13 +23,13 @@ exe = EXE(
|
||||
a.datas,
|
||||
[],
|
||||
name='UmamusumeCardManager',
|
||||
debug=True,
|
||||
debug=False,
|
||||
bootloader_ignore_signals=False,
|
||||
strip=False,
|
||||
upx=False,
|
||||
upx_exclude=[],
|
||||
runtime_tmpdir=None,
|
||||
console=True,
|
||||
console=False,
|
||||
disable_windowed_traceback=False,
|
||||
argv_emulation=False,
|
||||
target_arch=None,
|
||||
|
||||
23
check_correlation.py
Normal file
23
check_correlation.py
Normal file
@@ -0,0 +1,23 @@
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
DB_PATH = os.path.join("database", "umamusume.db")
|
||||
|
||||
def check_correlation(db_path=None):
    """Print the lowest-id support card and the first event stored for card_id 1.

    Args:
        db_path: Database file to inspect. Falls back to the module-level
            ``DB_PATH`` when omitted.
    """
    path = DB_PATH if db_path is None else db_path
    # sqlite3.connect() would silently create an empty file for a missing path.
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        cur.execute("SELECT card_id, name FROM support_cards ORDER BY card_id ASC LIMIT 1")
        first_card = cur.fetchone()
        print(f"First card: {first_card}")

        if first_card:
            # Check what events ID 1 points to
            cur.execute("SELECT event_name FROM support_events WHERE card_id = 1 LIMIT 1")
            first_event = cur.fetchone()
            print(f"First event for card_id 1: {first_event}")
    finally:
        # Release the connection even when a query fails (e.g. missing table).
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
check_correlation()
|
||||
31
check_counts.py
Normal file
31
check_counts.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
DB_PATH = os.path.join("database", "umamusume.db")
|
||||
|
||||
def check_counts(db_path=None):
    """Print row counts for the card/event/skill tables and one sample link.

    After the counts, the first ``card_id`` found in ``support_events`` is
    looked up in ``support_cards`` and the matching name row is printed.

    Args:
        db_path: Database file to inspect. Falls back to the module-level
            ``DB_PATH`` when omitted.
    """
    path = DB_PATH if db_path is None else db_path
    # sqlite3.connect() would silently create an empty file for a missing path.
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        # Table names are fixed constants, so interpolating them is safe.
        for table in ("support_cards", "support_events", "event_skills"):
            cur.execute(f"SELECT COUNT(*) FROM {table}")
            print(f"{table}: {cur.fetchone()[0]}")

        # Check sample card_id from events
        cur.execute("SELECT card_id FROM support_events LIMIT 1")
        sample_id = cur.fetchone()
        if sample_id:
            print(f"Sample card_id from events: {sample_id[0]}")
            cur.execute("SELECT name FROM support_cards WHERE card_id = ?", (sample_id[0],))
            card_name = cur.fetchone()
            print(f"Matching card name: {card_name}")
    finally:
        # Release the connection even when a query fails.
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
check_counts()
|
||||
33
check_db.py
Normal file
33
check_db.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
DB_PATH = os.path.join("database", "umamusume.db")
|
||||
|
||||
def check_schema(db_path=None):
    """Print the columns, row count and a few gold skills of ``event_skills``.

    Database errors (for example a missing table or column) are reported on
    stdout instead of being raised.

    Args:
        db_path: Database file to inspect. Falls back to the module-level
            ``DB_PATH`` when omitted.
    """
    path = DB_PATH if db_path is None else db_path
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        # Column name is the second field of each table_info row.
        cur.execute("PRAGMA table_info(event_skills)")
        columns = [row[1] for row in cur.fetchall()]
        print(f"Columns in event_skills: {columns}")

        cur.execute("SELECT COUNT(*) FROM event_skills")
        count = cur.fetchone()[0]
        print(f"Total skills in event_skills: {count}")

        cur.execute("SELECT DISTINCT skill_name FROM event_skills WHERE is_gold = 1 LIMIT 5")
        gold_skills = cur.fetchall()
        print(f"Golden skills samples: {gold_skills}")
    except sqlite3.Error as e:
        # Only database failures are expected here; anything else is a bug
        # and should surface with a traceback.
        print(f"Error: {e}")
    finally:
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
check_schema()
|
||||
16
check_effects_ids.py
Normal file
16
check_effects_ids.py
Normal file
@@ -0,0 +1,16 @@
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
DB_PATH = os.path.join("database", "umamusume.db")
|
||||
|
||||
def check_effects_card_ids(db_path=None):
    """Print up to ten distinct ``card_id`` values found in ``support_effects``.

    Args:
        db_path: Database file to inspect. Falls back to the module-level
            ``DB_PATH`` when omitted.
    """
    path = DB_PATH if db_path is None else db_path
    # sqlite3.connect() would silently create an empty file for a missing path.
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()
        cur.execute("SELECT DISTINCT card_id FROM support_effects LIMIT 10")
        card_ids = [row[0] for row in cur.fetchall()]
        print(f"Distinct card_ids in support_effects: {card_ids}")
    finally:
        # Release the connection even when the query fails.
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
check_effects_card_ids()
|
||||
20
check_event_ids.py
Normal file
20
check_event_ids.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
DB_PATH = os.path.join("database", "umamusume.db")
|
||||
|
||||
def check_event_card_ids(db_path=None):
    """Print event ``card_id`` samples next to the cards holding ids 1-5.

    Args:
        db_path: Database file to inspect. Falls back to the module-level
            ``DB_PATH`` when omitted.
    """
    path = DB_PATH if db_path is None else db_path
    # sqlite3.connect() would silently create an empty file for a missing path.
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        cur.execute("SELECT DISTINCT card_id FROM support_events LIMIT 20")
        ids = [row[0] for row in cur.fetchall()]
        print(f"Distinct card_ids in support_events: {ids}")

        cur.execute("SELECT card_id, name FROM support_cards WHERE card_id IN (1, 2, 3, 4, 5)")
        print(f"Cards with IDs 1-5: {cur.fetchall()}")
    finally:
        # Release the connection even when a query fails.
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
check_event_card_ids()
|
||||
21
check_id_range.py
Normal file
21
check_id_range.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
DB_PATH = os.path.join("database", "umamusume.db")
|
||||
|
||||
def check_id_range(db_path=None):
    """Print the min/max ``card_id`` of ``support_cards`` and ``support_events``.

    Args:
        db_path: Database file to inspect. Falls back to the module-level
            ``DB_PATH`` when omitted.
    """
    path = DB_PATH if db_path is None else db_path
    # sqlite3.connect() would silently create an empty file for a missing path.
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        # MIN/MAX over an empty table yields (None, None), which prints as-is.
        cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_cards")
        min_id, max_id = cur.fetchone()
        print(f"support_cards card_id range: {min_id} to {max_id}")

        cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_events")
        min_ev_id, max_ev_id = cur.fetchone()
        print(f"support_events card_id range: {min_ev_id} to {max_ev_id}")
    finally:
        # Release the connection even when a query fails.
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
check_id_range()
|
||||
20
check_local_db.py
Normal file
20
check_local_db.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
# Project root database
|
||||
DB_PATH = os.path.join("database", "umamusume.db")
|
||||
|
||||
def check_local_db(db_path=None):
    """Print the ``card_id`` range of the cards and events tables.

    Args:
        db_path: Database file to inspect. Falls back to the module-level
            ``DB_PATH`` when omitted.
    """
    path = DB_PATH if db_path is None else db_path
    if not os.path.exists(path):
        print("Local DB not found")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()
        cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_cards")
        print(f"Local support_cards range: {cur.fetchone()}")
        cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_events")
        print(f"Local support_events range: {cur.fetchone()}")
    finally:
        # Release the connection even when a query fails (e.g. missing table).
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
check_local_db()
|
||||
24
check_offset.py
Normal file
24
check_offset.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
DB_PATH = os.path.join("database", "umamusume.db")
|
||||
|
||||
def check_offset(db_path=None):
    """Print the five lowest-id cards and the first event stored for ids 1-5.

    Args:
        db_path: Database file to inspect. Falls back to the module-level
            ``DB_PATH`` when omitted.
    """
    path = DB_PATH if db_path is None else db_path
    # sqlite3.connect() would silently create an empty file for a missing path.
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        # First 5 cards by card_id (the query applies no rarity filter).
        cur.execute("SELECT card_id, name, gametora_url FROM support_cards ORDER BY card_id ASC LIMIT 5")
        cards = cur.fetchall()
        print(f"Cards: {cards}")

        # Check if there are events referring to IDs 1, 2, 3...
        for i in range(1, 6):
            cur.execute("SELECT event_name FROM support_events WHERE card_id = ? LIMIT 1", (i,))
            ev = cur.fetchone()
            print(f"ID {i} events: {ev}")
    finally:
        # Release the connection even when a query fails.
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
check_offset()
|
||||
33
check_orphans.py
Normal file
33
check_orphans.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
DB_PATH = os.path.join("database", "umamusume.db")
|
||||
|
||||
def check_orphans(db_path=None):
    """Print how many event skills and events lack their parent row.

    A skill is counted when no ``support_events`` row shares its ``event_id``;
    an event is counted when no ``support_cards`` row shares its ``card_id``.

    Args:
        db_path: Database file to inspect. Falls back to the module-level
            ``DB_PATH`` when omitted.
    """
    path = DB_PATH if db_path is None else db_path
    # sqlite3.connect() would silently create an empty file for a missing path.
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        # Check if event_id exists in support_events
        cur.execute("""
            SELECT COUNT(*)
            FROM event_skills es
            LEFT JOIN support_events se ON es.event_id = se.event_id
            WHERE se.event_id IS NULL
        """)
        orphans = cur.fetchone()[0]
        print(f"Orphaned skills (no matching event): {orphans}")

        # Check if card_id exists in support_cards
        cur.execute("""
            SELECT COUNT(*)
            FROM support_events se
            LEFT JOIN support_cards sc ON se.card_id = sc.card_id
            WHERE sc.card_id IS NULL
        """)
        orphaned_events = cur.fetchone()[0]
        print(f"Orphaned events (no matching card): {orphaned_events}")
    finally:
        # Release the connection even when a query fails.
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
check_orphans()
|
||||
19
check_seed_db.py
Normal file
19
check_seed_db.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
DB_PATH = os.path.join("database", "umamusume_seed.db")
|
||||
|
||||
def check_seed_db(db_path=None):
    """Print the ``card_id`` range of the seed database's cards and events.

    Args:
        db_path: Database file to inspect. Falls back to the module-level
            ``DB_PATH`` when omitted.
    """
    path = DB_PATH if db_path is None else db_path
    if not os.path.exists(path):
        print("Seed DB not found")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()
        cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_cards")
        print(f"Seed support_cards range: {cur.fetchone()}")
        cur.execute("SELECT MIN(card_id), MAX(card_id) FROM support_events")
        print(f"Seed support_events range: {cur.fetchone()}")
    finally:
        # Release the connection even when a query fails (e.g. missing table).
        conn.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
check_seed_db()
|
||||
Binary file not shown.
@@ -62,13 +62,19 @@ try:
|
||||
except ImportError:
|
||||
VERSION = "2.1.0" # Fallback
|
||||
|
||||
_updates_checked = False
|
||||
|
||||
def get_conn():
|
||||
"""Get database connection"""
|
||||
global _updates_checked
|
||||
|
||||
# Initialize if missing
|
||||
if not os.path.exists(DB_PATH):
|
||||
init_database()
|
||||
|
||||
# Check for updates and migrate if needed
|
||||
# Check for updates and migrate if needed (only once per session)
|
||||
if not _updates_checked:
|
||||
_updates_checked = True
|
||||
check_for_updates()
|
||||
|
||||
return sqlite3.connect(DB_PATH)
|
||||
@@ -105,6 +111,7 @@ def check_for_updates():
|
||||
sync_from_seed(bundled_seed_path)
|
||||
|
||||
# Always ensure data integrity
|
||||
repair_orphaned_data()
|
||||
cleanup_orphaned_data()
|
||||
|
||||
except Exception as e:
|
||||
@@ -747,10 +754,59 @@ def get_database_stats():
|
||||
conn.close()
|
||||
return stats
|
||||
|
||||
def repair_orphaned_data(db_path=None):
    """
    Report orphaned training events and merge duplicate support cards.

    Cards that share the same non-NULL ``gametora_url`` are collapsed into the
    row with the highest ``card_id``. Rows in the dependent tables are
    re-pointed to the surviving card before the duplicates are deleted.
    Orphaned events are only counted and reported; this function does not
    re-link them.

    Failures are printed and the transaction is rolled back; nothing is raised.

    Args:
        db_path: Database file to repair. Falls back to the module-level
            ``DB_PATH`` when omitted.
    """
    path = DB_PATH if db_path is None else db_path
    conn = sqlite3.connect(path)
    cur = conn.cursor()

    try:
        # Check if we have orphans
        cur.execute("SELECT COUNT(*) FROM support_events WHERE card_id NOT IN (SELECT card_id FROM support_cards)")
        orphan_count = cur.fetchone()[0]

        if orphan_count > 0:
            # The owning card of an orphaned event is unknown, so there is
            # nothing to match on; recovery needs a re-scrape.
            print(
                f"Detected {orphan_count} orphaned training events. "
                "They cannot be re-linked automatically; re-scrape the affected cards to restore them."
            )

        # Ensure no duplicate cards exist based on URL. NULL URLs are excluded:
        # GROUP BY would lump them into one group, and the "= ?" lookup below
        # can never match NULL, which previously aborted the whole repair.
        cur.execute(
            "SELECT gametora_url, COUNT(*) as c FROM support_cards "
            "WHERE gametora_url IS NOT NULL "
            "GROUP BY gametora_url HAVING c > 1"
        )
        dupes = cur.fetchall()
        if dupes:
            print(f"Found {len(dupes)} duplicate card entries. Cleaning up...")
            dependent_tables = ['owned_cards', 'deck_slots', 'support_effects', 'support_hints', 'support_events']
            for url, _count in dupes:
                # Keep the one with highest ID (most recent)
                cur.execute("SELECT card_id FROM support_cards WHERE gametora_url = ? ORDER BY card_id DESC", (url,))
                keep_id, *toss_ids = [r[0] for r in cur.fetchall()]
                placeholders = ','.join('?' * len(toss_ids))

                # Update references in other tables before deleting.
                # Table names come from the fixed list above, never from input.
                for table in dependent_tables:
                    cur.execute(
                        f"UPDATE {table} SET card_id = ? WHERE card_id IN ({placeholders})",
                        [keep_id, *toss_ids],
                    )

                cur.execute(f"DELETE FROM support_cards WHERE card_id IN ({placeholders})", toss_ids)

        conn.commit()

    except Exception as e:
        # Best-effort repair: callers do not expect this to raise.
        conn.rollback()
        print(f"Repair failed: {e}")
    finally:
        conn.close()
|
||||
|
||||
def cleanup_orphaned_data():
|
||||
"""Remove references to non-existent cards in user data tables"""
|
||||
print("Cleaning up orphaned database records...")
|
||||
conn = get_conn()
|
||||
# Use direct connection to avoid recursion with get_conn()
|
||||
conn = sqlite3.connect(DB_PATH)
|
||||
cur = conn.cursor()
|
||||
|
||||
try:
|
||||
@@ -770,13 +826,17 @@ def cleanup_orphaned_data():
|
||||
if cur.rowcount > 0:
|
||||
print(f"Removed {cur.rowcount} orphaned deck slot records.")
|
||||
|
||||
# 3. Clean detail tables
|
||||
cur.execute("DELETE FROM support_effects WHERE card_id NOT IN (SELECT card_id FROM support_cards)")
|
||||
cur.execute("DELETE FROM support_hints WHERE card_id NOT IN (SELECT card_id FROM support_cards)")
|
||||
cur.execute("DELETE FROM support_events WHERE card_id NOT IN (SELECT card_id FROM support_cards)")
|
||||
cur.execute("DELETE FROM event_skills WHERE event_id NOT IN (SELECT event_id FROM support_events)")
|
||||
|
||||
conn.commit()
|
||||
except Exception as e:
|
||||
print(f"Cleanup failed: {e}")
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
# ============================================
|
||||
# Skill Search Queries
|
||||
# ============================================
|
||||
|
||||
|
||||
118
debug_kitasan_scrape.py
Normal file
118
debug_kitasan_scrape.py
Normal file
@@ -0,0 +1,118 @@
|
||||
import os
|
||||
import sys
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
# Add parent dir to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
# Page scraped when no URL is passed to debug_kitasan_scrape().
_DEFAULT_DEBUG_URL = "https://gametora.com/umamusume/supports/30028-kitasan-black"

# Maps each skill name in the "Skills from events" section to an is-gold flag.
_RARITY_MAP_JS = """
() => {
    const map = {};
    const sections = Array.from(document.querySelectorAll('div, span, h3')).filter(el =>
        el.innerText.trim().startsWith('Skills from events')
    );
    if (sections.length === 0) return { error: "Section not found" };

    const root = sections[0].closest('div');
    const containers = Array.from(root.querySelectorAll('div')).filter(d =>
        d.innerText.includes('Details') && d.children.length > 1
    );

    containers.forEach(c => {
        const textNodes = Array.from(c.querySelectorAll('div, span')).filter(n => n.children.length === 0);
        const name = textNodes[0] ? textNodes[0].innerText.trim() : "";

        if (name && name.length > 1 && !name.includes('Details')) {
            const style = window.getComputedStyle(c);
            const isGold = style.backgroundImage.includes('linear-gradient') ||
                           style.backgroundColor.includes('rgb(255, 193, 7)') ||
                           c.className.includes('kkspcu');
            map[name] = isGold;
        }
    });
    return map;
}
"""

# Scrolls the "Training Events" heading into view, if present.
_SCROLL_TO_EVENTS_JS = "() => { const h = Array.from(document.querySelectorAll('h2, h1')).find(el => el.innerText.includes('Training Events')); if (h) h.scrollIntoView(); }"

# Clicks the chain-event button with three heavy arrows (or, failing that, the
# most arrows) and returns its label, or null when no such button exists.
_CLICK_GOLDEN_PERK_JS = """
() => {
    const labels = Array.from(document.querySelectorAll('div, span, h2, h3')).filter(el =>
        el.innerText.trim() === 'Chain Events'
    );
    const buttons = [];
    labels.forEach(label => {
        let container = label.parentElement;
        while (container && container.querySelectorAll('button').length === 0) {
            container = container.nextElementSibling || container.parentElement;
            if (container && container.tagName === 'BODY') break;
        }
        if (container) {
            const btns = Array.from(container.querySelectorAll('button'));
            btns.forEach(btn => {
                const text = btn.innerText.trim();
                if (text.includes('>') || text.includes('❯')) buttons.push(btn);
            });
        }
    });

    let goldenBtn = buttons.find(b => b.innerText.includes('❯❯❯'));
    if (!goldenBtn) {
        // Fallback to max arrows
        let maxArrows = 0;
        buttons.forEach(b => {
            const count = (b.innerText.match(/>|❯/g) || []).length;
            if (count > maxArrows) { maxArrows = count; goldenBtn = b; }
        });
    }

    if (goldenBtn) {
        goldenBtn.click();
        return goldenBtn.innerText;
    }
    return null;
}
"""

# Collects candidate skill texts from the last high-z-index <div> on the page.
_TOOLTIP_SKILLS_JS = """
() => {
    const popovers = Array.from(document.querySelectorAll('div')).filter(d =>
        window.getComputedStyle(d).zIndex > 50 &&
        d.innerText.length < 2500
    );
    if (popovers.length === 0) return { error: "No popovers found" };

    const pop = popovers[popovers.length - 1];
    const skillLinks = Array.from(pop.querySelectorAll('span, a')).filter(el =>
        el.innerText.length > 2 &&
        !el.innerText.includes('Energy') &&
        !el.innerText.includes('bond')
    );
    return skillLinks.map(s => s.innerText.trim());
}
"""


def debug_kitasan_scrape(url=None):
    """Load a support-card page and print what the three scrape probes find.

    The probes run in order: build the skill rarity map, click the last chain
    event ("golden perk") button, then read the skills from the popover that
    opens. Each result is printed; nothing is written to the database.

    Args:
        url: Support-card page to open. Defaults to the Kitasan Black page.
    """
    target = _DEFAULT_DEBUG_URL if url is None else url

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context()
            page = context.new_page()
            page.goto(target)
            page.wait_for_load_state("networkidle")
            # Extra settle time after network idle before querying the DOM.
            page.wait_for_timeout(2000)

            # 1. Get Skill Rarity Map
            rarity_map = page.evaluate(_RARITY_MAP_JS)
            print(f"Skill Rarity Map: {rarity_map}")

            # 2. Click Golden Perk Button
            page.evaluate(_SCROLL_TO_EVENTS_JS)
            page.wait_for_timeout(500)

            btn_found = page.evaluate(_CLICK_GOLDEN_PERK_JS)
            print(f"Clicked button: {btn_found}")
            page.wait_for_timeout(1000)

            # 3. Get Skills from Tooltip
            tooltip_skills = page.evaluate(_TOOLTIP_SKILLS_JS)
            print(f"Tooltip Skills: {tooltip_skills}")
        finally:
            # Close the browser even when navigation or a probe raises.
            browser.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
debug_kitasan_scrape()
|
||||
87
deep_repair.py
Normal file
87
deep_repair.py
Normal file
@@ -0,0 +1,87 @@
|
||||
import sqlite3
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Ensure we can import from the project
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from scraper.gametora_scraper import scrape_support_card, sync_playwright
|
||||
from db.db_queries import DB_PATH, repair_orphaned_data, cleanup_orphaned_data
|
||||
|
||||
def _cards_missing_events(conn):
    """Return (ssr_rows, other_rows) of cards that have no support_events row."""
    cur = conn.cursor()
    # NOT EXISTS instead of NOT IN: a single NULL card_id in support_events
    # would make "NOT IN (subquery)" reject every card.
    cur.execute("""
        SELECT sc.card_id, sc.name, sc.gametora_url, sc.rarity
        FROM support_cards sc
        WHERE NOT EXISTS (
            SELECT 1 FROM support_events se WHERE se.card_id = sc.card_id
        )
    """)
    rows = cur.fetchall()
    # Splitting in Python keeps cards whose rarity is NULL in the "other"
    # group; SQL "rarity != 'SSR'" silently drops them.
    ssr = [row[:3] for row in rows if row[3] == 'SSR']
    others = [row[:3] for row in rows if row[3] != 'SSR']
    return ssr, others


def deep_repair():
    """Clean up the database, then re-scrape every card that has no events.

    Step 1 runs ``repair_orphaned_data`` and ``cleanup_orphaned_data``.
    Step 2 finds cards without any ``support_events`` row and passes each one
    to ``scrape_support_card``, SSR cards first. A failure on one card is
    printed and the loop continues; Ctrl+C stops the scrape.
    """
    print("=" * 60)
    print("Umamusume Card Manager - Deep Database Repair")
    print("=" * 60)

    # 1. Run basic repair and cleanup
    print("\nStep 1: Cleaning up corrupted records...")
    repair_orphaned_data()
    cleanup_orphaned_data()

    # 2. Identify missing data
    conn = sqlite3.connect(DB_PATH)
    try:
        ssr_missing, others_missing = _cards_missing_events(conn)

        total_missing = len(ssr_missing) + len(others_missing)
        if total_missing == 0:
            print("\n✅ No missing data detected. Your database is healthy!")
            return

        print(f"\nDetected {total_missing} cards with missing event/skill data.")
        print(f"- SSR cards: {len(ssr_missing)}")
        print(f"- SR/R cards: {len(others_missing)}")

        print("\nStep 2: Re-scraping missing data from GameTora...")
        print("This may take some time depending on your internet connection.")
        print("Press Ctrl+C to stop at any time.")

        try:
            with sync_playwright() as p:
                browser = p.chromium.launch(headless=True)
                try:
                    context = browser.new_context()
                    page = context.new_page()

                    # Prioritize SSRs
                    to_process = ssr_missing + others_missing
                    for count, (_card_id, name, url) in enumerate(to_process, start=1):
                        percent = (count / total_missing) * 100
                        print(f"[{count}/{total_missing} - {percent:.1f}%] Repairing: {name}")
                        try:
                            scrape_support_card(page, url, conn)
                        except Exception as e:
                            # One bad page must not stop the remaining cards.
                            print(f" ❌ Error: {e}")
                finally:
                    browser.close()
        except KeyboardInterrupt:
            print("\n⚠️ Repair interrupted by user.")
        except Exception as e:
            print(f"\n❌ A fatal error occurred during scrape: {e}")
    finally:
        # Covers the early return and every error path above.
        conn.close()

    print("\n" + "=" * 60)
    print("Repair process finished.")
    print("You can now restart the application.")
    print("=" * 60)
|
||||
|
||||
if __name__ == "__main__":
|
||||
deep_repair()
|
||||
58
fast_rescrape.py
Normal file
58
fast_rescrape.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import sqlite3
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Add parent dir to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from scraper.gametora_scraper import scrape_support_card, sync_playwright
|
||||
|
||||
DB_PATH = os.path.join("database", "umamusume.db")
|
||||
|
||||
def fast_rescrape():
    """Re-scrape every SSR card that has no rows in ``support_events``.

    Each card's URL is passed to ``scrape_support_card`` together with the
    open connection. A failure on one card is printed and the loop continues.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cur = conn.cursor()

        # Find SSR cards that have NO events. NOT EXISTS is used because
        # "NOT IN (subquery)" matches nothing once the subquery yields a NULL.
        cur.execute("""
            SELECT sc.card_id, sc.name, sc.gametora_url
            FROM support_cards sc
            WHERE sc.rarity = 'SSR'
            AND NOT EXISTS (
                SELECT 1 FROM support_events se WHERE se.card_id = sc.card_id
            )
        """)
        cards_to_rescrape = cur.fetchall()

        print(f"Found {len(cards_to_rescrape)} SSR cards missing event data.")

        if not cards_to_rescrape:
            return

        total = len(cards_to_rescrape)
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context()
                page = context.new_page()

                for count, (_card_id, name, url) in enumerate(cards_to_rescrape, start=1):
                    print(f"[{count}/{total}] Re-scraping: {name}")
                    try:
                        # The shared connection is passed through so the
                        # scraper writes to the database opened here.
                        scrape_support_card(page, url, conn)
                    except Exception as e:
                        # One bad page must not stop the remaining cards.
                        print(f" Error: {e}")

                    if count % 10 == 0:
                        print("--- Progress Checkpoint ---")
            finally:
                browser.close()
    finally:
        # Covers the early return and every error path above.
        conn.close()

    print("Fast re-scrape complete.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
fast_rescrape()
|
||||
53
fast_rescrape_limited.py
Normal file
53
fast_rescrape_limited.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import sqlite3
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Add parent dir to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from scraper.gametora_scraper import scrape_support_card, sync_playwright
|
||||
|
||||
DB_PATH = os.path.join("database", "umamusume.db")
|
||||
|
||||
def fast_rescrape_limited(limit=50):
    """Re-scrape at most ``limit`` cards that have no rows in ``support_events``.

    Cards are ordered by ``rarity`` descending, then ``card_id`` ascending.
    A failure on one card is printed and the loop continues.

    Args:
        limit: Maximum number of cards to process in this run.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cur = conn.cursor()

        # Find cards that have NO events. NOT EXISTS is used because
        # "NOT IN (subquery)" matches nothing once the subquery yields a NULL.
        cur.execute("""
            SELECT sc.card_id, sc.name, sc.gametora_url
            FROM support_cards sc
            WHERE NOT EXISTS (
                SELECT 1 FROM support_events se WHERE se.card_id = sc.card_id
            )
            ORDER BY sc.rarity DESC, sc.card_id ASC
            LIMIT ?
        """, (limit,))
        cards_to_rescrape = cur.fetchall()

        print(f"Found {len(cards_to_rescrape)} cards to re-scrape (Limited to {limit}).")

        if not cards_to_rescrape:
            return

        total = len(cards_to_rescrape)
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context()
                page = context.new_page()

                for count, (_card_id, name, url) in enumerate(cards_to_rescrape, start=1):
                    print(f"[{count}/{total}] Re-scraping: {name}")
                    try:
                        scrape_support_card(page, url, conn)
                    except Exception as e:
                        # One bad page must not stop the remaining cards.
                        print(f" Error: {e}")
            finally:
                browser.close()
    finally:
        # Covers the early return and every error path above.
        conn.close()

    print("Limited re-scrape complete.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
fast_rescrape_limited()
|
||||
BIN
images/30030_404.png
Normal file
BIN
images/30030_404.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 9.3 KiB |
37
repair_db.py
Normal file
37
repair_db.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import sqlite3
|
||||
import os
|
||||
|
||||
DB_PATH = os.path.join("database", "umamusume.db")
|
||||
|
||||
def repair_db(db_path=None):
    """Delete rows that reference cards or events which no longer exist.

    Orphaned events are removed before orphaned event skills, so skills that
    belonged to a just-deleted event are removed in the same run.

    Args:
        db_path: Database file to repair. Falls back to the module-level
            ``DB_PATH`` when omitted.
    """
    path = DB_PATH if db_path is None else db_path
    if not os.path.exists(path):
        print(f"Database not found at {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cur = conn.cursor()

        print("Repairing database...")

        # 1. Remove all orphans
        cur.execute("DELETE FROM support_effects WHERE card_id NOT IN (SELECT card_id FROM support_cards)")
        print(f"Removed {cur.rowcount} orphaned effects")

        cur.execute("DELETE FROM support_hints WHERE card_id NOT IN (SELECT card_id FROM support_cards)")
        print(f"Removed {cur.rowcount} orphaned hints")

        # Events must go first: deleting them is what orphans their skills.
        cur.execute("DELETE FROM support_events WHERE card_id NOT IN (SELECT card_id FROM support_cards)")
        print(f"Removed {cur.rowcount} orphaned events")

        cur.execute("DELETE FROM event_skills WHERE event_id NOT IN (SELECT event_id FROM support_events)")
        print(f"Removed {cur.rowcount} orphaned event skills")

        # 2. Cleanup owned_cards and deck_slots
        cur.execute("DELETE FROM owned_cards WHERE card_id NOT IN (SELECT card_id FROM support_cards)")
        cur.execute("DELETE FROM deck_slots WHERE card_id NOT IN (SELECT card_id FROM support_cards)")

        conn.commit()
    finally:
        # Closing without a commit discards a partially applied repair.
        conn.close()

    print("Repair complete.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
repair_db()
|
||||
@@ -258,11 +258,18 @@ def scrape_support_card(page, url, conn, max_retries=3):
|
||||
|
||||
cur = conn.cursor()
|
||||
|
||||
# Insert card
|
||||
# Insert card using OR IGNORE to keep the same card_id if it exists
|
||||
cur.execute("""
|
||||
INSERT OR REPLACE INTO support_cards (name, rarity, card_type, max_level, gametora_url)
|
||||
INSERT OR IGNORE INTO support_cards (name, rarity, card_type, max_level, gametora_url)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
""", (name, rarity, card_type, max_level, url))
|
||||
|
||||
# Update existing card to ensure data is fresh (without changing ID)
|
||||
cur.execute("""
|
||||
UPDATE support_cards
|
||||
SET name = ?, rarity = ?, card_type = ?, max_level = ?
|
||||
WHERE gametora_url = ?
|
||||
""", (name, rarity, card_type, max_level, url))
|
||||
conn.commit()
|
||||
|
||||
cur.execute("SELECT card_id FROM support_cards WHERE gametora_url = ?", (url,))
|
||||
@@ -554,29 +561,48 @@ def scrape_events(page, card_id, cur):
|
||||
skill_rarity_map = page.evaluate("""
|
||||
() => {
|
||||
const map = {};
|
||||
// Rare skills use a specific class (e.g., kkspcu) while normal use another (e.g., gImSzc)
|
||||
// It's safer to find all skill containers in the summary section
|
||||
const sections = Array.from(document.querySelectorAll('div')).filter(d => d.innerText.startsWith('Skills from events'));
|
||||
if (sections.length === 0) return map;
|
||||
console.log("Building Skill Rarity Map...");
|
||||
|
||||
// 1. Find all skill containers. They usually have a name and a 'Details' button.
|
||||
// In the "Skills from events" or "Support hints" sections.
|
||||
const containers = Array.from(document.querySelectorAll('div')).filter(d =>
|
||||
(d.innerText.includes('Details') || d.innerText.includes('Reward')) && d.innerText.length < 500
|
||||
);
|
||||
|
||||
const containers = sections[0].parentElement.querySelectorAll('div[class*="sc-"]');
|
||||
containers.forEach(c => {
|
||||
const nameNode = c.querySelector('div[font-weight="bold"], span[font-weight="bold"]');
|
||||
const name = nameNode ? nameNode.innerText.trim() : c.innerText.split('\\n')[0].trim();
|
||||
// Try to extract the skill name. It's usually the first text node or a bold tag.
|
||||
const nameNode = c.querySelector('b, span[font-weight="bold"], div[font-weight="bold"]');
|
||||
let name = "";
|
||||
if (nameNode) {
|
||||
name = nameNode.innerText.trim();
|
||||
} else {
|
||||
// Fallback to text before 'Details'
|
||||
name = c.innerText.split('Details')[0].replace(/\\n/g, ' ').trim();
|
||||
}
|
||||
|
||||
if (name && name.length > 2) {
|
||||
// Check if it has a gold-themed class or computed background color
|
||||
const isGold = c.className.includes('kkspcu') || window.getComputedStyle(c).backgroundColor.includes('rgb(255, 193, 7)');
|
||||
map[name] = isGold;
|
||||
const style = window.getComputedStyle(c);
|
||||
const nameStyle = nameNode ? window.getComputedStyle(nameNode) : null;
|
||||
|
||||
// Golden skills have a specific background
|
||||
const isGold = style.backgroundImage.includes('linear-gradient') ||
|
||||
style.backgroundColor.includes('rgb(255, 193, 7)') ||
|
||||
(nameStyle && nameStyle.color === 'rgb(255, 193, 7)') ||
|
||||
c.className.includes('kkspcu') ||
|
||||
c.innerHTML.includes('kkspcu');
|
||||
|
||||
const normalized = name.toLowerCase().replace(/\\s+/g, ' ').replace(/[()()-]/g, '').trim();
|
||||
map[normalized] = isGold;
|
||||
console.log(`Mapped Skill: "${name}" [${normalized}] -> Gold: ${isGold}`);
|
||||
}
|
||||
});
|
||||
return map;
|
||||
}
|
||||
""")
|
||||
|
||||
|
||||
# Scroll to the Events section specifically
|
||||
print(" Ensuring events are loaded...")
|
||||
page.evaluate("() => { const h = Array.from(document.querySelectorAll('h2, h1')).find(el => el.innerText.includes('Training Events')); if (h) h.scrollIntoView(); }")
|
||||
page.evaluate("() => { const h = Array.from(document.querySelectorAll('h2, h1, div')).find(el => el.innerText.toLowerCase().includes('training events')); if (h) h.scrollIntoView(); }")
|
||||
page.wait_for_timeout(1000)
|
||||
|
||||
# 2. Scrape ONLY the LAST chain event (Golden Perk) with OR options
|
||||
@@ -587,21 +613,30 @@ def scrape_events(page, card_id, cur):
|
||||
// Find all chain event buttons
|
||||
const getChainEventButtons = () => {
|
||||
const buttons = [];
|
||||
const headers = Array.from(document.querySelectorAll('div, h2, h3, span')).filter(el =>
|
||||
el.innerText.includes('Chain Events')
|
||||
// Look for "Chain Events" text (case-insensitive substring)
|
||||
const labels = Array.from(document.querySelectorAll('div, span, h2, h3, h4')).filter(el =>
|
||||
el.innerText.toLowerCase().includes('chain events') && el.innerText.trim().length < 20
|
||||
);
|
||||
|
||||
headers.forEach(header => {
|
||||
const container = header.parentElement;
|
||||
labels.forEach(label => {
|
||||
// The buttons are usually in the same container or next container
|
||||
let container = label.parentElement;
|
||||
let attempts = 0;
|
||||
while (container && container.querySelectorAll('button').length === 0 && attempts < 5) {
|
||||
container = container.nextElementSibling || container.parentElement;
|
||||
attempts++;
|
||||
if (container && container.tagName === 'BODY') break;
|
||||
}
|
||||
|
||||
if (container) {
|
||||
const btns = Array.from(container.querySelectorAll('button'));
|
||||
btns.forEach(btn => {
|
||||
const text = btn.innerText.trim();
|
||||
const style = window.getComputedStyle(btn);
|
||||
const isVisible = style.display !== 'none' && style.visibility !== 'hidden' && btn.offsetWidth > 0;
|
||||
const isVisible = style.display !== 'none' && style.visibility !== 'hidden';
|
||||
|
||||
// Only chain events (contain '>')
|
||||
if (isVisible && text && text.includes('>') && !text.includes('Events')) {
|
||||
// Look for arrows (regular or heavy)
|
||||
if (isVisible && (text.includes('>') || text.includes('❯'))) {
|
||||
buttons.push(btn);
|
||||
}
|
||||
});
|
||||
@@ -617,13 +652,20 @@ def scrape_events(page, card_id, cur):
|
||||
return null;
|
||||
}
|
||||
|
||||
// Find the button with the most '>' characters (the last chain event = Golden Perk)
|
||||
let goldenPerkButton = null;
|
||||
let maxArrows = 0;
|
||||
|
||||
for (const btn of buttons) {
|
||||
const text = btn.innerText.trim();
|
||||
const arrowCount = (text.match(/>/g) || []).length;
|
||||
// Count both regular and heavy arrows
|
||||
const arrowCount = (text.match(/>|❯/g) || []).length;
|
||||
|
||||
// If it has three heavy arrows, it's almost certainly the golden perk
|
||||
if (text.includes('❯❯❯')) {
|
||||
goldenPerkButton = btn;
|
||||
break;
|
||||
}
|
||||
|
||||
if (arrowCount > maxArrows) {
|
||||
maxArrows = arrowCount;
|
||||
goldenPerkButton = btn;
|
||||
@@ -709,7 +751,30 @@ def scrape_events(page, card_id, cur):
|
||||
event_id = cur.lastrowid
|
||||
|
||||
for skill in golden_perk_data['skills']:
|
||||
is_gold = 1 if skill_rarity_map.get(skill['name']) else 0
|
||||
# Normalization helper
|
||||
def normalize(s):
|
||||
return s.lower().replace(" hint +1", "").replace(" hint +3", "").replace(" hint +5", "").replace(" hint +", "").strip().replace(" ", " ").replace("-", "").replace("(", "").replace(")", "").replace(" ", "")
|
||||
|
||||
skill_name = normalize(skill['name'])
|
||||
|
||||
# Use extra aggressive name matching against the map values
|
||||
# (The map keys are already normalized)
|
||||
is_gold = 0
|
||||
for k, gold in skill_rarity_map.items():
|
||||
if normalize(k) == skill_name:
|
||||
is_gold = 1 if gold else 0
|
||||
break
|
||||
|
||||
# Fallback 1: If it's a chain event and specifically the last one, it's almost certainly gold
|
||||
if not is_gold and golden_perk_data.get('type') == 'Chain':
|
||||
# Check for "hint" patterns which usually accompany gold perks in chain events
|
||||
if "hint +" in skill['name'].lower() or len(golden_perk_data['skills']) <= 2:
|
||||
is_gold = 1
|
||||
print(f" ✨ Golden Skill Fallback (Last Chain Event): {skill['name']}")
|
||||
|
||||
if is_gold:
|
||||
print(f" ✨ Golden Skill Verified: {skill['name']}")
|
||||
|
||||
cur.execute("""
|
||||
INSERT INTO event_skills (event_id, skill_name, is_gold, is_or)
|
||||
VALUES (?, ?, ?, ?)
|
||||
|
||||
62
test_gold_scrape.py
Normal file
62
test_gold_scrape.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import sqlite3
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Add parent dir to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from scraper.gametora_scraper import scrape_support_card, sync_playwright
|
||||
from db.db_queries import get_conn
|
||||
|
||||
def test_golden_perk():
    """Re-scrape the Fine Motion support card and report its golden skills.

    Deletes the events and event skills already stored for the card, scrapes
    the card page again with a headless Chromium browser, then prints every
    stored event skill for the card and whether any of them is flagged gold.
    The database connection and the browser are released even when a step
    raises.
    """
    # Single source for the card under test so the log lines cannot drift
    # apart from the URL that is actually scraped.
    card_label = "Fine Motion"
    url = "https://gametora.com/umamusume/supports/30010-fine-motion"

    print(f"Testing Golden Perk Scraping for {card_label}...")

    conn = get_conn()
    try:
        cur = conn.cursor()

        # 1. Clean previous data for this specific card
        cur.execute("SELECT card_id FROM support_cards WHERE gametora_url = ?", (url,))
        row = cur.fetchone()
        if row:
            card_id = row[0]
            # event_skills must be deleted first: its subquery reads the
            # support_events rows that the next statement removes.
            cur.execute(
                "DELETE FROM event_skills WHERE event_id IN (SELECT event_id FROM support_events WHERE card_id = ?)",
                (card_id,),
            )
            cur.execute("DELETE FROM support_events WHERE card_id = ?", (card_id,))
            conn.commit()

        # 2. Scrape
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context()
                page = context.new_page()

                success = scrape_support_card(page, url, conn)
                print(f"Scrape success: {success}")
            finally:
                # Close the browser even if the scrape raises.
                browser.close()

        # 3. Verify results
        cur.execute("""
            SELECT se.event_name, es.skill_name, es.is_gold
            FROM support_events se
            JOIN event_skills es ON se.event_id = es.event_id
            JOIN support_cards sc ON se.card_id = sc.card_id
            WHERE sc.gametora_url = ?
        """, (url,))

        skills = cur.fetchall()
        print(f"\nSkills found for {card_label}:")
        for event_name, skill_name, is_gold in skills:
            status = "✨ GOLD" if is_gold else "Normal"
            print(f"- [{status}] {event_name}: {skill_name}")

        if any(is_gold for _, _, is_gold in skills):
            print("\n✅ SUCCESS: Golden Perk identified correctly!")
        else:
            print("\n❌ FAILURE: No golden perks found.")
    finally:
        conn.close()
|
||||
|
||||
# Run the golden-perk scrape check only when this file is executed directly.
if __name__ == "__main__":
    test_golden_perk()
|
||||
@@ -4,7 +4,7 @@ This file is the single source of truth for the application version.
|
||||
"""
|
||||
|
||||
# Semantic versioning: MAJOR.MINOR.PATCH
|
||||
VERSION: str = "12.1.0"
|
||||
VERSION: str = "13.0.0"
|
||||
|
||||
# Application metadata
|
||||
APP_NAME: str = "UmamusumeCardManager"
|
||||
|
||||
Reference in New Issue
Block a user