diff --git a/database/umamusume_seed.db b/database/umamusume_seed.db index 58dd918..837d6d3 100644 Binary files a/database/umamusume_seed.db and b/database/umamusume_seed.db differ diff --git a/db/db_queries.py b/db/db_queries.py index 3a1eb3a..d8802c3 100644 --- a/db/db_queries.py +++ b/db/db_queries.py @@ -108,8 +108,44 @@ def run_migrations(): pass # Column already exists conn.commit() + repair_image_paths(conn) conn.close() +def repair_image_paths(conn): + """Attempt to populate missing image_path for existing cards in old databases""" + print("Checking for missing image paths to repair...") + cur = conn.cursor() + + # Find cards with missing image paths but have a URL + cur.execute("SELECT card_id, name, gametora_url FROM support_cards WHERE image_path IS NULL OR image_path = ''") + to_repair = cur.fetchall() + + if not to_repair: + return + + import re + repaired_count = 0 + + for card_id, name, url in to_repair: + if not url: continue + + # Extract ID from URL (e.g., 30154 from .../supports/30154-mejiro-ramonu) + match = re.search(r'/supports/(\d+)-', url) + if match: + stable_id = match.group(1) + # Create safe filename matching scraper logic + safe_name = re.sub(r'[<>:"/\\\\|?*]', '_', name) + filename = f"{stable_id}_{safe_name}.png" + + # Update DB with images/filename + cur.execute("UPDATE support_cards SET image_path = ? WHERE card_id = ?", + (f"images/{filename}", card_id)) + repaired_count += 1 + + if repaired_count > 0: + conn.commit() + print(f"Successfully repaired {repaired_count} image paths!") + def check_for_updates(): """Check if database version matches app version, sync if outdated""" if getattr(sys, 'frozen', False): diff --git a/scraper/gametora_scraper.py b/scraper/gametora_scraper.py index 137d984..b792ba6 100644 --- a/scraper/gametora_scraper.py +++ b/scraper/gametora_scraper.py @@ -549,242 +549,200 @@ def scrape_hints(page, card_id, cur): print(f" Found {len(hints)} hints") def scrape_events(page, card_id, cur): - """Scrape the LAST chain event (Golden Perk) with OR options""" + """Scrape all events including Chain, Dates, Random and Special""" # Use a flag to avoid adding multiple console listeners if not hasattr(page, "_console_attached"): page.on("console", lambda msg: print(f" [JS Console] {msg.text}") if "scrapping" not in msg.text.lower() else None) page._console_attached = True - # 1. First, build a map of skills from the 'Skills from events' summary section - # This helps us identify which skills are Rare (Gold) + # 1. Build a map of skills from the 'Skills from events' summary section + # This remains useful for identifying golden skills. skill_rarity_map = page.evaluate(""" () => { const map = {}; - console.log("Building Skill Rarity Map..."); - - // 1. Find all skill containers. They usually have a name and a 'Details' button. - // In the "Skills from events" or "Support hints" sections. const containers = Array.from(document.querySelectorAll('div')).filter(d => (d.innerText.includes('Details') || d.innerText.includes('Reward')) && d.innerText.length < 500 ); containers.forEach(c => { - // Try to extract the skill name. It's usually the first text node or a bold tag. const nameNode = c.querySelector('b, span[font-weight="bold"], div[font-weight="bold"]'); - let name = ""; - if (nameNode) { - name = nameNode.innerText.trim(); - } else { - // Fallback to text before 'Details' - name = c.innerText.split('Details')[0].replace(/\\n/g, ' ').trim(); - } + let name = nameNode ? nameNode.innerText.trim() : c.innerText.split('Details')[0].replace(/\\n/g, ' ').trim(); if (name && name.length > 2) { const style = window.getComputedStyle(c); const nameStyle = nameNode ? window.getComputedStyle(nameNode) : null; - - // Golden skills have a specific background const isGold = style.backgroundImage.includes('linear-gradient') || style.backgroundColor.includes('rgb(255, 193, 7)') || (nameStyle && nameStyle.color === 'rgb(255, 193, 7)') || - c.className.includes('kkspcu') || - c.innerHTML.includes('kkspcu'); + c.className.includes('kkspcu'); const normalized = name.toLowerCase().replace(/\\s+/g, ' ').replace(/[()()-]/g, '').trim(); map[normalized] = isGold; - console.log(`Mapped Skill: "${name}" [${normalized}] -> Gold: ${isGold}`); } }); return map; } """) - # Scroll to the Events section specifically - print(" Ensuring events are loaded...") - page.evaluate("() => { const h = Array.from(document.querySelectorAll('h2, h1, div')).find(el => el.innerText.toLowerCase().includes('training events')); if (h) h.scrollIntoView(); }") - page.wait_for_timeout(1000) - - # 2. Scrape ONLY the LAST chain event (Golden Perk) with OR options - golden_perk_data = page.evaluate(""" + # 2. Scrape all event types + print(" Scraping all event categories...") + all_events_data = page.evaluate(""" async () => { - console.log("Scraping Golden Perk (last chain event)..."); + const results = []; - // Find all chain event buttons - const getChainEventButtons = () => { - const buttons = []; - // Look for "Chain Events" text (case-insensitive substring) - const labels = Array.from(document.querySelectorAll('div, span, h2, h3, h4')).filter(el => - el.innerText.toLowerCase().includes('chain events') && el.innerText.trim().length < 20 + // Define categories to look for + const categories = [ + { label: 'Chain Events', type: 'Chain' }, + { label: 'Dates', type: 'Date' }, + { label: 'Random Events', type: 'Random' }, + { label: 'Special Events', type: 'Special' } + ]; + + for (const cat of categories) { + // Find category headers + const headers = Array.from(document.querySelectorAll('div, span, h2, h3, h4')).filter(el => + el.innerText.trim() === cat.label && el.children.length === 0 ); - labels.forEach(label => { - // The buttons are usually in the same container or next container - let container = label.parentElement; + for (const header of headers) { + // Find buttons in the following siblings or parent siblings + let container = header.parentElement; + let foundButtons = []; let attempts = 0; - while (container && container.querySelectorAll('button').length === 0 && attempts < 5) { - container = container.nextElementSibling || container.parentElement; + + while (container && foundButtons.length === 0 && attempts < 5) { + foundButtons = Array.from(container.querySelectorAll('button')).filter(btn => { + const style = window.getComputedStyle(btn); + return style.display !== 'none' && style.visibility !== 'hidden'; + }); + + if (foundButtons.length === 0) { + // Check next siblings of the header's ancestors + let sibling = header; + let parent = header.parentElement; + while(parent && parent.tagName !== 'BODY') { + if (parent.innerText.includes(cat.label)) { + const next = parent.nextElementSibling; + if (next) { + const nextBtns = Array.from(next.querySelectorAll('button')); + if (nextBtns.length > 0) { + foundButtons = nextBtns; + break; + } + } + } + parent = parent.parentElement; + } + } + container = container.parentElement; attempts++; - if (container && container.tagName === 'BODY') break; } - if (container) { - const btns = Array.from(container.querySelectorAll('button')); - btns.forEach(btn => { - const text = btn.innerText.trim(); - const style = window.getComputedStyle(btn); - const isVisible = style.display !== 'none' && style.visibility !== 'hidden'; + // Scrape each button in the category + for (const btn of foundButtons) { + const eventName = btn.innerText.trim(); + if (!eventName || results.some(r => r.name === eventName)) continue; + + // Count arrows for chain/date importance + const arrows = (eventName.match(/>|❯/g) || []).length; + + try { + btn.scrollIntoViewIfNeeded ? btn.scrollIntoViewIfNeeded() : null; + await new Promise(r => setTimeout(r, 100)); + btn.click(); + await new Promise(r => setTimeout(r, 500)); - // Look for arrows (regular or heavy) - if (isVisible && (text.includes('>') || text.includes('❯'))) { - buttons.push(btn); + const popovers = Array.from(document.querySelectorAll('div')).filter(d => + d.innerText.includes(eventName) && + window.getComputedStyle(d).zIndex > 50 && + d.innerText.length < 2500 + ); + + if (popovers.length > 0) { + const pop = popovers[popovers.length - 1]; + const hasOrDivider = pop.querySelector('[class*="divider_or"]') !== null || + pop.innerText.includes('Randomly either') || + pop.innerText.toLowerCase().includes(' or '); + + const skillLinks = Array.from(pop.querySelectorAll('span, a')).filter(el => + el.innerText.length > 2 && + !el.innerText.includes('Energy') && + (window.getComputedStyle(el).color === 'rgb(102, 107, 255)' || + el.className.includes('linkcolor')) + ); + + const skills = []; + skillLinks.forEach(link => { + const sName = link.innerText.trim(); + if (sName && !skills.some(s => s.name === sName)) { + skills.push({ name: sName, is_or: hasOrDivider }); + } + }); + + results.push({ + name: eventName, + type: cat.type, + skills: skills, + arrows: arrows + }); } - }); + + document.body.click(); + await new Promise(r => setTimeout(r, 100)); + } catch (err) { + console.log(`Failed to scrape event ${eventName}: ${err.message}`); + } } - }); - return buttons; - }; - - const buttons = getChainEventButtons(); - console.log(`Found ${buttons.length} chain event buttons`); - - if (buttons.length === 0) { - return null; - } - - let goldenPerkButton = null; - let maxArrows = 0; - - for (const btn of buttons) { - const text = btn.innerText.trim(); - // Count both regular and heavy arrows - const arrowCount = (text.match(/>|❯/g) || []).length; - - // If it has three heavy arrows, it's almost certainly the golden perk - if (text.includes('❯❯❯')) { - goldenPerkButton = btn; - break; - } - - if (arrowCount > maxArrows) { - maxArrows = arrowCount; - goldenPerkButton = btn; } } - - if (!goldenPerkButton) { - console.log("No golden perk button found"); - return null; - } - - const eventName = goldenPerkButton.innerText.trim(); - console.log(`Found Golden Perk: ${eventName} (${maxArrows} arrows)`); - - try { - // Click to open popover - goldenPerkButton.scrollIntoViewIfNeeded ? goldenPerkButton.scrollIntoViewIfNeeded() : null; - await new Promise(r => setTimeout(r, 100)); - goldenPerkButton.click(); - await new Promise(r => setTimeout(r, 600)); - - // Find popover - const popovers = Array.from(document.querySelectorAll('div')).filter(d => - d.innerText.includes(eventName) && - window.getComputedStyle(d).zIndex > 50 && - d.innerText.length < 2500 - ); - - if (popovers.length === 0) { - console.log(`Popover NOT found for ${eventName}`); - document.body.click(); - return { name: eventName, type: 'Chain', skills: [] }; - } - - const pop = popovers[popovers.length - 1]; - console.log(`Found popover for ${eventName}`); - - // Check for OR structure - look for "Randomly either" or "or" divider - const hasOrDivider = pop.querySelector('[class*="divider_or"]') !== null || - pop.innerText.includes('Randomly either') || - pop.innerText.toLowerCase().includes(' or '); - - // Find all skill names (purple/blue links) - const skillLinks = Array.from(pop.querySelectorAll('span, a')).filter(el => - el.innerText.length > 2 && - !el.innerText.includes('Energy') && - !el.innerText.includes('bond') && - (window.getComputedStyle(el).color === 'rgb(102, 107, 255)' || - el.className.includes('linkcolor')) - ); - - console.log(`Found ${skillLinks.length} potential skills in popover`); - - const skills = []; - skillLinks.forEach(link => { - const skillName = link.innerText.trim(); - if (skillName && skillName.length > 2 && !skills.some(s => s.name === skillName)) { - // If there's an OR divider, all skills in this popover are part of OR groups - const isOr = hasOrDivider; - skills.push({ name: skillName, is_or: isOr }); - } - }); - - // Close popover - document.body.click(); - await new Promise(r => setTimeout(r, 200)); - - return { name: eventName, type: 'Chain', skills: skills }; - - } catch (err) { - console.log(`Error clicking ${eventName}: ${err.message}`); - return { name: eventName, type: 'Chain', skills: [] }; - } + return results; } """) - # 3. Store ONLY the golden perk in database - if golden_perk_data: - cur.execute(""" - INSERT INTO support_events (card_id, event_name, event_type) - VALUES (?, ?, ?) - """, (card_id, golden_perk_data['name'], golden_perk_data['type'])) - event_id = cur.lastrowid + # 3. Store all found events and identify golden skills + if all_events_data: + # Determine max arrows for Chain and Dates to identify final step + max_arrows = { + 'Chain': max([e['arrows'] for e in all_events_data if e['type'] == 'Chain'] + [0]), + 'Date': max([e['arrows'] for e in all_events_data if e['type'] == 'Date'] + [0]) + } - for skill in golden_perk_data['skills']: - # Normalization helper - def normalize(s): - return s.lower().replace(" hint +1", "").replace(" hint +3", "").replace(" hint +5", "").replace(" hint +", "").strip().replace(" ", " ").replace("-", "").replace("(", "").replace(")", "").replace(" ", "") + for event in all_events_data: + cur.execute("INSERT INTO support_events (card_id, event_name, event_type) VALUES (?, ?, ?)", + (card_id, event['name'], event['type'])) + event_id = cur.lastrowid - skill_name = normalize(skill['name']) - - # Use extra aggressive name matching against the map values - # (The map keys are already normalized) - is_gold = 0 - for k, gold in skill_rarity_map.items(): - if normalize(k) == skill_name: - is_gold = 1 if gold else 0 - break - - # Fallback 1: If it's a chain event and specifically the last one, it's almost certainly gold - if not is_gold and golden_perk_data.get('type') == 'Chain': - # Check for "hint" patterns which usually accompany gold perks in chain events - if "hint +" in skill['name'].lower() or len(golden_perk_data['skills']) <= 2: - is_gold = 1 - print(f" ✨ Golden Skill Fallback (Last Chain Event): {skill['name']}") - - if is_gold: - print(f" ✨ Golden Skill Verified: {skill['name']}") - - cur.execute(""" - INSERT INTO event_skills (event_id, skill_name, is_gold, is_or) - VALUES (?, ?, ?, ?) - """, (event_id, skill['name'], is_gold, 1 if skill['is_or'] else 0)) - - skill_count = len(golden_perk_data['skills']) - or_count = sum(1 for s in golden_perk_data['skills'] if s['is_or']) - print(f" Golden Perk: {golden_perk_data['name']} ({skill_count} skills, {or_count} with OR)") + for skill in event['skills']: + def normalize(s): + # Remove hint suffix and special characters + s = s.lower().split(' hint +')[0] + return re.sub(r'[()()\-\s\+]', '', s).strip() + + n_name = normalize(skill['name']) + is_gold = 0 + for k, gold in skill_rarity_map.items(): + if normalize(k) == n_name: + is_gold = 1 if gold else 0 + break + + # Heuristic: If it's the last step of a Chain or Date, it's likely gold + if not is_gold and event['type'] in ['Chain', 'Date']: + if event['arrows'] >= 3 and event['arrows'] == max_arrows[event['type']]: + if len(event['skills']) <= 2 or "hint +" in skill['name'].lower(): + is_gold = 1 + print(f" ✨ Heuristic Gold: {skill['name']} in {event['name']}") + + if is_gold: print(f" ✨ Verified Gold: {skill['name']}") + + cur.execute(""" + INSERT INTO event_skills (event_id, skill_name, is_gold, is_or) + VALUES (?, ?, ?, ?) + """, (event_id, skill['name'], is_gold, 1 if skill['is_or'] else 0)) + + print(f" Scraped {len(all_events_data)} total events.") else: - print(f" No Golden Perk found for this card") + print(f" No events found.") def run_scraper(): """ Run the web scraper to fetch card data from GameTora.com """ diff --git a/version.py b/version.py index 3852d32..bd109e0 100644 --- a/version.py +++ b/version.py @@ -4,7 +4,7 @@ This file is the single source of truth for the application version. """ # Semantic versioning: MAJOR.MINOR.PATCH -VERSION: str = "13.0.3" +VERSION: str = "13.0.6" # Application metadata APP_NAME: str = "UmamusumeCardManager"