feat: Implement GameTora scraper for Umamusume support cards, including image download and database storage.

This commit is contained in:
kiyreload27
2025-12-28 21:11:34 +00:00
parent 5d9d0a6392
commit 4fa8b7ee19
18 changed files with 218 additions and 42 deletions

View File

@@ -285,10 +285,22 @@ def init_database():
skill_id INTEGER PRIMARY KEY AUTOINCREMENT, skill_id INTEGER PRIMARY KEY AUTOINCREMENT,
event_id INTEGER, event_id INTEGER,
skill_name TEXT, skill_name TEXT,
is_gold INTEGER DEFAULT 0,
is_or INTEGER DEFAULT 0,
FOREIGN KEY (event_id) REFERENCES support_events(event_id) FOREIGN KEY (event_id) REFERENCES support_events(event_id)
) )
""") """)
# Migration: Add columns to event_skills if they don't exist
try:
cur.execute("ALTER TABLE event_skills ADD COLUMN is_gold INTEGER DEFAULT 0")
except sqlite3.OperationalError:
pass # Column already exists
try:
cur.execute("ALTER TABLE event_skills ADD COLUMN is_or INTEGER DEFAULT 0")
except sqlite3.OperationalError:
pass # Column already exists
# User tables # User tables
cur.execute(""" cur.execute("""
CREATE TABLE IF NOT EXISTS owned_cards ( CREATE TABLE IF NOT EXISTS owned_cards (
@@ -479,26 +491,46 @@ def get_events(card_id):
return rows return rows
def get_all_event_skills(card_id): def get_all_event_skills(card_id):
"""Get all events and their skills for a card""" """Get all skills from training events for a card"""
conn = get_conn() conn = get_conn()
cur = conn.cursor() cur = conn.cursor()
cur.execute(""" cur.execute("""
SELECT se.event_name, es.skill_name SELECT se.event_name, es.skill_name, es.is_gold, es.is_or
FROM support_events se FROM support_events se
LEFT JOIN event_skills es ON se.event_id = es.event_id JOIN event_skills es ON se.event_id = es.event_id
WHERE se.card_id = ? WHERE se.card_id = ?
ORDER BY se.event_name, es.skill_name
""", (card_id,)) """, (card_id,))
result = {} # Group by event
for event_name, skill_name in cur.fetchall(): events = {}
if event_name not in result: for event_name, skill_name, is_gold, is_or in cur.fetchall():
result[event_name] = [] if event_name not in events:
if skill_name: events[event_name] = {'skills': [], 'or_skills': []}
result[event_name].append(skill_name)
prefix = "" if is_gold else ""
if is_or:
events[event_name]['or_skills'].append(f"{prefix}{skill_name}")
else:
events[event_name]['skills'].append(f"{prefix}{skill_name}")
results = []
for event_name, data in events.items():
event_skills = []
if data['or_skills']:
event_skills.append(" (OR) ".join(data['or_skills']))
event_skills.extend(data['skills'])
details = f"({', '.join(event_skills)})" if event_skills else ""
results.append({
'card_id': card_id,
'source': 'Event',
'skill_name': event_name,
'details': details
})
conn.close() conn.close()
return result return results
# ============================================ # ============================================
# Owned Cards (Collection) Queries # Owned Cards (Collection) Queries
@@ -827,10 +859,32 @@ def get_cards_with_skill(skill_name):
card_id, name, rarity, card_type, image_path, event_name, event_id, is_owned = row card_id, name, rarity, card_type, image_path, event_name, event_id, is_owned = row
event_name = event_name.replace('\n', ' ').strip() event_name = event_name.replace('\n', ' ').strip()
# Get ALL skills for this event to show in details # Format event skills (handle OR groups and gold skills)
cur.execute("SELECT skill_name FROM event_skills WHERE event_id = ?", (event_id,)) formatted_event_skills = []
other_skills = [r[0] for r in cur.fetchall()] cur.execute("""
skills_summary = ", ".join(other_skills) SELECT skill_name, is_gold, is_or
FROM event_skills
WHERE event_id = ?
""", (event_id,))
skills_data = cur.fetchall()
or_group_skills = []
other_event_skills = []
for s_name, s_is_gold, s_is_or in skills_data:
prefix = "" if s_is_gold else ""
if s_is_or:
or_group_skills.append(f"{prefix}{s_name}")
else:
other_event_skills.append(f"{prefix}{s_name}")
if or_group_skills:
formatted_event_skills.append(" (OR) ".join(or_group_skills))
formatted_event_skills.extend(other_event_skills)
# Create a nice string like "Event Name (Skill1, Skill2)"
details = f"{event_name} ({', '.join(formatted_event_skills)})" if formatted_event_skills else event_name
entry_key = (card_id, f'Event: {event_name}') entry_key = (card_id, f'Event: {event_name}')
@@ -842,7 +896,7 @@ def get_cards_with_skill(skill_name):
'type': card_type, 'type': card_type,
'image_path': image_path, 'image_path': image_path,
'source': 'Event', 'source': 'Event',
'details': f"{event_name} ({skills_summary})", 'details': details,
'is_owned': bool(is_owned) 'is_owned': bool(is_owned)
}) })
seen_entries.add(entry_key) seen_entries.add(entry_key)

BIN
images/1019_Fine Motion.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

BIN
images/1021_Fine Motion.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

BIN
images/1023_Fine Motion.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

BIN
images/1025_Fine Motion.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

BIN
images/1027_Fine Motion.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

BIN
images/1029_Fine Motion.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

BIN
images/1031_Fine Motion.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

BIN
images/1033_Fine Motion.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

View File

@@ -475,40 +475,162 @@ def scrape_hints(page, card_id, cur):
print(f" Found {len(hints)} hints") print(f" Found {len(hints)} hints")
def scrape_events(page, card_id, cur): def scrape_events(page, card_id, cur):
"""Scrape training events""" """Scrape training events with detailed skill rewards (Gold/White/OR)"""
events = page.evaluate("""
# 1. First, build a map of skills from the 'Skills from events' summary section
# This helps us identify which skills are Rare (Gold)
skill_rarity_map = page.evaluate("""
() => { () => {
const events = []; const map = {};
const text = document.body.innerText; // Rare skills use a specific class (e.g., kkspcu) while normal use another (e.g., gImSzc)
// It's safer to find all skill containers in the summary section
const sections = Array.from(document.querySelectorAll('div')).filter(d => d.innerText.startsWith('Skills from events'));
if (sections.length === 0) return map;
const eventsMatch = text.match(/Training [Ee]vents([\\s\\S]*?)(?:$)/); const containers = sections[0].parentElement.querySelectorAll('div[class*="sc-"]');
if (!eventsMatch) return events; containers.forEach(c => {
const nameNode = c.querySelector('div[font-weight="bold"], span[font-weight="bold"]');
const eventsSection = eventsMatch[1]; const name = nameNode ? nameNode.innerText.trim() : c.innerText.split('\\n')[0].trim();
const lines = eventsSection.split('\\n'); if (name && name.length > 2) {
// Check if it has a gold-themed class or computed background color
for (const line of lines) { const isGold = c.className.includes('kkspcu') || window.getComputedStyle(c).backgroundColor.includes('rgb(255, 193, 7)');
const trimmed = line.trim(); map[name] = isGold;
if (trimmed.length > 5 && trimmed.length < 80 &&
!trimmed.includes('%') && !trimmed.includes('Energy') &&
!trimmed.includes('bond') && !trimmed.includes('+') &&
trimmed[0] === trimmed[0].toUpperCase()) {
events.push({ name: trimmed, type: 'Event' });
} }
} });
return map;
return events.slice(0, 15);
} }
""") """)
for event in events: # Enable console logging for debugging
page.on("console", lambda msg: print(f" [JS Console] {msg.text}"))
# Scroll to the Events section specifically
print(" Ensuring events are loaded...")
page.evaluate("() => { const h = Array.from(document.querySelectorAll('h2, h1')).find(el => el.innerText.includes('Training Events')); if (h) h.scrollIntoView(); }")
page.wait_for_timeout(1000)
# 2. Scrape the individual events and their popover rewards
events_data = page.evaluate("""
async () => {
console.log("Starting event scraping overhaul...");
const events = [];
// Targeted search for event triggers based on section headers
const getTriggers = () => {
const triggers = [];
const headers = Array.from(document.querySelectorAll('div, h2, h3, span')).filter(el =>
el.innerText.includes('Chain Events') || el.innerText.includes('Random Events')
);
headers.forEach(header => {
// Look for buttons in the siblings or children of the parent container
const container = header.parentElement;
if (container) {
const buttons = Array.from(container.querySelectorAll('button'));
buttons.forEach(btn => {
const text = btn.innerText.trim();
if (text && text.length > 5 && !text.includes('Events')) {
triggers.push(btn);
}
});
}
});
return triggers;
};
const buttons = getTriggers();
console.log(`Found ${buttons.length} candidate triggers: ${buttons.map(b => b.innerText.trim()).join(', ')}`);
// Dedup targets by name
const seenNames = new Set();
for (const btn of buttons) {
const eventName = btn.innerText.trim();
if (!eventName || seenNames.has(eventName)) continue;
seenNames.add(eventName);
const eventType = eventName.includes('>') ? 'Chain' : 'Random';
console.log(`Processing event: ${eventName}`);
try {
// Click to open popover
btn.scrollIntoViewIfNeeded ? btn.scrollIntoViewIfNeeded() : null;
await new Promise(r => setTimeout(r, 100));
btn.click();
await new Promise(r => setTimeout(r, 600)); // Wait a bit more
// Find popover - look for any dialogue/popover with the event name
const popovers = Array.from(document.querySelectorAll('div')).filter(d =>
d.innerText.includes(eventName) &&
window.getComputedStyle(d).zIndex > 50 &&
d.innerText.length < 2500
);
if (popovers.length > 0) {
const pop = popovers[popovers.length - 1];
console.log(`Found popover for ${eventName}`);
const skills = [];
// Look for 'OR' dividers
const hasOrDivider = pop.querySelector('[class*="divider_or"]') !== null ||
pop.innerText.includes('Randomly either') ||
pop.innerText.includes(' or ');
// Find all skill names
const skillLinks = Array.from(pop.querySelectorAll('span, a')).filter(el =>
el.innerText.length > 2 &&
!el.innerText.includes('Energy') &&
!el.innerText.includes('bond') &&
(window.getComputedStyle(el).color === 'rgb(102, 107, 255)' ||
el.className.includes('linkcolor'))
);
console.log(`Found ${skillLinks.length} potential skills in popover`);
skillLinks.forEach(link => {
const skillName = link.innerText.trim();
if (skillName && skillName.length > 2 && !skills.some(s => s.name === skillName)) {
// Check for inline ' or ' text nearby
const textAround = link.parentElement ? link.parentElement.innerText : "";
const isOr = hasOrDivider || textAround.toLowerCase().includes(' or ');
skills.push({ name: skillName, is_or: isOr });
}
});
events.push({ name: eventName, type: eventType, skills: skills });
// Close popover
document.body.click();
await new Promise(r => setTimeout(r, 200));
} else {
console.log(`Popover NOT found for ${eventName}`);
events.push({ name: eventName, type: eventType, skills: [] });
}
} catch (err) {
console.log(`Error clicking ${eventName}: ${err.message}`);
}
}
return events;
}
""")
# 3. Store in database
for event in events_data:
cur.execute(""" cur.execute("""
INSERT INTO support_events (card_id, event_name, event_type) INSERT INTO support_events (card_id, event_name, event_type)
VALUES (?, ?, ?) VALUES (?, ?, ?)
""", (card_id, event.get('name', ''), event.get('type', 'Unknown'))) """, (card_id, event['name'], event['type']))
event_id = cur.lastrowid
if events: for skill in event['skills']:
print(f" Found {len(events)} events") is_gold = 1 if skill_rarity_map.get(skill['name']) else 0
cur.execute("""
INSERT INTO event_skills (event_id, skill_name, is_gold, is_or)
VALUES (?, ?, ?, ?)
""", (event_id, skill['name'], is_gold, 1 if skill['is_or'] else 0))
if events_data:
print(f" Processed {len(events_data)} events with {sum(len(e['skills']) for e in events_data)} skill rewards")
def run_scraper(): def run_scraper():
"""Main scraper function""" """Main scraper function"""