feat: Implement GameTora scraper for Umamusume support cards, including image download and database storage.
@@ -285,10 +285,22 @@ def init_database():
|
|||||||
skill_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
skill_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
event_id INTEGER,
|
event_id INTEGER,
|
||||||
skill_name TEXT,
|
skill_name TEXT,
|
||||||
|
is_gold INTEGER DEFAULT 0,
|
||||||
|
is_or INTEGER DEFAULT 0,
|
||||||
FOREIGN KEY (event_id) REFERENCES support_events(event_id)
|
FOREIGN KEY (event_id) REFERENCES support_events(event_id)
|
||||||
)
|
)
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
# Migration: Add columns to event_skills if they don't exist
|
||||||
|
try:
|
||||||
|
cur.execute("ALTER TABLE event_skills ADD COLUMN is_gold INTEGER DEFAULT 0")
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
pass # Column already exists
|
||||||
|
try:
|
||||||
|
cur.execute("ALTER TABLE event_skills ADD COLUMN is_or INTEGER DEFAULT 0")
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
pass # Column already exists
|
||||||
|
|
||||||
# User tables
|
# User tables
|
||||||
cur.execute("""
|
cur.execute("""
|
||||||
CREATE TABLE IF NOT EXISTS owned_cards (
|
CREATE TABLE IF NOT EXISTS owned_cards (
|
||||||
@@ -479,26 +491,46 @@ def get_events(card_id):
|
|||||||
return rows
|
return rows
|
||||||
|
|
||||||
def get_all_event_skills(card_id):
    """Get all skills from training events for a card.

    Only events that have at least one row in ``event_skills`` are returned
    (inner join).

    Args:
        card_id: Value matched against ``support_events.card_id``.

    Returns:
        A list with one dict per distinct event name, each with the keys
        ``card_id``, ``source`` (always ``'Event'``), ``skill_name`` (holds
        the event name) and ``details`` (the parenthesised, comma-separated
        skill list; OR-group skills are joined with " (OR) " and gold skills
        are prefixed with a sparkle).
    """
    conn = get_conn()
    try:
        cur = conn.cursor()

        # SQLite guarantees no row order without ORDER BY; sorting by the
        # autoincrement ids keeps events and skills in insertion order.
        cur.execute("""
            SELECT se.event_name, es.skill_name, es.is_gold, es.is_or
            FROM support_events se
            JOIN event_skills es ON se.event_id = es.event_id
            WHERE se.card_id = ?
            ORDER BY se.event_id, es.skill_id
        """, (card_id,))
        rows = cur.fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    # Group by event
    events = {}
    for event_name, skill_name, is_gold, is_or in rows:
        buckets = events.setdefault(event_name, {'skills': [], 'or_skills': []})

        # A NULL/empty skill name would otherwise be rendered as "None".
        if not skill_name:
            continue

        prefix = "✨ " if is_gold else ""
        target = buckets['or_skills'] if is_or else buckets['skills']
        target.append(f"{prefix}{skill_name}")

    results = []
    for event_name, data in events.items():
        event_skills = []
        if data['or_skills']:
            # All OR-flagged skills of an event are shown as one alternative group.
            event_skills.append(" (OR) ".join(data['or_skills']))
        event_skills.extend(data['skills'])

        details = f"({', '.join(event_skills)})" if event_skills else ""
        results.append({
            'card_id': card_id,
            'source': 'Event',
            'skill_name': event_name,
            'details': details
        })

    return results
|
||||||
|
|
||||||
# ============================================
|
# ============================================
|
||||||
# Owned Cards (Collection) Queries
|
# Owned Cards (Collection) Queries
|
||||||
@@ -827,10 +859,32 @@ def get_cards_with_skill(skill_name):
|
|||||||
card_id, name, rarity, card_type, image_path, event_name, event_id, is_owned = row
|
card_id, name, rarity, card_type, image_path, event_name, event_id, is_owned = row
|
||||||
event_name = event_name.replace('\n', ' ').strip()
|
event_name = event_name.replace('\n', ' ').strip()
|
||||||
|
|
||||||
# Get ALL skills for this event to show in details
|
# Format event skills (handle OR groups and gold skills)
|
||||||
cur.execute("SELECT skill_name FROM event_skills WHERE event_id = ?", (event_id,))
|
formatted_event_skills = []
|
||||||
other_skills = [r[0] for r in cur.fetchall()]
|
cur.execute("""
|
||||||
skills_summary = ", ".join(other_skills)
|
SELECT skill_name, is_gold, is_or
|
||||||
|
FROM event_skills
|
||||||
|
WHERE event_id = ?
|
||||||
|
""", (event_id,))
|
||||||
|
|
||||||
|
skills_data = cur.fetchall()
|
||||||
|
|
||||||
|
or_group_skills = []
|
||||||
|
other_event_skills = []
|
||||||
|
|
||||||
|
for s_name, s_is_gold, s_is_or in skills_data:
|
||||||
|
prefix = "✨ " if s_is_gold else ""
|
||||||
|
if s_is_or:
|
||||||
|
or_group_skills.append(f"{prefix}{s_name}")
|
||||||
|
else:
|
||||||
|
other_event_skills.append(f"{prefix}{s_name}")
|
||||||
|
|
||||||
|
if or_group_skills:
|
||||||
|
formatted_event_skills.append(" (OR) ".join(or_group_skills))
|
||||||
|
formatted_event_skills.extend(other_event_skills)
|
||||||
|
|
||||||
|
# Create a nice string like "Event Name (Skill1, Skill2)"
|
||||||
|
details = f"{event_name} ({', '.join(formatted_event_skills)})" if formatted_event_skills else event_name
|
||||||
|
|
||||||
entry_key = (card_id, f'Event: {event_name}')
|
entry_key = (card_id, f'Event: {event_name}')
|
||||||
|
|
||||||
@@ -842,7 +896,7 @@ def get_cards_with_skill(skill_name):
|
|||||||
'type': card_type,
|
'type': card_type,
|
||||||
'image_path': image_path,
|
'image_path': image_path,
|
||||||
'source': 'Event',
|
'source': 'Event',
|
||||||
'details': f"{event_name} ({skills_summary})",
|
'details': details,
|
||||||
'is_owned': bool(is_owned)
|
'is_owned': bool(is_owned)
|
||||||
})
|
})
|
||||||
seen_entries.add(entry_key)
|
seen_entries.add(entry_key)
|
||||||
|
|||||||
BIN
images/1019_Fine Motion.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1020_Kitasan Black.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1021_Fine Motion.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1022_Kitasan Black.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1023_Fine Motion.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1024_Kitasan Black.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1025_Fine Motion.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1026_Kitasan Black.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1027_Fine Motion.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1028_Kitasan Black.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1029_Fine Motion.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1030_Kitasan Black.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1031_Fine Motion.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1032_Kitasan Black.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1033_Fine Motion.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1034_Kitasan Black.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
@@ -475,40 +475,162 @@ def scrape_hints(page, card_id, cur):
|
|||||||
print(f" Found {len(hints)} hints")
|
print(f" Found {len(hints)} hints")
|
||||||
|
|
||||||
def scrape_events(page, card_id, cur):
    """Scrape training events with detailed skill rewards (Gold/White/OR).

    Runs three steps against the already-loaded card page:
      1. Build a ``{skill name: is_gold}`` map from the "Skills from events"
         summary section.
      2. Click every event button under the "Chain Events" / "Random Events"
         headers and read the skills shown in the popover that opens.
      3. Insert one ``support_events`` row per event and one ``event_skills``
         row per skill through ``cur``.

    Args:
        page: Browser page object exposing ``evaluate``, ``on``,
            ``remove_listener`` and ``wait_for_timeout`` (presumably a
            Playwright sync ``Page`` -- confirm against the caller).
        card_id: Value stored in ``support_events.card_id``.
        cur: Database cursor used for the inserts; committing is left to
            the caller.
    """

    # 1. First, build a map of skills from the 'Skills from events' summary section
    # This helps us identify which skills are Rare (Gold)
    skill_rarity_map = page.evaluate("""
        () => {
            const map = {};
            // Rare skills use a specific class (e.g., kkspcu) while normal use another (e.g., gImSzc)
            // It's safer to find all skill containers in the summary section
            const sections = Array.from(document.querySelectorAll('div')).filter(d => d.innerText.startsWith('Skills from events'));
            if (sections.length === 0) return map;

            const containers = sections[0].parentElement.querySelectorAll('div[class*="sc-"]');
            containers.forEach(c => {
                const nameNode = c.querySelector('div[font-weight="bold"], span[font-weight="bold"]');
                const name = nameNode ? nameNode.innerText.trim() : c.innerText.split('\\n')[0].trim();
                if (name && name.length > 2) {
                    // Check if it has a gold-themed class or computed background color
                    const isGold = c.className.includes('kkspcu') || window.getComputedStyle(c).backgroundColor.includes('rgb(255, 193, 7)');
                    map[name] = isGold;
                }
            });
            return map;
        }
    """)

    # Enable console logging for debugging. A named handler is used so it can
    # be removed again below: registering a fresh listener on every call would
    # stack up listeners (and duplicate log lines) if the page is reused.
    def _log_console(msg):
        print(f" [JS Console] {msg.text}")

    page.on("console", _log_console)
    try:
        # Scroll to the Events section specifically
        print(" Ensuring events are loaded...")
        page.evaluate("() => { const h = Array.from(document.querySelectorAll('h2, h1')).find(el => el.innerText.includes('Training Events')); if (h) h.scrollIntoView(); }")
        page.wait_for_timeout(1000)

        # 2. Scrape the individual events and their popover rewards
        # NOTE(review): `hasOrDivider` is true whenever the popover text contains
        # ' or ' anywhere, and it then flags EVERY skill of that event as OR --
        # confirm this heuristic is not too broad for events with mixed rewards.
        # NOTE(review): an event whose click throws is only logged, not returned.
        events_data = page.evaluate("""
            async () => {
                console.log("Starting event scraping overhaul...");
                const events = [];

                // Targeted search for event triggers based on section headers
                const getTriggers = () => {
                    const triggers = [];
                    const headers = Array.from(document.querySelectorAll('div, h2, h3, span')).filter(el =>
                        el.innerText.includes('Chain Events') || el.innerText.includes('Random Events')
                    );

                    headers.forEach(header => {
                        // Look for buttons in the siblings or children of the parent container
                        const container = header.parentElement;
                        if (container) {
                            const buttons = Array.from(container.querySelectorAll('button'));
                            buttons.forEach(btn => {
                                const text = btn.innerText.trim();
                                if (text && text.length > 5 && !text.includes('Events')) {
                                    triggers.push(btn);
                                }
                            });
                        }
                    });
                    return triggers;
                };

                const buttons = getTriggers();
                console.log(`Found ${buttons.length} candidate triggers: ${buttons.map(b => b.innerText.trim()).join(', ')}`);

                // Dedup targets by name
                const seenNames = new Set();

                for (const btn of buttons) {
                    const eventName = btn.innerText.trim();
                    if (!eventName || seenNames.has(eventName)) continue;
                    seenNames.add(eventName);

                    const eventType = eventName.includes('>') ? 'Chain' : 'Random';
                    console.log(`Processing event: ${eventName}`);

                    try {
                        // Click to open popover
                        btn.scrollIntoViewIfNeeded ? btn.scrollIntoViewIfNeeded() : null;
                        await new Promise(r => setTimeout(r, 100));
                        btn.click();
                        await new Promise(r => setTimeout(r, 600)); // Wait a bit more

                        // Find popover - look for any dialogue/popover with the event name
                        const popovers = Array.from(document.querySelectorAll('div')).filter(d =>
                            d.innerText.includes(eventName) &&
                            window.getComputedStyle(d).zIndex > 50 &&
                            d.innerText.length < 2500
                        );

                        if (popovers.length > 0) {
                            const pop = popovers[popovers.length - 1];
                            console.log(`Found popover for ${eventName}`);
                            const skills = [];

                            // Look for 'OR' dividers
                            const hasOrDivider = pop.querySelector('[class*="divider_or"]') !== null ||
                                pop.innerText.includes('Randomly either') ||
                                pop.innerText.includes(' or ');

                            // Find all skill names
                            const skillLinks = Array.from(pop.querySelectorAll('span, a')).filter(el =>
                                el.innerText.length > 2 &&
                                !el.innerText.includes('Energy') &&
                                !el.innerText.includes('bond') &&
                                (window.getComputedStyle(el).color === 'rgb(102, 107, 255)' ||
                                 el.className.includes('linkcolor'))
                            );

                            console.log(`Found ${skillLinks.length} potential skills in popover`);

                            skillLinks.forEach(link => {
                                const skillName = link.innerText.trim();
                                if (skillName && skillName.length > 2 && !skills.some(s => s.name === skillName)) {
                                    // Check for inline ' or ' text nearby
                                    const textAround = link.parentElement ? link.parentElement.innerText : "";
                                    const isOr = hasOrDivider || textAround.toLowerCase().includes(' or ');
                                    skills.push({ name: skillName, is_or: isOr });
                                }
                            });

                            events.push({ name: eventName, type: eventType, skills: skills });

                            // Close popover
                            document.body.click();
                            await new Promise(r => setTimeout(r, 200));
                        } else {
                            console.log(`Popover NOT found for ${eventName}`);
                            events.push({ name: eventName, type: eventType, skills: [] });
                        }
                    } catch (err) {
                        console.log(`Error clicking ${eventName}: ${err.message}`);
                    }
                }
                return events;
            }
        """)
    finally:
        # Always detach the debug listener, even if the page scripts fail.
        page.remove_listener("console", _log_console)

    # 3. Store in database
    for event in events_data:
        cur.execute("""
            INSERT INTO support_events (card_id, event_name, event_type)
            VALUES (?, ?, ?)
        """, (card_id, event['name'], event['type']))
        # Row id of the event just inserted; links its skills below.
        event_id = cur.lastrowid

        for skill in event['skills']:
            # Skills missing from the summary map are stored as non-gold.
            is_gold = 1 if skill_rarity_map.get(skill['name']) else 0
            cur.execute("""
                INSERT INTO event_skills (event_id, skill_name, is_gold, is_or)
                VALUES (?, ?, ?, ?)
            """, (event_id, skill['name'], is_gold, 1 if skill['is_or'] else 0))

    if events_data:
        print(f" Processed {len(events_data)} events with {sum(len(e['skills']) for e in events_data)} skill rewards")
|
||||||
|
|
||||||
def run_scraper():
|
def run_scraper():
|
||||||
"""Main scraper function"""
|
"""Main scraper function"""
|
||||||
|
|||||||