feat: Implement GameTora scraper for Umamusume support cards, including image download and database storage.
@@ -285,10 +285,22 @@ def init_database():
|
||||
skill_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
event_id INTEGER,
|
||||
skill_name TEXT,
|
||||
is_gold INTEGER DEFAULT 0,
|
||||
is_or INTEGER DEFAULT 0,
|
||||
FOREIGN KEY (event_id) REFERENCES support_events(event_id)
|
||||
)
|
||||
""")
|
||||
|
||||
# Migration: Add columns to event_skills if they don't exist
|
||||
try:
|
||||
cur.execute("ALTER TABLE event_skills ADD COLUMN is_gold INTEGER DEFAULT 0")
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
try:
|
||||
cur.execute("ALTER TABLE event_skills ADD COLUMN is_or INTEGER DEFAULT 0")
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
|
||||
# User tables
|
||||
cur.execute("""
|
||||
CREATE TABLE IF NOT EXISTS owned_cards (
|
||||
@@ -479,26 +491,46 @@ def get_events(card_id):
|
||||
return rows
|
||||
|
||||
def get_all_event_skills(card_id):
    """Get all skills from training events for a card.

    Joins ``support_events`` to ``event_skills`` for ``card_id`` and groups
    the resulting rows by event name.

    Args:
        card_id: Value matched against ``support_events.card_id``.

    Returns:
        A list with one dict per event that has at least one row in
        ``event_skills``. Each dict has the keys ``card_id``, ``source``
        (always ``'Event'``), ``skill_name`` (the event name) and
        ``details``: the event's skills in parentheses, with the OR group
        first, or ``""`` when the event has no named skill to show.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        cur.execute("""
            SELECT se.event_name, es.skill_name, es.is_gold, es.is_or
            FROM support_events se
            JOIN event_skills es ON se.event_id = es.event_id
            WHERE se.card_id = ?
            ORDER BY se.event_name, es.skill_name
        """, (card_id,))
        rows = cur.fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    # Group by event; dict insertion order keeps the query's ORDER BY.
    events = {}
    for event_name, skill_name, is_gold, is_or in rows:
        bucket = events.setdefault(event_name, {'skills': [], 'or_skills': []})
        if not skill_name:
            # A NULL/empty name would otherwise be rendered as "None".
            continue

        prefix = "✨ " if is_gold else ""
        target = 'or_skills' if is_or else 'skills'
        bucket[target].append(f"{prefix}{skill_name}")

    results = []
    for event_name, data in events.items():
        event_skills = []
        if data['or_skills']:
            # All OR-flagged skills of an event are shown as one alternative group.
            event_skills.append(" (OR) ".join(data['or_skills']))
        event_skills.extend(data['skills'])

        details = f"({', '.join(event_skills)})" if event_skills else ""
        results.append({
            'card_id': card_id,
            'source': 'Event',
            'skill_name': event_name,
            'details': details
        })

    return results
|
||||
|
||||
# ============================================
|
||||
# Owned Cards (Collection) Queries
|
||||
@@ -827,10 +859,32 @@ def get_cards_with_skill(skill_name):
|
||||
card_id, name, rarity, card_type, image_path, event_name, event_id, is_owned = row
|
||||
event_name = event_name.replace('\n', ' ').strip()
|
||||
|
||||
# Get ALL skills for this event to show in details
|
||||
cur.execute("SELECT skill_name FROM event_skills WHERE event_id = ?", (event_id,))
|
||||
other_skills = [r[0] for r in cur.fetchall()]
|
||||
skills_summary = ", ".join(other_skills)
|
||||
# Format event skills (handle OR groups and gold skills)
|
||||
formatted_event_skills = []
|
||||
cur.execute("""
|
||||
SELECT skill_name, is_gold, is_or
|
||||
FROM event_skills
|
||||
WHERE event_id = ?
|
||||
""", (event_id,))
|
||||
|
||||
skills_data = cur.fetchall()
|
||||
|
||||
or_group_skills = []
|
||||
other_event_skills = []
|
||||
|
||||
for s_name, s_is_gold, s_is_or in skills_data:
|
||||
prefix = "✨ " if s_is_gold else ""
|
||||
if s_is_or:
|
||||
or_group_skills.append(f"{prefix}{s_name}")
|
||||
else:
|
||||
other_event_skills.append(f"{prefix}{s_name}")
|
||||
|
||||
if or_group_skills:
|
||||
formatted_event_skills.append(" (OR) ".join(or_group_skills))
|
||||
formatted_event_skills.extend(other_event_skills)
|
||||
|
||||
# Create a nice string like "Event Name (Skill1, Skill2)"
|
||||
details = f"{event_name} ({', '.join(formatted_event_skills)})" if formatted_event_skills else event_name
|
||||
|
||||
entry_key = (card_id, f'Event: {event_name}')
|
||||
|
||||
@@ -842,7 +896,7 @@ def get_cards_with_skill(skill_name):
|
||||
'type': card_type,
|
||||
'image_path': image_path,
|
||||
'source': 'Event',
|
||||
'details': f"{event_name} ({skills_summary})",
|
||||
'details': details,
|
||||
'is_owned': bool(is_owned)
|
||||
})
|
||||
seen_entries.add(entry_key)
|
||||
|
||||
BIN
images/1019_Fine Motion.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1020_Kitasan Black.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1021_Fine Motion.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1022_Kitasan Black.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1023_Fine Motion.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1024_Kitasan Black.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1025_Fine Motion.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1026_Kitasan Black.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1027_Fine Motion.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1028_Kitasan Black.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1029_Fine Motion.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1030_Kitasan Black.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1031_Fine Motion.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1032_Kitasan Black.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1033_Fine Motion.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
BIN
images/1034_Kitasan Black.png
Normal file
|
After Width: | Height: | Size: 112 KiB |
@@ -475,40 +475,162 @@ def scrape_hints(page, card_id, cur):
|
||||
print(f" Found {len(hints)} hints")
|
||||
|
||||
def scrape_events(page, card_id, cur):
    """Scrape training events with detailed skill rewards (Gold/White/OR).

    Works in three steps: read the 'Skills from events' summary to learn
    which skill names are rare (gold), click every event button under the
    'Chain Events' / 'Random Events' headers to read the skills listed in
    its popover, then insert the events and their skills through ``cur``.

    Args:
        page: Browser page object; must provide ``evaluate``, ``on``,
            ``remove_listener`` and ``wait_for_timeout`` (presumably a
            Playwright sync ``Page`` -- confirm against the caller).
        card_id: Stored in ``support_events.card_id`` for every event found.
        cur: Database cursor used for the INSERTs; nothing is committed here.
    """

    # 1. First, build a map of skills from the 'Skills from events' summary section
    # This helps us identify which skills are Rare (Gold)
    skill_rarity_map = page.evaluate("""
        () => {
            const map = {};
            // Rare skills use a specific class (e.g., kkspcu) while normal use another (e.g., gImSzc)
            // It's safer to find all skill containers in the summary section
            const sections = Array.from(document.querySelectorAll('div')).filter(d => d.innerText.startsWith('Skills from events'));
            if (sections.length === 0) return map;

            const containers = sections[0].parentElement.querySelectorAll('div[class*="sc-"]');
            containers.forEach(c => {
                const nameNode = c.querySelector('div[font-weight="bold"], span[font-weight="bold"]');
                const name = nameNode ? nameNode.innerText.trim() : c.innerText.split('\\n')[0].trim();
                if (name && name.length > 2) {
                    // Check if it has a gold-themed class or computed background color
                    const isGold = c.className.includes('kkspcu') || window.getComputedStyle(c).backgroundColor.includes('rgb(255, 193, 7)');
                    map[name] = isGold;
                }
            });
            return map;
        }
    """)

    # Enable console logging for debugging. The handler is kept in a variable
    # so it can be detached again; otherwise every call on the same page would
    # stack another listener and each JS log line would be printed repeatedly.
    def _print_console(msg):
        print(f" [JS Console] {msg.text}")

    page.on("console", _print_console)
    try:
        # Scroll to the Events section specifically
        print(" Ensuring events are loaded...")
        page.evaluate("() => { const h = Array.from(document.querySelectorAll('h2, h1')).find(el => el.innerText.includes('Training Events')); if (h) h.scrollIntoView(); }")
        page.wait_for_timeout(1000)

        # 2. Scrape the individual events and their popover rewards
        events_data = page.evaluate("""
            async () => {
                console.log("Starting event scraping overhaul...");
                const events = [];

                // Targeted search for event triggers based on section headers
                const getTriggers = () => {
                    const triggers = [];
                    const headers = Array.from(document.querySelectorAll('div, h2, h3, span')).filter(el =>
                        el.innerText.includes('Chain Events') || el.innerText.includes('Random Events')
                    );

                    headers.forEach(header => {
                        // Look for buttons in the siblings or children of the parent container
                        const container = header.parentElement;
                        if (container) {
                            const buttons = Array.from(container.querySelectorAll('button'));
                            buttons.forEach(btn => {
                                const text = btn.innerText.trim();
                                if (text && text.length > 5 && !text.includes('Events')) {
                                    triggers.push(btn);
                                }
                            });
                        }
                    });
                    return triggers;
                };

                const buttons = getTriggers();
                console.log(`Found ${buttons.length} candidate triggers: ${buttons.map(b => b.innerText.trim()).join(', ')}`);

                // Dedup targets by name
                const seenNames = new Set();

                for (const btn of buttons) {
                    const eventName = btn.innerText.trim();
                    if (!eventName || seenNames.has(eventName)) continue;
                    seenNames.add(eventName);

                    const eventType = eventName.includes('>') ? 'Chain' : 'Random';
                    console.log(`Processing event: ${eventName}`);

                    try {
                        // Click to open popover
                        btn.scrollIntoViewIfNeeded ? btn.scrollIntoViewIfNeeded() : null;
                        await new Promise(r => setTimeout(r, 100));
                        btn.click();
                        await new Promise(r => setTimeout(r, 600)); // Wait a bit more

                        // Find popover - look for any dialogue/popover with the event name
                        const popovers = Array.from(document.querySelectorAll('div')).filter(d =>
                            d.innerText.includes(eventName) &&
                            window.getComputedStyle(d).zIndex > 50 &&
                            d.innerText.length < 2500
                        );

                        if (popovers.length > 0) {
                            const pop = popovers[popovers.length - 1];
                            console.log(`Found popover for ${eventName}`);
                            const skills = [];

                            // Look for 'OR' dividers
                            const hasOrDivider = pop.querySelector('[class*="divider_or"]') !== null ||
                                pop.innerText.includes('Randomly either') ||
                                pop.innerText.includes(' or ');

                            // Find all skill names
                            const skillLinks = Array.from(pop.querySelectorAll('span, a')).filter(el =>
                                el.innerText.length > 2 &&
                                !el.innerText.includes('Energy') &&
                                !el.innerText.includes('bond') &&
                                (window.getComputedStyle(el).color === 'rgb(102, 107, 255)' ||
                                 el.className.includes('linkcolor'))
                            );

                            console.log(`Found ${skillLinks.length} potential skills in popover`);

                            skillLinks.forEach(link => {
                                const skillName = link.innerText.trim();
                                if (skillName && skillName.length > 2 && !skills.some(s => s.name === skillName)) {
                                    // Check for inline ' or ' text nearby
                                    const textAround = link.parentElement ? link.parentElement.innerText : "";
                                    const isOr = hasOrDivider || textAround.toLowerCase().includes(' or ');
                                    skills.push({ name: skillName, is_or: isOr });
                                }
                            });

                            events.push({ name: eventName, type: eventType, skills: skills });

                            // Close popover
                            document.body.click();
                            await new Promise(r => setTimeout(r, 200));
                        } else {
                            console.log(`Popover NOT found for ${eventName}`);
                            events.push({ name: eventName, type: eventType, skills: [] });
                        }
                    } catch (err) {
                        console.log(`Error clicking ${eventName}: ${err.message}`);
                    }
                }
                return events;
            }
        """)
    finally:
        # Detach the debug listener even when a page call above raises.
        page.remove_listener("console", _print_console)

    # 3. Store in database
    for event in events_data:
        cur.execute("""
            INSERT INTO support_events (card_id, event_name, event_type)
            VALUES (?, ?, ?)
        """, (card_id, event['name'], event['type']))
        # Skills are linked to the row that was just inserted.
        event_id = cur.lastrowid

        for skill in event['skills']:
            # Names missing from the summary map are stored as non-gold.
            is_gold = 1 if skill_rarity_map.get(skill['name']) else 0
            cur.execute("""
                INSERT INTO event_skills (event_id, skill_name, is_gold, is_or)
                VALUES (?, ?, ?, ?)
            """, (event_id, skill['name'], is_gold, 1 if skill['is_or'] else 0))

    if events_data:
        print(f" Processed {len(events_data)} events with {sum(len(e['skills']) for e in events_data)} skill rewards")
|
||||
|
||||
def run_scraper():
|
||||
"""Main scraper function"""
|
||||
|
||||