feat: Add database query functions, schema migrations, version-based syncing, a seed database, and a Gametora scraper.
This commit is contained in:
Binary file not shown.
@@ -108,8 +108,44 @@ def run_migrations():
|
||||
pass # Column already exists
|
||||
|
||||
conn.commit()
|
||||
repair_image_paths(conn)
|
||||
conn.close()
|
||||
|
||||
def repair_image_paths(conn):
    """Attempt to populate missing image_path for existing cards in old databases.

    Selects every row of ``support_cards`` whose ``image_path`` is NULL or
    empty, derives a file name from the numeric ID embedded in the card's
    ``gametora_url`` plus a filesystem-safe version of its ``name``, and
    stores ``images/<id>_<safe name>.png`` as the new ``image_path``.

    Rows are left untouched when they have no URL, no name, or a URL that
    does not contain ``/supports/<digits>-``.

    Args:
        conn: An open DB-API connection (``cursor()``/``commit()``) to the
            database holding the ``support_cards`` table.

    Returns:
        None. Commits on ``conn`` only when at least one row was updated.
    """
    # Local import: only part of the file is visible here, so do not rely on
    # a module-level ``import re`` being present.
    import re

    print("Checking for missing image paths to repair...")
    cur = conn.cursor()

    # Find cards with missing image paths
    cur.execute("SELECT card_id, name, gametora_url FROM support_cards WHERE image_path IS NULL OR image_path = ''")
    to_repair = cur.fetchall()
    if not to_repair:
        return

    # Extract ID from URL (e.g., 30154 from .../supports/30154-mejiro-ramonu)
    id_pattern = re.compile(r'/supports/(\d+)-')
    # Characters replaced with "_" to create a safe filename (the original
    # comment states this matches the scraper's logic, so keep it in sync).
    unsafe_chars = re.compile(r'[<>:"/\\\\|?*]')

    updates = []
    for card_id, name, url in to_repair:
        # A file name cannot be rebuilt without both the URL and the name;
        # a NULL name previously crashed re.sub with a TypeError.
        if not url or not name:
            continue

        match = id_pattern.search(url)
        if not match:
            continue

        stable_id = match.group(1)
        safe_name = unsafe_chars.sub('_', name)
        updates.append((f"images/{stable_id}_{safe_name}.png", card_id))

    if updates:
        # Update DB with images/filename in a single batch
        cur.executemany("UPDATE support_cards SET image_path = ? WHERE card_id = ?", updates)
        conn.commit()
        print(f"Successfully repaired {len(updates)} image paths!")
|
||||
|
||||
def check_for_updates():
|
||||
"""Check if database version matches app version, sync if outdated"""
|
||||
if getattr(sys, 'frozen', False):
|
||||
|
||||
@@ -549,242 +549,200 @@ def scrape_hints(page, card_id, cur):
|
||||
print(f" Found {len(hints)} hints")
|
||||
|
||||
def scrape_events(page, card_id, cur):
    """Scrape all events including Chain, Dates, Random and Special.

    Runs two JavaScript snippets in the page: the first builds a map of
    skill name -> "looks golden" from the skill containers, the second clicks
    every event button under the known category headers and collects the
    skills listed in the popover that opens. Each event is inserted into
    ``support_events`` and each of its skills into ``event_skills``.

    Args:
        page: Browser page object; must provide ``on``, ``evaluate`` and
            ``wait_for_timeout`` (presumably a Playwright sync ``Page`` -
            confirm against the caller).
        card_id: Value stored in ``support_events.card_id`` for every event.
        cur: DB-API cursor used for the INSERTs. Nothing is committed here.

    Returns:
        None.
    """
    # Local import so the block does not depend on a module-level import
    # that is outside the visible part of the file.
    import re

    def normalize(s):
        # Remove hint suffix and special characters so that names scraped
        # from different parts of the page can be compared.
        # NOTE(review): the doubled "()" in the class looks like it may have
        # been full-width parentheses originally - confirm against the repo.
        s = s.lower().split(' hint +')[0]
        return re.sub(r'[()()\-\s\+]', '', s).strip()

    # Use a flag to avoid adding multiple console listeners
    if not hasattr(page, "_console_attached"):
        page.on("console", lambda msg: print(f" [JS Console] {msg.text}") if "scrapping" not in msg.text.lower() else None)
        page._console_attached = True

    # 1. Build a map of skills from the 'Skills from events' summary section
    # This remains useful for identifying golden skills.
    skill_rarity_map = page.evaluate("""
        () => {
            const map = {};
            console.log("Building Skill Rarity Map...");

            // Find all skill containers. They usually have a name and a 'Details' button.
            const containers = Array.from(document.querySelectorAll('div')).filter(d =>
                (d.innerText.includes('Details') || d.innerText.includes('Reward')) && d.innerText.length < 500
            );

            containers.forEach(c => {
                // Prefer a bold node for the name; fall back to the text before 'Details'.
                const nameNode = c.querySelector('b, span[font-weight="bold"], div[font-weight="bold"]');
                let name = nameNode ? nameNode.innerText.trim() : c.innerText.split('Details')[0].replace(/\\n/g, ' ').trim();

                if (name && name.length > 2) {
                    const style = window.getComputedStyle(c);
                    const nameStyle = nameNode ? window.getComputedStyle(nameNode) : null;

                    // Golden skills have a specific background
                    const isGold = style.backgroundImage.includes('linear-gradient') ||
                                   style.backgroundColor.includes('rgb(255, 193, 7)') ||
                                   (nameStyle && nameStyle.color === 'rgb(255, 193, 7)') ||
                                   c.className.includes('kkspcu');

                    const normalized = name.toLowerCase().replace(/\\s+/g, ' ').replace(/[()()-]/g, '').trim();
                    map[normalized] = isGold;
                    console.log(`Mapped Skill: "${name}" [${normalized}] -> Gold: ${isGold}`);
                }
            });
            return map;
        }
    """)

    # Scroll to the Events section specifically
    print(" Ensuring events are loaded...")
    page.evaluate("() => { const h = Array.from(document.querySelectorAll('h2, h1, div')).find(el => el.innerText.toLowerCase().includes('training events')); if (h) h.scrollIntoView(); }")
    page.wait_for_timeout(1000)

    # 2. Scrape all event types
    print(" Scraping all event categories...")
    all_events_data = page.evaluate("""
        async () => {
            const results = [];

            // Define categories to look for
            const categories = [
                { label: 'Chain Events', type: 'Chain' },
                { label: 'Dates', type: 'Date' },
                { label: 'Random Events', type: 'Random' },
                { label: 'Special Events', type: 'Special' }
            ];

            for (const cat of categories) {
                // Find category headers
                const headers = Array.from(document.querySelectorAll('div, span, h2, h3, h4')).filter(el =>
                    el.innerText.trim() === cat.label && el.children.length === 0
                );

                for (const header of headers) {
                    // Find buttons in the following siblings or parent siblings
                    let container = header.parentElement;
                    let foundButtons = [];
                    let attempts = 0;

                    while (container && foundButtons.length === 0 && attempts < 5) {
                        foundButtons = Array.from(container.querySelectorAll('button')).filter(btn => {
                            const style = window.getComputedStyle(btn);
                            return style.display !== 'none' && style.visibility !== 'hidden';
                        });

                        if (foundButtons.length === 0) {
                            // Check next siblings of the header's ancestors
                            let parent = header.parentElement;
                            while (parent && parent.tagName !== 'BODY') {
                                if (parent.innerText.includes(cat.label)) {
                                    const next = parent.nextElementSibling;
                                    if (next) {
                                        const nextBtns = Array.from(next.querySelectorAll('button'));
                                        if (nextBtns.length > 0) {
                                            foundButtons = nextBtns;
                                            break;
                                        }
                                    }
                                }
                                parent = parent.parentElement;
                            }
                        }
                        container = container.parentElement;
                        attempts++;
                    }

                    // Scrape each button in the category
                    for (const btn of foundButtons) {
                        const eventName = btn.innerText.trim();
                        if (!eventName || results.some(r => r.name === eventName)) continue;

                        // Count arrows for chain/date importance
                        const arrows = (eventName.match(/>|❯/g) || []).length;

                        try {
                            btn.scrollIntoViewIfNeeded ? btn.scrollIntoViewIfNeeded() : null;
                            await new Promise(r => setTimeout(r, 100));
                            btn.click();
                            await new Promise(r => setTimeout(r, 500));

                            const popovers = Array.from(document.querySelectorAll('div')).filter(d =>
                                d.innerText.includes(eventName) &&
                                window.getComputedStyle(d).zIndex > 50 &&
                                d.innerText.length < 2500
                            );

                            if (popovers.length > 0) {
                                const pop = popovers[popovers.length - 1];
                                const hasOrDivider = pop.querySelector('[class*="divider_or"]') !== null ||
                                                     pop.innerText.includes('Randomly either') ||
                                                     pop.innerText.toLowerCase().includes(' or ');

                                const skillLinks = Array.from(pop.querySelectorAll('span, a')).filter(el =>
                                    el.innerText.length > 2 &&
                                    !el.innerText.includes('Energy') &&
                                    (window.getComputedStyle(el).color === 'rgb(102, 107, 255)' ||
                                     el.className.includes('linkcolor'))
                                );

                                const skills = [];
                                skillLinks.forEach(link => {
                                    const sName = link.innerText.trim();
                                    if (sName && !skills.some(s => s.name === sName)) {
                                        skills.push({ name: sName, is_or: hasOrDivider });
                                    }
                                });

                                results.push({
                                    name: eventName,
                                    type: cat.type,
                                    skills: skills,
                                    arrows: arrows
                                });
                            }

                            // Close the popover before moving to the next button
                            document.body.click();
                            await new Promise(r => setTimeout(r, 100));
                        } catch (err) {
                            console.log(`Failed to scrape event ${eventName}: ${err.message}`);
                        }
                    }
                }
            }
            return results;
        }
    """)

    # 3. Store all found events and identify golden skills
    if all_events_data:
        # Determine max arrows for Chain and Dates to identify final step
        max_arrows = {
            event_type: max(
                (e['arrows'] for e in all_events_data if e['type'] == event_type),
                default=0,
            )
            for event_type in ('Chain', 'Date')
        }

        # Normalise the rarity map once. setdefault keeps the first entry
        # when several raw names collapse to the same normalised key, which
        # matches a first-match scan over the map.
        gold_by_name = {}
        for raw_name, gold in (skill_rarity_map or {}).items():
            gold_by_name.setdefault(normalize(raw_name), bool(gold))

        for event in all_events_data:
            cur.execute("INSERT INTO support_events (card_id, event_name, event_type) VALUES (?, ?, ?)",
                        (card_id, event['name'], event['type']))
            event_id = cur.lastrowid

            for skill in event['skills']:
                is_gold = 1 if gold_by_name.get(normalize(skill['name'])) else 0

                # Heuristic: If it's the last step of a Chain or Date, it's likely gold
                if not is_gold and event['type'] in ['Chain', 'Date']:
                    if event['arrows'] >= 3 and event['arrows'] == max_arrows[event['type']]:
                        if len(event['skills']) <= 2 or "hint +" in skill['name'].lower():
                            is_gold = 1
                            print(f" ✨ Heuristic Gold: {skill['name']} in {event['name']}")

                if is_gold:
                    print(f" ✨ Verified Gold: {skill['name']}")

                cur.execute("""
                    INSERT INTO event_skills (event_id, skill_name, is_gold, is_or)
                    VALUES (?, ?, ?, ?)
                """, (event_id, skill['name'], is_gold, 1 if skill['is_or'] else 0))

        print(f" Scraped {len(all_events_data)} total events.")
    else:
        print(" No events found.")
|
||||
|
||||
def run_scraper():
|
||||
""" Run the web scraper to fetch card data from GameTora.com """
|
||||
|
||||
@@ -4,7 +4,7 @@ This file is the single source of truth for the application version.
|
||||
"""
|
||||
|
||||
# Semantic versioning: MAJOR.MINOR.PATCH
# Only the current value is kept; the earlier "13.0.3" assignment was
# immediately overwritten and had no effect.
VERSION: str = "13.0.6"

# Application metadata
APP_NAME: str = "UmamusumeCardManager"
|
||||
|
||||
Reference in New Issue
Block a user