feat: Add database query functions, schema migrations, version-based syncing, a seed database, and a Gametora scraper.
This commit is contained in:
Binary file not shown.
@@ -108,8 +108,44 @@ def run_migrations():
|
|||||||
pass # Column already exists
|
pass # Column already exists
|
||||||
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
repair_image_paths(conn)
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
def repair_image_paths(conn):
    """Populate missing ``image_path`` values for cards in older databases.

    Selects every row of ``support_cards`` whose ``image_path`` is NULL or
    empty, derives a filename from the numeric ID embedded in the card's
    ``gametora_url`` plus a sanitised card name, and stores it as
    ``images/<id>_<name>.png``.

    Rows are left untouched when they have no URL, no name, or a URL that
    does not contain a ``/supports/<digits>-`` segment.

    Args:
        conn: An open DB-API connection (sqlite3-style ``?`` placeholders)
            to a database containing the ``support_cards`` table. The
            connection is committed only if at least one row was repaired;
            it is never closed here.

    Returns:
        None.
    """
    # Function-scope import, as in the original block; the module-level
    # imports of this file are not visible from here.
    import re

    print("Checking for missing image paths to repair...")
    cur = conn.cursor()

    # Find cards with a missing image path (NULL or empty string).
    cur.execute("SELECT card_id, name, gametora_url FROM support_cards WHERE image_path IS NULL OR image_path = ''")
    to_repair = cur.fetchall()

    if not to_repair:
        return

    # Compiled once rather than on every loop iteration.
    # Extracts the ID from the URL (e.g. 30154 from .../supports/30154-mejiro-ramonu).
    id_pattern = re.compile(r'/supports/(\d+)-')
    # Characters replaced with "_" to build a safe filename. Per the original
    # comment this is meant to mirror the scraper's sanitisation — the scraper
    # code is not visible here, so keep the two in sync when changing either.
    unsafe_chars = re.compile(r'[<>:"/\\|?*]')

    updates = []
    for card_id, name, url in to_repair:
        # A NULL name would make the substitution below raise TypeError and
        # abort the whole migration; skip such rows instead of crashing.
        if not url or name is None:
            continue

        match = id_pattern.search(url)
        if not match:
            continue

        stable_id = match.group(1)
        safe_name = unsafe_chars.sub('_', name)
        filename = f"{stable_id}_{safe_name}.png"
        updates.append((f"images/{filename}", card_id))

    if updates:
        # Update DB with images/filename for every repairable card.
        cur.executemany("UPDATE support_cards SET image_path = ? WHERE card_id = ?", updates)
        conn.commit()
        print(f"Successfully repaired {len(updates)} image paths!")
|
||||||
|
|
||||||
def check_for_updates():
|
def check_for_updates():
|
||||||
"""Check if database version matches app version, sync if outdated"""
|
"""Check if database version matches app version, sync if outdated"""
|
||||||
if getattr(sys, 'frozen', False):
|
if getattr(sys, 'frozen', False):
|
||||||
|
|||||||
@@ -549,242 +549,200 @@ def scrape_hints(page, card_id, cur):
|
|||||||
print(f" Found {len(hints)} hints")
|
print(f" Found {len(hints)} hints")
|
||||||
|
|
||||||
def scrape_events(page, card_id, cur):
|
def scrape_events(page, card_id, cur):
|
||||||
"""Scrape the LAST chain event (Golden Perk) with OR options"""
|
"""Scrape all events including Chain, Dates, Random and Special"""
|
||||||
|
|
||||||
# Use a flag to avoid adding multiple console listeners
|
# Use a flag to avoid adding multiple console listeners
|
||||||
if not hasattr(page, "_console_attached"):
|
if not hasattr(page, "_console_attached"):
|
||||||
page.on("console", lambda msg: print(f" [JS Console] {msg.text}") if "scrapping" not in msg.text.lower() else None)
|
page.on("console", lambda msg: print(f" [JS Console] {msg.text}") if "scrapping" not in msg.text.lower() else None)
|
||||||
page._console_attached = True
|
page._console_attached = True
|
||||||
|
|
||||||
# 1. First, build a map of skills from the 'Skills from events' summary section
|
# 1. Build a map of skills from the 'Skills from events' summary section
|
||||||
# This helps us identify which skills are Rare (Gold)
|
# This remains useful for identifying golden skills.
|
||||||
skill_rarity_map = page.evaluate("""
|
skill_rarity_map = page.evaluate("""
|
||||||
() => {
|
() => {
|
||||||
const map = {};
|
const map = {};
|
||||||
console.log("Building Skill Rarity Map...");
|
|
||||||
|
|
||||||
// 1. Find all skill containers. They usually have a name and a 'Details' button.
|
|
||||||
// In the "Skills from events" or "Support hints" sections.
|
|
||||||
const containers = Array.from(document.querySelectorAll('div')).filter(d =>
|
const containers = Array.from(document.querySelectorAll('div')).filter(d =>
|
||||||
(d.innerText.includes('Details') || d.innerText.includes('Reward')) && d.innerText.length < 500
|
(d.innerText.includes('Details') || d.innerText.includes('Reward')) && d.innerText.length < 500
|
||||||
);
|
);
|
||||||
|
|
||||||
containers.forEach(c => {
|
containers.forEach(c => {
|
||||||
// Try to extract the skill name. It's usually the first text node or a bold tag.
|
|
||||||
const nameNode = c.querySelector('b, span[font-weight="bold"], div[font-weight="bold"]');
|
const nameNode = c.querySelector('b, span[font-weight="bold"], div[font-weight="bold"]');
|
||||||
let name = "";
|
let name = nameNode ? nameNode.innerText.trim() : c.innerText.split('Details')[0].replace(/\\n/g, ' ').trim();
|
||||||
if (nameNode) {
|
|
||||||
name = nameNode.innerText.trim();
|
|
||||||
} else {
|
|
||||||
// Fallback to text before 'Details'
|
|
||||||
name = c.innerText.split('Details')[0].replace(/\\n/g, ' ').trim();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (name && name.length > 2) {
|
if (name && name.length > 2) {
|
||||||
const style = window.getComputedStyle(c);
|
const style = window.getComputedStyle(c);
|
||||||
const nameStyle = nameNode ? window.getComputedStyle(nameNode) : null;
|
const nameStyle = nameNode ? window.getComputedStyle(nameNode) : null;
|
||||||
|
|
||||||
// Golden skills have a specific background
|
|
||||||
const isGold = style.backgroundImage.includes('linear-gradient') ||
|
const isGold = style.backgroundImage.includes('linear-gradient') ||
|
||||||
style.backgroundColor.includes('rgb(255, 193, 7)') ||
|
style.backgroundColor.includes('rgb(255, 193, 7)') ||
|
||||||
(nameStyle && nameStyle.color === 'rgb(255, 193, 7)') ||
|
(nameStyle && nameStyle.color === 'rgb(255, 193, 7)') ||
|
||||||
c.className.includes('kkspcu') ||
|
c.className.includes('kkspcu');
|
||||||
c.innerHTML.includes('kkspcu');
|
|
||||||
|
|
||||||
const normalized = name.toLowerCase().replace(/\\s+/g, ' ').replace(/[()()-]/g, '').trim();
|
const normalized = name.toLowerCase().replace(/\\s+/g, ' ').replace(/[()()-]/g, '').trim();
|
||||||
map[normalized] = isGold;
|
map[normalized] = isGold;
|
||||||
console.log(`Mapped Skill: "${name}" [${normalized}] -> Gold: ${isGold}`);
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
""")
|
""")
|
||||||
|
|
||||||
# Scroll to the Events section specifically
|
# 2. Scrape all event types
|
||||||
print(" Ensuring events are loaded...")
|
print(" Scraping all event categories...")
|
||||||
page.evaluate("() => { const h = Array.from(document.querySelectorAll('h2, h1, div')).find(el => el.innerText.toLowerCase().includes('training events')); if (h) h.scrollIntoView(); }")
|
all_events_data = page.evaluate("""
|
||||||
page.wait_for_timeout(1000)
|
|
||||||
|
|
||||||
# 2. Scrape ONLY the LAST chain event (Golden Perk) with OR options
|
|
||||||
golden_perk_data = page.evaluate("""
|
|
||||||
async () => {
|
async () => {
|
||||||
console.log("Scraping Golden Perk (last chain event)...");
|
const results = [];
|
||||||
|
|
||||||
// Find all chain event buttons
|
// Define categories to look for
|
||||||
const getChainEventButtons = () => {
|
const categories = [
|
||||||
const buttons = [];
|
{ label: 'Chain Events', type: 'Chain' },
|
||||||
// Look for "Chain Events" text (case-insensitive substring)
|
{ label: 'Dates', type: 'Date' },
|
||||||
const labels = Array.from(document.querySelectorAll('div, span, h2, h3, h4')).filter(el =>
|
{ label: 'Random Events', type: 'Random' },
|
||||||
el.innerText.toLowerCase().includes('chain events') && el.innerText.trim().length < 20
|
{ label: 'Special Events', type: 'Special' }
|
||||||
|
];
|
||||||
|
|
||||||
|
for (const cat of categories) {
|
||||||
|
// Find category headers
|
||||||
|
const headers = Array.from(document.querySelectorAll('div, span, h2, h3, h4')).filter(el =>
|
||||||
|
el.innerText.trim() === cat.label && el.children.length === 0
|
||||||
);
|
);
|
||||||
|
|
||||||
labels.forEach(label => {
|
for (const header of headers) {
|
||||||
// The buttons are usually in the same container or next container
|
// Find buttons in the following siblings or parent siblings
|
||||||
let container = label.parentElement;
|
let container = header.parentElement;
|
||||||
|
let foundButtons = [];
|
||||||
let attempts = 0;
|
let attempts = 0;
|
||||||
while (container && container.querySelectorAll('button').length === 0 && attempts < 5) {
|
|
||||||
container = container.nextElementSibling || container.parentElement;
|
|
||||||
attempts++;
|
|
||||||
if (container && container.tagName === 'BODY') break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (container) {
|
while (container && foundButtons.length === 0 && attempts < 5) {
|
||||||
const btns = Array.from(container.querySelectorAll('button'));
|
foundButtons = Array.from(container.querySelectorAll('button')).filter(btn => {
|
||||||
btns.forEach(btn => {
|
|
||||||
const text = btn.innerText.trim();
|
|
||||||
const style = window.getComputedStyle(btn);
|
const style = window.getComputedStyle(btn);
|
||||||
const isVisible = style.display !== 'none' && style.visibility !== 'hidden';
|
return style.display !== 'none' && style.visibility !== 'hidden';
|
||||||
|
|
||||||
// Look for arrows (regular or heavy)
|
|
||||||
if (isVisible && (text.includes('>') || text.includes('❯'))) {
|
|
||||||
buttons.push(btn);
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
}
|
|
||||||
});
|
|
||||||
return buttons;
|
|
||||||
};
|
|
||||||
|
|
||||||
const buttons = getChainEventButtons();
|
if (foundButtons.length === 0) {
|
||||||
console.log(`Found ${buttons.length} chain event buttons`);
|
// Check next siblings of the header's ancestors
|
||||||
|
let sibling = header;
|
||||||
if (buttons.length === 0) {
|
let parent = header.parentElement;
|
||||||
return null;
|
while(parent && parent.tagName !== 'BODY') {
|
||||||
}
|
if (parent.innerText.includes(cat.label)) {
|
||||||
|
const next = parent.nextElementSibling;
|
||||||
let goldenPerkButton = null;
|
if (next) {
|
||||||
let maxArrows = 0;
|
const nextBtns = Array.from(next.querySelectorAll('button'));
|
||||||
|
if (nextBtns.length > 0) {
|
||||||
for (const btn of buttons) {
|
foundButtons = nextBtns;
|
||||||
const text = btn.innerText.trim();
|
|
||||||
// Count both regular and heavy arrows
|
|
||||||
const arrowCount = (text.match(/>|❯/g) || []).length;
|
|
||||||
|
|
||||||
// If it has three heavy arrows, it's almost certainly the golden perk
|
|
||||||
if (text.includes('❯❯❯')) {
|
|
||||||
goldenPerkButton = btn;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (arrowCount > maxArrows) {
|
|
||||||
maxArrows = arrowCount;
|
|
||||||
goldenPerkButton = btn;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
parent = parent.parentElement;
|
||||||
if (!goldenPerkButton) {
|
}
|
||||||
console.log("No golden perk button found");
|
}
|
||||||
return null;
|
container = container.parentElement;
|
||||||
|
attempts++;
|
||||||
}
|
}
|
||||||
|
|
||||||
const eventName = goldenPerkButton.innerText.trim();
|
// Scrape each button in the category
|
||||||
console.log(`Found Golden Perk: ${eventName} (${maxArrows} arrows)`);
|
for (const btn of foundButtons) {
|
||||||
|
const eventName = btn.innerText.trim();
|
||||||
|
if (!eventName || results.some(r => r.name === eventName)) continue;
|
||||||
|
|
||||||
|
// Count arrows for chain/date importance
|
||||||
|
const arrows = (eventName.match(/>|❯/g) || []).length;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Click to open popover
|
btn.scrollIntoViewIfNeeded ? btn.scrollIntoViewIfNeeded() : null;
|
||||||
goldenPerkButton.scrollIntoViewIfNeeded ? goldenPerkButton.scrollIntoViewIfNeeded() : null;
|
|
||||||
await new Promise(r => setTimeout(r, 100));
|
await new Promise(r => setTimeout(r, 100));
|
||||||
goldenPerkButton.click();
|
btn.click();
|
||||||
await new Promise(r => setTimeout(r, 600));
|
await new Promise(r => setTimeout(r, 500));
|
||||||
|
|
||||||
// Find popover
|
|
||||||
const popovers = Array.from(document.querySelectorAll('div')).filter(d =>
|
const popovers = Array.from(document.querySelectorAll('div')).filter(d =>
|
||||||
d.innerText.includes(eventName) &&
|
d.innerText.includes(eventName) &&
|
||||||
window.getComputedStyle(d).zIndex > 50 &&
|
window.getComputedStyle(d).zIndex > 50 &&
|
||||||
d.innerText.length < 2500
|
d.innerText.length < 2500
|
||||||
);
|
);
|
||||||
|
|
||||||
if (popovers.length === 0) {
|
if (popovers.length > 0) {
|
||||||
console.log(`Popover NOT found for ${eventName}`);
|
|
||||||
document.body.click();
|
|
||||||
return { name: eventName, type: 'Chain', skills: [] };
|
|
||||||
}
|
|
||||||
|
|
||||||
const pop = popovers[popovers.length - 1];
|
const pop = popovers[popovers.length - 1];
|
||||||
console.log(`Found popover for ${eventName}`);
|
|
||||||
|
|
||||||
// Check for OR structure - look for "Randomly either" or "or" divider
|
|
||||||
const hasOrDivider = pop.querySelector('[class*="divider_or"]') !== null ||
|
const hasOrDivider = pop.querySelector('[class*="divider_or"]') !== null ||
|
||||||
pop.innerText.includes('Randomly either') ||
|
pop.innerText.includes('Randomly either') ||
|
||||||
pop.innerText.toLowerCase().includes(' or ');
|
pop.innerText.toLowerCase().includes(' or ');
|
||||||
|
|
||||||
// Find all skill names (purple/blue links)
|
|
||||||
const skillLinks = Array.from(pop.querySelectorAll('span, a')).filter(el =>
|
const skillLinks = Array.from(pop.querySelectorAll('span, a')).filter(el =>
|
||||||
el.innerText.length > 2 &&
|
el.innerText.length > 2 &&
|
||||||
!el.innerText.includes('Energy') &&
|
!el.innerText.includes('Energy') &&
|
||||||
!el.innerText.includes('bond') &&
|
|
||||||
(window.getComputedStyle(el).color === 'rgb(102, 107, 255)' ||
|
(window.getComputedStyle(el).color === 'rgb(102, 107, 255)' ||
|
||||||
el.className.includes('linkcolor'))
|
el.className.includes('linkcolor'))
|
||||||
);
|
);
|
||||||
|
|
||||||
console.log(`Found ${skillLinks.length} potential skills in popover`);
|
|
||||||
|
|
||||||
const skills = [];
|
const skills = [];
|
||||||
skillLinks.forEach(link => {
|
skillLinks.forEach(link => {
|
||||||
const skillName = link.innerText.trim();
|
const sName = link.innerText.trim();
|
||||||
if (skillName && skillName.length > 2 && !skills.some(s => s.name === skillName)) {
|
if (sName && !skills.some(s => s.name === sName)) {
|
||||||
// If there's an OR divider, all skills in this popover are part of OR groups
|
skills.push({ name: sName, is_or: hasOrDivider });
|
||||||
const isOr = hasOrDivider;
|
|
||||||
skills.push({ name: skillName, is_or: isOr });
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// Close popover
|
results.push({
|
||||||
document.body.click();
|
name: eventName,
|
||||||
await new Promise(r => setTimeout(r, 200));
|
type: cat.type,
|
||||||
|
skills: skills,
|
||||||
return { name: eventName, type: 'Chain', skills: skills };
|
arrows: arrows
|
||||||
|
});
|
||||||
} catch (err) {
|
|
||||||
console.log(`Error clicking ${eventName}: ${err.message}`);
|
|
||||||
return { name: eventName, type: 'Chain', skills: [] };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
document.body.click();
|
||||||
|
await new Promise(r => setTimeout(r, 100));
|
||||||
|
} catch (err) {
|
||||||
|
console.log(`Failed to scrape event ${eventName}: ${err.message}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return results;
|
||||||
}
|
}
|
||||||
""")
|
""")
|
||||||
|
|
||||||
# 3. Store ONLY the golden perk in database
|
# 3. Store all found events and identify golden skills
|
||||||
if golden_perk_data:
|
if all_events_data:
|
||||||
cur.execute("""
|
# Determine max arrows for Chain and Dates to identify final step
|
||||||
INSERT INTO support_events (card_id, event_name, event_type)
|
max_arrows = {
|
||||||
VALUES (?, ?, ?)
|
'Chain': max([e['arrows'] for e in all_events_data if e['type'] == 'Chain'] + [0]),
|
||||||
""", (card_id, golden_perk_data['name'], golden_perk_data['type']))
|
'Date': max([e['arrows'] for e in all_events_data if e['type'] == 'Date'] + [0])
|
||||||
|
}
|
||||||
|
|
||||||
|
for event in all_events_data:
|
||||||
|
cur.execute("INSERT INTO support_events (card_id, event_name, event_type) VALUES (?, ?, ?)",
|
||||||
|
(card_id, event['name'], event['type']))
|
||||||
event_id = cur.lastrowid
|
event_id = cur.lastrowid
|
||||||
|
|
||||||
for skill in golden_perk_data['skills']:
|
for skill in event['skills']:
|
||||||
# Normalization helper
|
|
||||||
def normalize(s):
|
def normalize(s):
|
||||||
return s.lower().replace(" hint +1", "").replace(" hint +3", "").replace(" hint +5", "").replace(" hint +", "").strip().replace(" ", " ").replace("-", "").replace("(", "").replace(")", "").replace(" ", "")
|
# Remove hint suffix and special characters
|
||||||
|
s = s.lower().split(' hint +')[0]
|
||||||
|
return re.sub(r'[()()\-\s\+]', '', s).strip()
|
||||||
|
|
||||||
skill_name = normalize(skill['name'])
|
n_name = normalize(skill['name'])
|
||||||
|
|
||||||
# Use extra aggressive name matching against the map values
|
|
||||||
# (The map keys are already normalized)
|
|
||||||
is_gold = 0
|
is_gold = 0
|
||||||
for k, gold in skill_rarity_map.items():
|
for k, gold in skill_rarity_map.items():
|
||||||
if normalize(k) == skill_name:
|
if normalize(k) == n_name:
|
||||||
is_gold = 1 if gold else 0
|
is_gold = 1 if gold else 0
|
||||||
break
|
break
|
||||||
|
|
||||||
# Fallback 1: If it's a chain event and specifically the last one, it's almost certainly gold
|
# Heuristic: If it's the last step of a Chain or Date, it's likely gold
|
||||||
if not is_gold and golden_perk_data.get('type') == 'Chain':
|
if not is_gold and event['type'] in ['Chain', 'Date']:
|
||||||
# Check for "hint" patterns which usually accompany gold perks in chain events
|
if event['arrows'] >= 3 and event['arrows'] == max_arrows[event['type']]:
|
||||||
if "hint +" in skill['name'].lower() or len(golden_perk_data['skills']) <= 2:
|
if len(event['skills']) <= 2 or "hint +" in skill['name'].lower():
|
||||||
is_gold = 1
|
is_gold = 1
|
||||||
print(f" ✨ Golden Skill Fallback (Last Chain Event): {skill['name']}")
|
print(f" ✨ Heuristic Gold: {skill['name']} in {event['name']}")
|
||||||
|
|
||||||
if is_gold:
|
if is_gold: print(f" ✨ Verified Gold: {skill['name']}")
|
||||||
print(f" ✨ Golden Skill Verified: {skill['name']}")
|
|
||||||
|
|
||||||
cur.execute("""
|
cur.execute("""
|
||||||
INSERT INTO event_skills (event_id, skill_name, is_gold, is_or)
|
INSERT INTO event_skills (event_id, skill_name, is_gold, is_or)
|
||||||
VALUES (?, ?, ?, ?)
|
VALUES (?, ?, ?, ?)
|
||||||
""", (event_id, skill['name'], is_gold, 1 if skill['is_or'] else 0))
|
""", (event_id, skill['name'], is_gold, 1 if skill['is_or'] else 0))
|
||||||
|
|
||||||
skill_count = len(golden_perk_data['skills'])
|
print(f" Scraped {len(all_events_data)} total events.")
|
||||||
or_count = sum(1 for s in golden_perk_data['skills'] if s['is_or'])
|
|
||||||
print(f" Golden Perk: {golden_perk_data['name']} ({skill_count} skills, {or_count} with OR)")
|
|
||||||
else:
|
else:
|
||||||
print(f" No Golden Perk found for this card")
|
print(f" No events found.")
|
||||||
|
|
||||||
def run_scraper():
|
def run_scraper():
|
||||||
""" Run the web scraper to fetch card data from GameTora.com """
|
""" Run the web scraper to fetch card data from GameTora.com """
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ This file is the single source of truth for the application version.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# Semantic versioning: MAJOR.MINOR.PATCH
|
# Semantic versioning: MAJOR.MINOR.PATCH
|
||||||
VERSION: str = "13.0.3"
|
VERSION: str = "13.0.6"
|
||||||
|
|
||||||
# Application metadata
|
# Application metadata
|
||||||
APP_NAME: str = "UmamusumeCardManager"
|
APP_NAME: str = "UmamusumeCardManager"
|
||||||
|
|||||||
Reference in New Issue
Block a user