feat: Implement new database management and scraping utilities, and update application version to 13.0.0.
This commit is contained in:
53
fast_rescrape_limited.py
Normal file
53
fast_rescrape_limited.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import sqlite3
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Add this script's own directory to the import path so the `scraper` package resolves
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from scraper.gametora_scraper import scrape_support_card, sync_playwright
|
||||
|
||||
# Location of the SQLite database file. The path is relative, so it is
# resolved against the current working directory at connect time.
DB_PATH = os.path.join("database", "umamusume.db")
|
||||
|
||||
def fast_rescrape_limited(limit=50, db_path=None):
    """Re-scrape a bounded batch of support cards that have no events.

    Selects up to ``limit`` rows from ``support_cards`` whose ``card_id``
    does not appear in ``support_events`` (highest rarity first, then by
    ascending ``card_id``) and passes each card's URL to
    ``scrape_support_card`` using a single headless Chromium page.

    A failure while scraping one card is printed and the loop moves on to
    the next card, so one bad page does not abort the batch.

    Args:
        limit: Maximum number of cards to process in this run. Defaults to
            50, which matches the previous hard-coded batch size.
        db_path: Path of the SQLite database to use. Defaults to the
            module-level ``DB_PATH`` when ``None``.

    Returns:
        None.
    """
    target_db = DB_PATH if db_path is None else db_path
    conn = sqlite3.connect(target_db)
    try:
        cur = conn.cursor()

        # Find cards that have NO events. The batch size is bound as a
        # parameter rather than interpolated into the SQL text.
        cur.execute(
            """
            SELECT card_id, name, gametora_url
            FROM support_cards
            WHERE card_id NOT IN (SELECT DISTINCT card_id FROM support_events)
            ORDER BY rarity DESC, card_id ASC
            LIMIT ?
            """,
            (limit,),
        )
        cards_to_rescrape = cur.fetchall()
        total = len(cards_to_rescrape)

        print(f"Found {total} cards to re-scrape (Limited to {limit}).")

        # Nothing to do: skip starting a browser entirely.
        if not cards_to_rescrape:
            return

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                # One context/page is reused for every card in the batch.
                context = browser.new_context()
                page = context.new_page()

                for count, (_card_id, name, url) in enumerate(cards_to_rescrape, start=1):
                    print(f"[{count}/{total}] Re-scraping: {name}")
                    try:
                        scrape_support_card(page, url, conn)
                    except Exception as e:
                        # Deliberate best-effort: report and continue with
                        # the remaining cards.
                        print(f" Error: {e}")
            finally:
                # Close the browser even if the loop is interrupted.
                browser.close()
    finally:
        # Always release the database connection, including on early
        # return and on errors raised before or during scraping.
        conn.close()

    print("Limited re-scrape complete.")
|
||||
|
||||
# Run the limited re-scrape only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    fast_rescrape_limited()
|
||||
Reference in New Issue
Block a user