import csv
import re
import time
from typing import Tuple
import pandas as pd
from DrissionPage import ChromiumPage, ChromiumOptions
from lxml import etree
import ttkbootstrap as ttk
from ttkbootstrap.constants import *
from tkinter import filedialog, scrolledtext
from threading import Thread

from app.helper.domain import switch_domain

class AmazonPriceScraper:
    """ttkbootstrap GUI that looks up Amazon prices for a list of ASINs.

    The user picks a marketplace, loads an Excel sheet containing an ``ASIN``
    column and starts a background thread.  The thread opens each product page
    in a Chromium instance driven by DrissionPage, extracts the price from the
    ``corePrice_feature_div`` element and finally writes all ``(ASIN, price)``
    pairs to a CSV file in the current working directory.
    """

    # Currency markers stripped from scraped prices.  The prefixed dollar forms
    # (C$, MX$) share one alternation with the bare "$" so that their letters
    # are removed together with the sign instead of being left behind.
    _CURRENCY_RE = re.compile(r'(?:MX|C)?\$|[¥￥£€]')
    # A trailing ",d" / ",dd" is a decimal comma (e.g. "12,99"); a thousands
    # group is always followed by three digits, so it never matches here.
    _DECIMAL_COMMA_RE = re.compile(r'^(.*\d),(\d{1,2})$')

    def __init__(self):
        """Create the main window and build all widgets (no browser is started yet)."""
        self.log_text = None
        self.file_label = None
        self.country_var = None
        self.asins = []
        self.page = None  # browser is created lazily when scraping starts
        self.is_running = False
        self._worker = None  # most recently started scraping thread, if any

        # GUI initialisation
        self.root = ttk.Window(themename="flatly")
        self.root.title("Amazon价格爬取工具")
        self.root.geometry("500x600")
        self.root.resizable(False, False)

        # Display name shown in the menu -> marketplace code
        self.domains = {
            "美国": "US", "英国": "UK", "日本": "JP",
            "法国": "FR", "德国": "DE", "加拿大": "CA"
        }

        self.setup_gui()

        # Apply the final window size and on-screen position
        self._center_window()

    def _center_window(self):
        """Size the window to 500x600 and position it on the screen.

        Despite the name the window is not centred: it is placed at
        ``free_width / 1.1`` horizontally (close to the right edge) and one
        third of the free height from the top.
        """
        window_width = 500
        window_height = 600

        # Screen dimensions
        screen_width = self.root.winfo_screenwidth()
        screen_height = self.root.winfo_screenheight()

        # Top-left corner of the window
        x = int((screen_width - window_width) / 1.1)
        y = int((screen_height - window_height) / 3)

        self.root.geometry(f"{window_width}x{window_height}+{x}+{y}")

    def setup_gui(self):
        """Create the country selector, file picker, log area and start/stop buttons."""
        ttk.Label(self.root, text="选择国家:").pack(pady=5)
        self.country_var = ttk.StringVar(value="美国")
        # The menu lists the display names (dict keys) so the default entry is
        # one of the selectable options; ``_country_code`` maps the selection
        # back to the marketplace code.
        ttk.OptionMenu(self.root, self.country_var, "美国", *self.domains.keys()).pack(pady=5)

        ttk.Button(self.root, text="选择ASIN Excel文件", command=self.load_excel).pack(pady=5)
        self.file_label = ttk.Label(self.root, text="未选择文件")
        self.file_label.pack(pady=5)

        self.log_text = scrolledtext.ScrolledText(self.root, height=20, width=80)
        self.log_text.pack(pady=10, padx=10, fill='both', expand=True)

        button_frame = ttk.Frame(self.root)
        button_frame.pack(pady=5)
        ttk.Button(button_frame, text="开始爬取", style=SUCCESS, command=self.start_scraping).pack(side='left', padx=5)
        ttk.Button(button_frame, text="停止", style=DANGER, command=self.stop_scraping).pack(side='left', padx=5)

    def _country_code(self) -> str:
        """Return the marketplace code (e.g. ``"US"``) for the current selection.

        A value that is not a key of ``self.domains`` (for instance a code set
        directly on ``country_var``) is returned unchanged.
        """
        selected = self.country_var.get()
        return self.domains.get(selected, selected)

    def log(self, message):
        """Append a timestamped line to the log widget and refresh the window.

        Called both from GUI callbacks and from the scraping thread.
        """
        self.log_text.insert(END, f"{time.strftime('%Y-%m-%d %H:%M:%S')}: {message}\n")
        self.log_text.see(END)
        self.root.update()

    def load_excel(self):
        """Ask for an Excel file and load its ``ASIN`` column into ``self.asins``.

        Empty cells are dropped and surrounding whitespace is removed so that a
        padded cell does not end up inside the product URL.  Any failure
        (unreadable file, missing column) is reported in the log.
        """
        file_path = filedialog.askopenfilename(filetypes=[("Excel files", "*.xlsx *.xls")])
        if not file_path:
            return
        try:
            df = pd.read_excel(file_path)
            column = df['ASIN'].dropna().astype(str).str.strip()
            self.asins = [asin for asin in column if asin]
            self.file_label.config(text=f"已加载: {file_path}")
            self.log(f"成功加载 {len(self.asins)} 个ASIN")
        except Exception as e:
            self.log(f"加载Excel失败: {str(e)}")

    def init_browser(self):
        """Start a visible, muted Chromium window with image loading disabled."""
        options = ChromiumOptions()
        options.headless(False).no_imgs(True).mute(True).set_load_mode("none")
        options.set_argument('--window-size=1024,768')
        self.page = ChromiumPage(options)
        self.log("浏览器初始化完成")

    def _close_browser(self) -> bool:
        """Quit the browser if one is open; return True when one was closed.

        ``self.page`` is detached before ``quit()`` is called, which narrows
        the window in which the GUI thread and the scraping thread could both
        act on the same page object.
        """
        page, self.page = self.page, None
        if page is None:
            return False
        page.quit()
        return True

    @staticmethod
    def clean_price(price: str) -> str:
        """Normalise a scraped price string to plain digits with a dot decimal.

        Currency markers (``$``, ``C$``, ``MX$``, ``¥``, ``￥``, ``£``, ``€``)
        and thousands separators are removed.  A comma followed by one or two
        digits at the end of the string is treated as a decimal comma, so
        ``"1.234,56 €"`` becomes ``"1234.56"`` rather than ``"1.23456"``.

        Returns:
            The cleaned number as text, or ``"未找到价格"`` when nothing is left.
        """
        cleaned = AmazonPriceScraper._CURRENCY_RE.sub('', price).strip()
        decimal_comma = AmazonPriceScraper._DECIMAL_COMMA_RE.match(cleaned)
        if decimal_comma:
            # European layout: dots/spaces/commas in the integer part are
            # grouping characters, the final comma is the decimal mark.
            integer_part = re.sub(r'[\s.,]', '', decimal_comma.group(1))
            cleaned = f"{integer_part}.{decimal_comma.group(2)}"
        else:
            cleaned = cleaned.replace(',', '')
        return cleaned.strip() or "未找到价格"

    def fetch_price(self, asin: str, max_retries: int = 3) -> Tuple[str, str]:
        """Open the product page for ``asin`` and return ``(asin, price_or_message)``.

        The page load is retried up to ``max_retries`` times when it raises or
        when the captcha form is shown (in which case the "continue" button is
        clicked first).  The second tuple element is the cleaned price, or a
        human-readable message when no price could be read.
        """
        # Work on a local reference: stop_scraping() may reset self.page from
        # the GUI thread while this method is running.
        page = self.page
        if page is None:
            return asin, "错误: 浏览器未初始化"

        # NOTE(review): presumably switch_domain expects the marketplace code
        # (that is what every explicit menu selection used to pass) — confirm.
        host = switch_domain(self._country_code()).replace("vendorcentral.", "")
        url = f"{host}dp/{asin}?th=1"

        for attempt in range(max_retries):
            try:
                page.get(url)
                if not page.ele('xpath://form[@action="/errors/validateCaptcha"]', timeout=1):
                    break

                self.log(f"ASIN {asin}: 检测到验证码页面")
                continue_button = page.ele('css:button.a-button-text', timeout=2)
                if continue_button:
                    self.log(f"ASIN {asin}: 点击 'Continue shopping'")
                    continue_button.click()

            except Exception as e:
                self.log(f"ASIN {asin}: 尝试 {attempt+1} 失败: {str(e)}")
                # Give up on the last attempt, or at once when the browser was
                # closed/replaced underneath us (retrying cannot succeed).
                if attempt == max_retries - 1 or self.page is not page:
                    return asin, f"错误: {str(e)}"

        try:
            page.wait.ele_displayed('xpath://div[@id="corePrice_feature_div"]', timeout=3)
            tree = etree.HTML(page.html)
            prices = tree.xpath('//div[@id="corePrice_feature_div"]//span[@class="a-offscreen"]/text()')
            if not prices:
                return asin, "未找到价格元素"
            return asin, self.clean_price(prices[0])
        except Exception as e:
            return asin, f"错误: {str(e)}"

    def _collect_prices(self, results):
        """Fetch every loaded ASIN into ``results`` until done or stop is requested."""
        for asin in self.asins:
            if not self.is_running:
                break
            result = self.fetch_price(asin)
            results.append(result)
            self.log(f"ASIN {result[0]}: {result[1]}")

    def _save_results(self, results):
        """Write ``results`` to a timestamped CSV file; do nothing when empty."""
        if not results:
            return
        output_file = f"amazon_prices_{self._country_code()}_{int(time.time())}.csv"
        with open(output_file, "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["ASIN", "Price"])
            writer.writerows(results)
        self.log(f"结果已保存至 {output_file}")

    def scrape(self):
        """Thread target: scrape all ASINs, save the CSV and close the browser.

        Whatever happens, ``is_running`` is reset and the browser is closed so
        that a failure does not leave the tool unable to start again.
        """
        results = []
        try:
            if not self.page:
                self.init_browser()  # the browser is only started once scraping begins

            start_time = time.time()
            try:
                self._collect_prices(results)
            finally:
                # Keep whatever was collected even if the loop died part-way.
                self._save_results(results)
            self.log(f"总耗时: {time.time() - start_time:.2f}秒")
        except Exception as e:
            # Top-level boundary of the worker thread: report instead of dying silently.
            self.log(f"爬取出错: {str(e)}")
        finally:
            self.is_running = False
            if self._close_browser():
                self.log("浏览器已关闭")

    def start_scraping(self):
        """Start the background scraping thread unless one is already active."""
        if not self.asins:
            self.log("请先选择包含ASIN的Excel文件")
            return
        if self.is_running:
            return
        # After "stop" the previous worker may still be finishing its current
        # ASIN; starting another one now would re-arm the old loop as well.
        if self._worker is not None and self._worker.is_alive():
            self.log("上一次爬取仍在收尾，请稍后再试")
            return
        self.is_running = True
        self.log("开始爬取...")
        self._worker = Thread(target=self.scrape)
        self._worker.start()

    def stop_scraping(self):
        """Ask the worker to stop and close the browser right away."""
        self.is_running = False
        self.log("已停止爬取")
        if self._close_browser():
            self.log("浏览器已关闭")

    def run(self):
        """Run the Tk main loop; on exit stop the worker and close the browser."""
        try:
            self.root.mainloop()
        finally:
            self.is_running = False
            self._close_browser()

if __name__ == "__main__":
    # Script entry point: build the GUI and block in its event loop until the
    # window is closed.
    AmazonPriceScraper().run()