"""
Amazon价格爬取工具
"""

import csv
import re
import time
from typing import Tuple, List, Optional
from datetime import datetime
import pandas as pd
from DrissionPage import ChromiumPage, ChromiumOptions
from dotenv import load_dotenv
from lxml import etree
import ttkbootstrap as ttk
from ttkbootstrap.scrolled import ScrolledText
from ttkbootstrap.tooltip import ToolTip
from tkinter import filedialog
from threading import Thread
import logging

from app.helper.domain import switch_domain

# Configure logging for the process: INFO level, records formatted as
# "<time> - <level> - <message>". `logger` is the module-level logger used below.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


class ModernAmazonPriceScraper:
    """GUI application (ttkbootstrap) for batch-scraping Amazon prices by ASIN."""

    # Named colour palette (hex strings).
    COLORS = {
        'success': '#28a745',
        'danger': '#dc3545',
        'warning': '#ffc107',
        'info': '#17a2b8',
        'primary': '#0d6efd',
        'secondary': '#6c757d',
        'dark': '#343a40',
        'light': '#f8f9fa',
        'card_bg': '#2d3436',
        'text_muted': '#6c757d',
    }

    # Maps the label shown in the site selector to the country code that is
    # passed to `switch_domain` and used in the output file name.
    COUNTRY_DOMAINS = {
        "美国 (US)": "US",
        "英国 (UK)": "UK",
        "日本 (JP)": "JP",
        "法国 (FR)": "FR",
        "德国 (DE)": "DE",
        "加拿大 (CA)": "CA",
    }

    # Window size used to build the Tk geometry string in `_center_window`.
    WINDOW_WIDTH = 550
    WINDOW_HEIGHT = 780

    def __init__(self):
        """Initialise scraper state, create the main window and build the UI."""
        # --- Scraping state ---
        self.asins: List[str] = []
        self.results: List[Tuple[str, str]] = []  # (ASIN, price-or-status) pairs collected so far
        self.current_index: int = 0  # index to resume from; 0 means "start from the beginning"
        self.success_count: int = 0
        self.failed_count: int = 0
        self.is_running: bool = False
        self.page: Optional[ChromiumPage] = None
        self._scrape_thread: Optional[Thread] = None

        # --- Widget references, assigned by the _build_* methods ---
        self.stats_labels: dict = {}
        self.log_text: Optional[ScrolledText] = None
        self.file_label: Optional[ttk.Label] = None
        self.country_var: Optional[ttk.StringVar] = None
        self.progress_bar: Optional[ttk.Progressbar] = None
        self.progress_label: Optional[ttk.Label] = None
        self.status_label: Optional[ttk.Label] = None
        self.start_btn: Optional[ttk.Button] = None
        self.stop_btn: Optional[ttk.Button] = None

        # --- Main window: dark theme, fixed size ---
        window = ttk.Window(themename="darkly")
        window.title("🛒 Amazon 价格爬取工具 Pro")
        window.resizable(False, False)
        self.root = window

        # Styles must exist before the widgets that reference them are created;
        # the window is positioned last.
        self._setup_custom_styles()
        self._build_gui()
        self._center_window()

    @staticmethod
    def _setup_custom_styles():
        """Register the custom ttk styles referenced by the widgets of this window."""
        bold_button_font = ('Segoe UI', 10, 'bold')

        # (style name, options) pairs, applied in this order.
        style_table = (
            # Card frame
            ('Card.TFrame', {'background': '#2d3436', 'relief': 'flat'}),
            # Main title
            ('Title.TLabel', {'font': ('Segoe UI', 24, 'bold'), 'foreground': '#ffffff'}),
            # Subtitle
            ('Subtitle.TLabel', {'font': ('Segoe UI', 10), 'foreground': '#b2bec3'}),
            # Section heading
            ('SectionTitle.TLabel', {'font': ('Segoe UI', 11, 'bold'), 'foreground': '#74b9ff'}),
            # Statistic value
            ('Stats.TLabel', {'font': ('Segoe UI', 18, 'bold'), 'foreground': '#00cec9'}),
            # Statistic caption
            ('StatsLabel.TLabel', {'font': ('Segoe UI', 9), 'foreground': '#b2bec3'}),
            # Success / danger buttons
            ('success.TButton', {'font': bold_button_font}),
            ('danger.TButton', {'font': bold_button_font}),
        )

        style = ttk.Style()
        for style_name, options in style_table:
            style.configure(style_name, **options)

    def _center_window(self):
        """将窗口居中显示"""
        screen_width = self.root.winfo_screenwidth()
        screen_height = self.root.winfo_screenheight()

        x = int((screen_width - self.WINDOW_WIDTH) / 2)
        y = int((screen_height - self.WINDOW_HEIGHT) / 3)

        self.root.geometry(f"{self.WINDOW_WIDTH}x{self.WINDOW_HEIGHT}+{x}+{y}")

    def _build_gui(self):
        """Create the padded root container and build each UI section inside it."""
        container = ttk.Frame(self.root, padding=20)
        container.pack(fill='both', expand=True)

        # Sections are packed top to bottom in this order.
        section_builders = (
            self._build_header,       # 1. title area
            self._build_config_card,  # 2. configuration card
            self._build_stats_card,   # 3. statistics cards
            self._build_log_area,     # 4. log pane
            self._build_status_bar,   # 5. bottom status bar
        )
        for build_section in section_builders:
            build_section(container)

    @staticmethod
    def _build_header(parent: ttk.Frame):
        """Build the header: the main title with a subtitle underneath."""
        header = ttk.Frame(parent)
        header.pack(fill='x', pady=(0, 20))

        # Main title
        ttk.Label(
            header,
            text="🛒 Amazon 价格爬取工具",
            style='Title.TLabel',
        ).pack()

        # Subtitle
        ttk.Label(
            header,
            text="快速批量获取Amazon商品价格，支持多站点爬取",
            style='Subtitle.TLabel',
        ).pack(pady=(5, 0))

    def _build_config_card(self, parent: ttk.Frame):
        """Build the configuration card: site selector, file picker, progress bar and action buttons."""
        # Shared look for the small grey informational labels.
        muted_text = {'font': ('Segoe UI', 9), 'foreground': '#b2bec3'}

        card = ttk.LabelFrame(parent, text=" ⚙️ 爬取配置 ", padding=15, style="info")
        card.pack(fill='x', pady=(0, 15))

        # Top row: site selector on the left, file picker on the right.
        selector_row = ttk.Frame(card)
        selector_row.pack(fill='x', pady=(0, 10))

        # Site selector
        site_box = ttk.Frame(selector_row)
        site_box.pack(side='left', fill='x', expand=True)

        ttk.Label(site_box, text="📍 目标站点", style='SectionTitle.TLabel').pack(anchor='w')

        self.country_var = ttk.StringVar(value="美国 (US)")
        site_combo = ttk.Combobox(
            site_box,
            textvariable=self.country_var,
            values=list(self.COUNTRY_DOMAINS),
            state='readonly',
            width=20,
            style="info",
        )
        site_combo.pack(anchor='w', pady=(5, 0))
        ToolTip(site_combo, text="选择要爬取的Amazon站点")

        # File picker button
        picker_box = ttk.Frame(selector_row)
        picker_box.pack(side='right', padx=(20, 0))

        pick_file_btn = ttk.Button(
            picker_box,
            text="📂 选择Excel文件",
            command=self._load_excel,
            style="outline-info",
            width=18,
        )
        pick_file_btn.pack()
        ToolTip(pick_file_btn, text="选择包含ASIN列表的Excel文件")

        # Label showing which file (if any) is loaded
        self.file_label = ttk.Label(card, text="📋 尚未选择文件", **muted_text)
        self.file_label.pack(anchor='w', pady=(5, 10))

        # Divider
        ttk.Separator(card, style="secondary").pack(fill='x', pady=10)

        # Progress text and bar
        progress_box = ttk.Frame(card)
        progress_box.pack(fill='x')

        self.progress_label = ttk.Label(progress_box, text="准备就绪", **muted_text)
        self.progress_label.pack(anchor='w')

        self.progress_bar = ttk.Progressbar(
            progress_box,
            mode='determinate',
            style="success-striped",
            length=300,
        )
        self.progress_bar.pack(fill='x', pady=(5, 10))

        # Start / pause buttons; pause is disabled until a run starts.
        actions = ttk.Frame(card)
        actions.pack()

        self.start_btn = ttk.Button(
            actions,
            text="🚀 开始爬取",
            command=self._start_scraping,
            style="success",
            width=15,
        )
        self.start_btn.pack(side='left', padx=5)

        self.stop_btn = ttk.Button(
            actions,
            text="⏸ 暂停",
            command=self._stop_scraping,
            style="warning-outline",
            width=15,
            state='disabled',
        )
        self.stop_btn.pack(side='left', padx=5)

    def _build_stats_card(self, parent: ttk.Frame):
        """Build the row of three counter cards and register their value labels in `stats_labels`."""
        row = ttk.Frame(parent)
        row.pack(fill='x', pady=(0, 15))

        # (key in self.stats_labels, card caption, card style); every counter starts at "0".
        card_specs = (
            ("total", "📊 总ASIN数", "info"),
            ("success", "✅ 成功获取", "success"),
            ("failed", "❌ 获取失败", "danger"),
        )

        for key, caption, card_style in card_specs:
            card = ttk.LabelFrame(row, text=f" {caption} ", padding=10, style=card_style)
            card.pack(side='left', fill='both', expand=True, padx=2)

            counter = ttk.Label(card, text="0", style='Stats.TLabel')
            counter.pack()

            self.stats_labels[key] = counter

    def _build_log_area(self, parent: ttk.Frame):
        """Build the scrolling log pane and configure the colour tags used by `_log`."""
        frame = ttk.LabelFrame(parent, text=" 📜 运行日志 ", padding=10, style="secondary")
        frame.pack(fill='both', expand=True, pady=(0, 10))

        # ttkbootstrap ScrolledText; the underlying Text widget is its `.text` attribute.
        self.log_text = ScrolledText(frame, height=12, autohide=True, bootstyle="dark")
        self.log_text.pack(fill='both', expand=True)

        text_widget = self.log_text.text

        # Appearance of the text area
        text_widget.configure(
            font=('Consolas', 9),
            bg='#1e272e',
            fg='#dfe6e9',
            insertbackground='#dfe6e9',
            selectbackground='#74b9ff',
            padx=10,
            pady=10,
        )

        # One colour tag per log level, plus one for the timestamp.
        tag_colours = {
            'info': '#74b9ff',
            'success': '#00b894',
            'warning': '#fdcb6e',
            'error': '#e17055',
            'time': '#636e72',
        }
        for tag, colour in tag_colours.items():
            text_widget.tag_configure(tag, foreground=colour)

    def _build_status_bar(self, parent: ttk.Frame):
        """Build the bottom bar: status text on the left, version string on the right."""
        bar = ttk.Frame(parent, padding=(0, 5))
        bar.pack(fill='x')

        self.status_label = ttk.Label(
            bar,
            text="就绪",
            font=('Segoe UI', 9),
            foreground='#b2bec3',
        )
        self.status_label.pack(side='left')

        # Version string
        ttk.Label(
            bar,
            text="v2.0.0",
            font=('Segoe UI', 8),
            foreground='#636e72',
        ).pack(side='right')

    def _log(self, message: str, level: str = 'info'):
        """
        Append a timestamped, colour-tagged line to the log pane and mirror it to `logger`.

        Args:
            message: Text to display.
            level: One of 'info', 'success', 'warning', 'error'. It selects the
                text tag, the one-character prefix and the severity of the
                mirrored logging record. Unknown values get the 'i' prefix and
                are recorded at INFO.
        """
        timestamp = datetime.now().strftime('%H:%M:%S')

        # Short ASCII marker shown in front of the message.
        level_prefixes = {
            'info': 'i',
            'success': '+',
            'warning': '!',
            'error': 'x'
        }
        prefix = level_prefixes.get(level, 'i')

        text_widget = self.log_text.text
        text_widget.insert('end', f"[{timestamp}] ", 'time')
        text_widget.insert('end', f"{prefix}  {message}\n", level)
        text_widget.see('end')

        # Redraw without dispatching user events. A full update() runs a nested
        # event loop, so a button callback could fire in the middle of whatever
        # operation is logging.
        # NOTE(review): _scrape runs on a worker thread and calls this method;
        # confirm that touching Tk widgets from that thread is acceptable here.
        self.root.update_idletasks()

        # Mirror to the standard logger at a matching severity, so warnings and
        # errors are not recorded as INFO.
        severities = {
            'info': logging.INFO,
            'success': logging.INFO,
            'warning': logging.WARNING,
            'error': logging.ERROR,
        }
        logger.log(severities.get(level, logging.INFO), message)

    def _update_stats(self, total: int = 0, success: int = 0, failed: int = 0):
        """
        更新统计数据显示

        Args:
            total: 总ASIN数量
            success: 成功获取数量
            failed: 失败数量
        """
        if total > 0:
            self.stats_labels['total'].configure(text=str(total))
        if success >= 0:
            self.stats_labels['success'].configure(text=str(success))
        if failed >= 0:
            self.stats_labels['failed'].configure(text=str(failed))

    def _update_progress(self, current: int, total: int, message: str = ""):
        """
        更新进度条和进度文本

        Args:
            current: 当前进度
            total: 总数
            message: 进度消息
        """
        if total > 0:
            percentage = (current / total) * 100
            self.progress_bar['value'] = percentage
            self.progress_label.configure(
                text=f"进度: {current}/{total} ({percentage:.1f}%) - {message}"
            )
        else:
            self.progress_bar['value'] = 0
            self.progress_label.configure(text=message or "准备就绪")

    def _set_running_state(self, is_running: bool):
        """
        设置运行状态，更新按钮和状态显示

        Args:
            is_running: 是否正在运行
        """
        self.is_running = is_running

        if is_running:
            self.start_btn.configure(state='disabled')
            self.stop_btn.configure(state='normal')
            self.status_label.configure(
                text="🔄 正在爬取中...",
                foreground='#00b894'
            )
        else:
            self.start_btn.configure(state='normal')
            self.stop_btn.configure(state='disabled')
            self.status_label.configure(
                text="💡 准备就绪 - 请选择文件并开始爬取",
                foreground='#b2bec3'
            )

    def _load_excel(self):
        """
        Ask the user for an Excel file and load its 'ASIN' column into `self.asins`.

        Loading is refused while a scrape is active, because the worker thread
        indexes `self.asins` using a length captured when it started. A
        successful load also discards any paused run (resume index, collected
        results and counters), so the new file always starts from its first ASIN.
        """
        if self.is_running or (self._scrape_thread and self._scrape_thread.is_alive()):
            self._log("任务运行中，请先暂停并等待当前任务结束后再加载文件", 'warning')
            return

        file_path = filedialog.askopenfilename(
            title="选择包含ASIN的Excel文件",
            filetypes=[
                ("Excel文件", "*.xlsx *.xls"),
                ("所有文件", "*.*")
            ]
        )

        # Dialog cancelled
        if not file_path:
            return

        try:
            df = pd.read_excel(file_path)

            # The sheet must have an 'ASIN' column.
            if 'ASIN' not in df.columns:
                self._log("Excel文件中未找到'ASIN'列，请检查文件格式", 'error')
                return

            # Drop empty cells, then drop values that are blank after trimming.
            raw_values = df['ASIN'].dropna().astype(str).tolist()
            self.asins = [value.strip() for value in raw_values if value.strip()]

            # State from a previous (possibly paused) run refers to the old list.
            self.current_index = 0
            self.results = []
            self.success_count = 0
            self.failed_count = 0

            # Last path component, accepting either '/' or '\' as separator.
            file_name = re.split(r'[\\/]', file_path)[-1]

            self.file_label.configure(
                text=f"📁 已加载: {file_name}",
                foreground='#00b894'
            )
            self._log(f"成功加载文件: {file_name}", 'success')
            self._log(f"共读取到 {len(self.asins)} 个有效ASIN", 'info')

            # Refresh counters and progress for the new list.
            self._update_stats(total=len(self.asins), success=0, failed=0)
            self._update_progress(0, len(self.asins), "文件已加载，等待开始爬取")

        except Exception as e:
            error_msg = f"加载Excel文件失败: {str(e)}"
            self._log(error_msg, 'error')
            # Logged a second time so the traceback is kept.
            logger.error(error_msg, exc_info=True)

    def _init_browser(self):
        """
        Create the ChromiumPage used for scraping and store it in `self.page`.

        Raises:
            Exception: Whatever browser start-up raised; it is logged and re-raised.
        """
        try:
            browser_options = ChromiumOptions()
            (
                browser_options
                .headless(False)
                .no_imgs(True)
                .mute(True)
                .set_load_mode("none")
            )
            browser_options.set_argument('--window-size=1024,768')
            self.page = ChromiumPage(browser_options)
            self._log("浏览器初始化完成", 'success')
        except Exception as exc:
            self._log(f"浏览器初始化失败: {str(exc)}", 'error')
            raise

    @staticmethod
    def _clean_price(price: str) -> str:
        """
        清理价格字符串，移除货币符号

        Args:
            price: 原始价格字符串

        Returns:
            清理后的价格数字字符串
        """
        currency_symbols = [r'\$', 'C\$', '¥', '£', '€', 'MX\$']
        cleaned = price.strip()

        for symbol in currency_symbols:
            cleaned = re.sub(symbol, '', cleaned)

        return cleaned.replace(',', '').strip() or "未找到价格"

    def _get_country_code(self) -> str:
        """获取当前选中的国家代码"""
        selected = self.country_var.get()
        return self.COUNTRY_DOMAINS.get(selected, "US")

    def _fetch_price(self, asin: str, max_retries: int = 3) -> Tuple[str, str]:
        """
        Open the product page for one ASIN and read its displayed price.

        Args:
            asin: Amazon ASIN.
            max_retries: Maximum number of page-load attempts.

        Returns:
            (ASIN, text) where text is the cleaned price, "已停止" if the run was
            stopped or the browser is gone, "未找到价格元素" if no price node was
            found, or "错误: ..." describing a failure.
        """
        stopped = (asin, "已停止")

        def halted() -> bool:
            # True once the user has paused or the browser has been closed.
            return not (self.is_running and self.page)

        if halted():
            return stopped

        country_code = self._get_country_code()
        base_url = switch_domain(country_code).replace("vendorcentral.", "")
        product_url = f"{base_url}dp/{asin}?th=1"

        for attempt_no in range(1, max_retries + 1):
            # Re-check before every attempt.
            if halted():
                return stopped

            try:
                self.page.get(product_url)

                # No captcha form on the page: stop retrying.
                captcha_form = self.page.ele('xpath://form[@action="/errors/validateCaptcha"]', timeout=1)
                if not captcha_form:
                    break

                self._log(f"ASIN {asin}: 检测到验证码页面，尝试跳过...", 'warning')
                skip_button = self.page.ele('css:button.a-button-text', timeout=2)
                if skip_button:
                    skip_button.click()

            except Exception as exc:
                self._log(f"ASIN {asin}: 第{attempt_no}次尝试失败: {str(exc)}", 'warning')
                if attempt_no == max_retries:
                    return asin, f"错误: {str(exc)}"

        # Final check before reading the page.
        if halted():
            return stopped

        try:
            self.page.wait.ele_displayed('xpath://div[@id="corePrice_feature_div"]', timeout=3)
            document = etree.HTML(self.page.html)
            price_texts = document.xpath('//div[@id="corePrice_feature_div"]//span[@class="a-offscreen"]/text()')
            if price_texts:
                return asin, self._clean_price(price_texts[0])
            return asin, "未找到价格元素"
        except Exception as exc:
            return asin, f"错误: {str(exc)}"

    def _scrape(self):
        """
        Worker-thread entry point: scrape the remaining ASINs, with pause/resume support.

        Any unexpected exception (browser start-up, writing the CSV, ...) is
        logged, the browser is closed and the UI is put back into the idle
        state. Without this the thread would die with `is_running` still True
        and the start button permanently disabled.
        """
        try:
            self._scrape_remaining()
        except Exception as e:
            error_msg = f"爬取任务异常终止: {str(e)}"
            logger.error(error_msg, exc_info=True)
            self._cleanup_browser()
            self._set_running_state(False)
            self._log(error_msg, 'error')

    def _pause_at(self, index: int, total: int):
        """Remember `index` as the resume point, report the pause and close the browser."""
        self.current_index = index
        self._log(f"已暂停 ({index}/{total})", 'warning')
        self._cleanup_browser()

    def _scrape_remaining(self):
        """Process ASINs from `current_index` onward; on completion save the CSV and push results."""
        if not self.page:
            self._init_browser()

        start_time = time.time()
        total = len(self.asins)

        # A resume index of 0 means a fresh run: clear previous results.
        if self.current_index == 0:
            self.results = []
            self.success_count = 0
            self.failed_count = 0
            self._log(f"开始爬取 {total} 个ASIN...", 'info')
        else:
            self._log(f"继续爬取 (从第{self.current_index + 1}个)...", 'info')

        for index in range(self.current_index, total):
            if not self.is_running:
                # Paused between items: nothing is saved or pushed.
                self._pause_at(index, total)
                return

            asin = self.asins[index]

            # Show the ASIN being processed before the (slow) fetch starts.
            self._update_progress(index + 1, total, asin)

            result = self._fetch_price(asin)

            if result[1] == "已停止":
                # Paused during the fetch: this ASIN is retried on resume.
                self._pause_at(index, total)
                return

            self.results.append(result)
            # Keep the resume point in step with `results`, so a later failure
            # cannot make a resumed run fetch the same ASIN twice.
            self.current_index = index + 1

            # "错误: ..." and both not-found texts ("未找到价格", "未找到价格元素")
            # are failures; anything else is a price.
            is_success = not result[1].startswith(("错误", "未找到价格"))
            if is_success:
                self.success_count += 1
                self._log(f"[{index + 1}/{total}] {result[0]}: $ {result[1]}", 'success')
            else:
                self.failed_count += 1
                self._log(f"[{index + 1}/{total}] {result[0]}: {result[1]}", 'error')

            self._update_stats(success=self.success_count, failed=self.failed_count)

        # All ASINs processed: the next start is a fresh run.
        self.current_index = 0

        if self.results:
            country = self._get_country_code()
            output_file = f"amazon_prices_{country}_{int(time.time())}.csv"

            with open(output_file, "w", encoding="utf-8", newline="") as f:
                writer = csv.writer(f)
                writer.writerow(["ASIN", "Price"])
                writer.writerows(self.results)

            self._log(f"已保存: {output_file}", 'success')
            self._push_to_rabbitmq(output_file, country)

        elapsed_time = time.time() - start_time
        self._log(f"完成! 耗时: {elapsed_time:.2f}秒", 'info')
        self._update_progress(total, total, f"完成 {self.success_count}/{self.failed_count}")

        self._cleanup_browser()
        self._set_running_state(False)
        self.results = []

    def _push_to_rabbitmq(self, output_file: str, country: str):
        """
        Read the saved CSV and publish every row with a numeric price to RabbitMQ.

        Failures are logged, not raised.

        Args:
            output_file: Path of the CSV written by the scrape.
            country: Country code; it is sent in the 'currency' field of each message.
        """
        try:
            # utf-8-sig tolerates a BOM. ASIN is forced to str so that all-digit
            # ASINs are not parsed as integers and stripped of leading zeros.
            data = pd.read_csv(output_file, encoding='utf-8-sig', dtype={'ASIN': str})
            # Status texts such as "错误: ..." become NaN here and are dropped.
            data['Price'] = pd.to_numeric(data['Price'], errors='coerce')
            valid_data = data.dropna(subset=['Price'])

            if valid_data.empty:
                self._log("没有有效的价格数据需要推送", 'warning')
                return

            self._log(f"开始推送 {len(valid_data)} 条数据到消息队列...", 'info')

            from app.helper import rabbitmq

            client = rabbitmq.RabbitMQClient()
            client.connection()
            try:
                client.connect(queue='price_robot', routing_key='price_robot', exchange='product')

                for _, item_row in valid_data.iterrows():
                    push_data = {
                        'currency': country,
                        'asin': item_row.get('ASIN', ''),
                        'price': item_row['Price'],
                    }
                    client.send_message(push_data)
            finally:
                # Close the client even when declaring the queue or sending fails.
                client.close()

            self._log("消息推送完成", 'success')

        except Exception as e:
            self._log(f"推送消息失败: {str(e)}", 'error')
            logger.error(f"推送消息失败: {str(e)}", exc_info=True)

    def _cleanup_browser(self):
        """
        Close the browser, if one is open, and clear `self.page`.

        Errors while closing (or while writing the confirmation to the log
        pane) are logged as warnings and not raised. `self.page` is cleared
        even when `quit()` fails, so the next run starts a fresh browser
        instead of reusing a dead one.
        """
        page = self.page
        if not page:
            return

        try:
            page.quit()
            self._log("浏览器已关闭", 'info')
        except Exception as e:
            logger.warning(f"关闭浏览器时发生错误: {str(e)}")
        finally:
            self.page = None

    def _start_scraping(self):
        """Start a new scrape, or resume a paused one, on a background thread."""
        if not self.asins:
            self._log("请先选择Excel文件", 'warning')
            return

        # Refuse to start while flagged as running or while the previous
        # worker thread has not finished yet.
        worker = self._scrape_thread
        if self.is_running or (worker and worker.is_alive()):
            self._log("任务运行中", 'warning')
            return

        self._set_running_state(True)

        resuming = self.current_index != 0
        if resuming:
            self._log(f"继续 (从第{self.current_index + 1}个)...", 'info')
        else:
            self._update_stats(success=0, failed=0)
            self._log("启动...", 'info')

        self._scrape_thread = Thread(target=self._scrape, daemon=True)
        self._scrape_thread.start()

    def _stop_scraping(self):
        """Request a pause; does nothing unless a scrape is marked as running."""
        if self.is_running:
            # Clearing the flag is what the worker loop checks for.
            self._set_running_state(False)
            self._log("暂停中...", 'warning')

    def run(self):
        """Log the start-up message and run the Tk main loop until the window closes."""
        try:
            self._log("应用程序已启动", 'info')
            enter_main_loop = self.root.mainloop
            enter_main_loop()
        finally:
            # Close the browser however the main loop ended.
            self._cleanup_browser()


def main():
    """
    Application entry point: load `.env`, create the GUI and run it.

    Exits with status 0 on Ctrl+C and status 1 on any other unhandled exception.
    """
    # sys.exit is used instead of the builtin exit(): the latter is installed
    # by the `site` module and is missing under `python -S` or in frozen builds.
    import sys

    try:
        load_dotenv()
        app = ModernAmazonPriceScraper()
        app.run()
    except KeyboardInterrupt:
        logger.info("用户中断程序")
        sys.exit(0)
    except Exception as e:
        logger.error(f"程序异常退出: {str(e)}", exc_info=True)
        sys.exit(1)


# Start the GUI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()