# coding: utf-8
# spa查询
import math
import os
import shutil
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from urllib.parse import urlparse, parse_qs

import pandas as pd
from DrissionPage import ChromiumPage
from DrissionPage.errors import ElementNotFoundError
from lxml import etree

from helper import helper, excel, file, domain, logger, api

# Marketplace code; assigned from user input in the __main__ block before main() runs.
country = None
log = logger.ConsoleLog()

# Browser page object shared by the functions below.
page = ChromiumPage()
page.set.load_mode.normal()
# Overwrite an existing file when a download has the same name.
page.set.when_download_file_exists('overwrite')

# Download directory: the current working directory.
download_path = os.getcwd()
# Ensure the download directory exists (helper.make_dir is expected to create it when missing).
helper.make_dir(download_path)
# Set the download path. The original note says this must happen before the browser
# is opened. NOTE(review): ChromiumPage() is already constructed above - confirm the
# ordering is what was intended.
page.set.download_path(download_path)


def page_get(url):
    """Open ``url`` on the host selected for the module-level ``country``.

    ``url`` is appended verbatim to the host returned by
    ``domain.switch_domain(country)``; the page load uses a 5 second timeout.
    """
    page.get(domain.switch_domain(country) + url, timeout=5)


def export_list_read_data():
    """Load ``ContraCogsInvoices.xls`` from the working directory as a DataFrame.

    Raises:
        FileNotFoundError: when the workbook is not present.
    """
    file_name = "ContraCogsInvoices.xls"
    if os.path.isfile(file_name):
        # Legacy .xls workbook, read with the xlrd engine.
        return pd.read_excel(file_name, engine='xlrd')
    raise FileNotFoundError(f"{file_name},文件不存在")


def get_report_table_html(invoice_id):
    """Return the HTML of the backup-report table for ``invoice_id``.

    Opens the coop search page for the invoice, clicks the tab and the
    invoice's download control, waits one second and reads the
    ``#backup-report-table`` element. The whole sequence is retried (after a
    page refresh) for as long as an element is missing or the table HTML is
    empty; there is no retry limit.
    """
    search_url = f"hz/vendor/members/coop?searchText={invoice_id}"
    download_selector = f"#invoiceDownloads-{invoice_id}_2"
    while True:
        try:
            page_get(search_url)
            # Switch to the tab that exposes the download controls.
            page.ele("#a-autoid-2-announce").click()
            # Trigger the report download control for this invoice.
            page.ele(download_selector).click()
            page.wait(1)
            # Read the rendered report table.
            table_html = page.ele("#backup-report-table").html
            if table_html not in (None, ""):
                return table_html
            log.warning("表单内容为空,刷新网页")
            page.refresh()
        except ElementNotFoundError:
            log.warning("元素未找到,刷新网页")
            page.refresh()


def export_item_read_data(invoice_id):
    """Return the item-level report rows for ``invoice_id``, or ``None``.

    A workbook already saved for this invoice under the ``spa`` directory is
    reused when present. Otherwise every link found in the invoice's
    backup-report table is downloaded in turn; the first report whose header
    contains one of the expected amount columns is filtered to rows with a
    non-null ``Asin``, saved as the invoice's workbook, and returned after
    the temporary download directory is removed.

    Returns:
        A DataFrame of report rows, or ``None`` when no link produced a
        usable report (the caller treats ``None`` as "no report available").
    """
    file_name = f"spa\\{invoice_id}.xlsx"
    if os.path.isfile(file_name):
        return pd.read_excel(file_name)

    # Fetch the report table markup and collect every link inside it.
    report_table_html = get_report_table_html(invoice_id)
    tree = etree.HTML(report_table_html)
    links = tree.xpath('//table[@id="backup-report-table"]//a/@href')
    for link in links:
        # The fileName query parameter names the per-report download directory.
        query_params = parse_qs(urlparse(link).query)
        filename = query_params.get('fileName', ['未找到文件名'])[0]
        report_file_tmp_dir = f"spa\\{invoice_id}\\{filename}\\"
        host = domain.switch_domain(country)
        page.download(host + link, report_file_tmp_dir, show_msg=False)

        report_file = report_file_tmp_dir + "BackupReport.xls"
        file.wait_for_downloads(report_file, 120)

        try:
            df = pd.read_excel(report_file)
            headers = df.columns.tolist()
            # A report is usable when it carries at least one of these columns.
            column_names_to_check = ["Rebate In Agreement Currency", "Vendor Funding In Agreement Currency"]
            header_is_normal = any(column in headers for column in column_names_to_check)
            if not header_is_normal:
                # Not the report we need; try the next link.
                continue

            data = df[df['Asin'].notna()]
            excel.save_xls(data, file_name)
            shutil.rmtree(f"spa\\{invoice_id}")
            return pd.read_excel(file_name)
        except ValueError as e:
            # Previously swallowed silently; keep going but leave a trace so an
            # unreadable report can be diagnosed.
            log.warning(f"{invoice_id} 报表处理失败,已跳过: {report_file} ({e})")

    # No link yielded a usable report.
    return None


def main():
    """Build the timestamped ``spa.xlsx`` workbook from the exported invoice list.

    For every row of the invoice list the item-level report is loaded via
    ``export_item_read_data``. Reports with fewer than 10 rows are flattened
    into one shared list; larger reports are kept per invoice, split into
    "large" and "max" (more than 5000 processed rows) groups. Everything is
    then handed to ``save_excel``.
    """
    relation_data = api.sku_relations(country)  # ASIN -> SKU relation data
    coop_list = export_list_read_data()  # exported invoice list
    log.info(f"共计: {len(coop_list)} 条数据")

    # Prefix the output file name with the current date and time.
    current_datetime = datetime.now().strftime('%Y%m%d%H%M')
    file_name = "spa.xlsx"
    new_file_name = f"{current_datetime}_{file_name}"

    sheet_data = []  # rows from small reports, written to one sheet
    large_sheet_data = {}  # invoice id -> rows, one sheet per invoice
    max_sheet_data = {}  # invoice id -> rows for reports above 5000 rows
    # 1-based row counter, used for logging only.
    for index, (_, coop) in enumerate(coop_list.iterrows(), start=1):
        invoice_id = coop.get("Invoice ID")
        log.info({"index": index, "invoice_id": invoice_id})

        # pandas reads a blank cell as NaN, which is truthy, so a plain
        # `not invoice_id` check would let it through to the browser lookup.
        if pd.isna(invoice_id) or not invoice_id:
            log.warning(f"缺少 Invoice ID，跳过第 {index} 条数据")
            continue

        # Item-level rows for this invoice; None means no usable report.
        item_list = export_item_read_data(invoice_id)
        if item_list is None:
            log.warning(f"{invoice_id} 暂无报告信息")
            continue

        # Route by report size.
        if len(item_list) >= 10:
            processed_items = process_large_items(item_list, relation_data)
            if processed_items:
                if len(processed_items) > 5000:
                    max_sheet_data[invoice_id] = processed_items
                else:
                    large_sheet_data[invoice_id] = processed_items
        else:
            processed_items = process_small_items(item_list, coop, relation_data)
            sheet_data.extend(processed_items)

    # Write everything collected above to the Excel file.
    save_excel(sheet_data, large_sheet_data, max_sheet_data, new_file_name)


def process_large_items(item_list, relation_data):
    """Enrich the rows of a large report (10 rows or more) with SKU relation fields.

    Rows whose ``Asin`` fails ``validate_asin`` are skipped. Every kept row
    is copied; its ``Asin`` entry is removed and re-added so it sits after
    the original columns, followed by ``ERP SKU`` and ``Group Name`` taken
    from ``relation_data`` (missing relations yield ``None``).

    Returns:
        A list with one enriched row copy per valid input row.
    """
    enriched_rows = []
    for _, row in item_list.iterrows():
        asin = row.get('Asin', None)
        if validate_asin(asin):
            sku_info = relation_data.get(asin, {})

            enriched = row.copy()
            # Pop and re-insert so "Asin" lands next to the appended columns.
            enriched.pop("Asin")
            enriched['Asin'] = asin
            enriched['ERP SKU'] = sku_info.get("erp_sku")
            enriched['Group Name'] = sku_info.get("name")
            enriched_rows.append(enriched)

    return enriched_rows


def process_small_items(item_list, coop, relation_data):
    """Expand one invoice row into one output row per valid item of a small report.

    Each output row is a copy of ``coop`` with ``Asin``, ``ERP SKU``,
    ``Group Name`` and ``Original balance`` set. ``Original balance`` is the
    item's "Rebate In Agreement Currency" when that value is present, and
    its "Vendor Funding In Agreement Currency" otherwise.

    Args:
        item_list: DataFrame of report rows (fewer than 10 rows).
        coop: the invoice-list row the report belongs to.
        relation_data: mapping from ASIN to a dict with ``erp_sku`` / ``name``.

    Returns:
        A list of enriched copies of ``coop``; rows with an invalid ``Asin``
        are skipped.
    """
    processed_items = []
    for _, item in item_list.iterrows():
        asin = item.get('Asin', None)
        if not validate_asin(asin):
            continue

        relation = relation_data.get(asin, {})
        rebate = item.get("Rebate In Agreement Currency", None)
        vendor_funding = item.get("Vendor Funding In Agreement Currency", None)

        # NaN is truthy, so `rebate or vendor_funding` alone would keep an
        # empty (NaN) rebate and never fall back to the vendor funding value.
        rebate_missing = not rebate or (isinstance(rebate, float) and math.isnan(rebate))

        processed_item = coop.copy()  # start from the invoice row
        processed_item["Asin"] = asin
        processed_item["ERP SKU"] = relation.get("erp_sku")
        processed_item["Group Name"] = relation.get("name")
        processed_item["Original balance"] = vendor_funding if rebate_missing else rebate

        processed_items.append(processed_item)
    return processed_items


def validate_asin(asin):
    """Return ``True`` when ``asin`` is a usable value, ``False`` otherwise.

    Falsy values (``None``, ``""``, ``0``) and float NaN are rejected. The
    result is always a real ``bool`` rather than the operand itself.
    """
    if isinstance(asin, float) and math.isnan(asin):
        return False
    return bool(asin)


def save_excel(sheet_data, large_sheet_data, max_sheet_data, new_file_name):
    """Write the collected rows to ``new_file_name`` as an Excel workbook.

    Args:
        sheet_data: list of rows written together to the sheet ``Sheet1``.
        large_sheet_data: mapping of sheet name -> rows, one sheet per key.
        max_sheet_data: mapping of sheet name -> rows, one sheet per key.
        new_file_name: path of the workbook to create.

    Sheets are written one after another. The earlier version submitted the
    writes to a thread pool, but all of them targeted the same writer object
    and the futures were never inspected, so any failure inside a write was
    silently lost. When there is nothing to write, no file is created.
    """
    def write_sheet(writer, data, sheet_name):
        """Write ``data`` as one sheet."""
        log.info(f"开始写入 {sheet_name}, 共计 {len(data)} 条")
        df = pd.DataFrame(data)
        # Sheet names may arrive as numbers (invoice ids); the writer needs text.
        df.to_excel(writer, sheet_name=str(sheet_name), index=False)

    # A workbook without any sheet cannot be saved, so bail out early.
    if not (sheet_data or large_sheet_data or max_sheet_data):
        log.warning("没有可保存的数据，跳过生成 Excel 文件")
        return

    with pd.ExcelWriter(new_file_name, engine="openpyxl") as writer:
        # Small reports share a single sheet.
        if sheet_data:
            log.info(f"保存小数据，共计 {len(sheet_data)} 条")
            write_sheet(writer, sheet_data, "Sheet1")

        # Large and very large reports get one sheet per invoice.
        for label, grouped in (("大数据", large_sheet_data), ("超大数据", max_sheet_data)):
            if not grouped:
                continue
            log.info(f"保存{label}，共计 {sum(len(data) for data in grouped.values())} 条")
            for sheet_name, data in grouped.items():
                write_sheet(writer, data, sheet_name)

    log.info(f"文件 {new_file_name} 保存完成，路径：{os.path.abspath(new_file_name)}")


if __name__ == '__main__':
    try:
        # Ask which marketplace to process; "US" is the default passed to the prompt helper.
        country = helper.get_input_with_default("国家(目前支持[DE,FR,JP,CA,UK,US])", "US")
        # Prepare the shared page for the chosen marketplace (details live in helper.domain).
        domain.domain_page(page, country)
        main()
        # Only reached when main() returns normally; on an error the page is left open.
        page.close()
    except KeyboardInterrupt:
        # Ctrl+C: exit without reporting an error.
        pass
    except Exception as e:
        # Top-level boundary: log the error and print the trace via the helper.
        log.error(e)
        helper.print_trace("main", e)
