# coding: utf-8
# spa查询
import math
import os
import shutil
from urllib.parse import urlparse, parse_qs

import pandas as pd
from DrissionPage import ChromiumPage
from DrissionPage.errors import ElementNotFoundError
from lxml import etree

from helper import helper, excel, file, domain, logger, api

# Country selected by the user; rebound in the `__main__` block and read by
# page_get() / export_item_read_data() when building URLs.
country = None
log = logger.ConsoleLog()

# Single shared browser page used by every function in this module.
page = ChromiumPage()
page.set.load_mode.normal()
# Re-downloading a report replaces any existing file of the same name.
page.set.when_download_file_exists('overwrite')

# Download directory: the current working directory.
download_path = os.getcwd()
# Make sure the download directory exists (helper.make_dir presumably creates
# it when missing — confirm against helper).
helper.make_dir(download_path)
# Point browser downloads at that directory.
# NOTE(review): the original comment says this must be set before the browser
# is opened, yet `page` is already created above — confirm the ordering.
page.set.download_path(download_path)


def page_get(url):
    """Navigate the shared page to *url* on the host of the active country.

    The host prefix is obtained from ``domain.switch_domain(country)`` and
    *url* is appended to it unchanged. The page load uses a 3-second timeout.
    """
    page.get(domain.switch_domain(country) + url, timeout=3)

def export_list_read_data():
    """Return the invoice list stored in ``ContraCogsInvoices.xls``.

    When the file is not present in the working directory it is first
    exported from the vendor co-op page: select every row, open the actions
    dropdown, and trigger the download entry, waiting for the download to
    finish. The file is then read with the ``xlrd`` engine.
    """
    report_name = "ContraCogsInvoices.xls"
    if os.path.isfile(report_name):
        return pd.read_excel(report_name, engine='xlrd')

    page_get("hz/vendor/members/coop?ref_=vc_xx_subNav")
    # Select all rows, then open the actions dropdown.
    for selector in ("#select-all", "#cc-invoice-actions-dropdown"):
        page.ele(selector).click()
    # Click the download entry and block until the download mission ends.
    page.ele("#cc-invoice-actions-dropdown_2").click.to_download().wait()

    return pd.read_excel(report_name, engine='xlrd')


def get_report_table_html(invoice_id):
    """Return the HTML of the ``#backup-report-table`` element for *invoice_id*.

    Searches the co-op page for the invoice, opens its dropdown, clicks the
    invoice's download entry and reads the resulting table element. If an
    element cannot be found or the table HTML is empty, the page is refreshed
    and the whole sequence is retried; the loop has no retry limit.
    """
    search_url = f"hz/vendor/members/coop?searchText={invoice_id}"
    download_entry = f"#invoiceDownloads-{invoice_id}_2"

    while True:
        try:
            page_get(search_url)
            # Open the invoice's dropdown, then click its download entry.
            page.ele("#a-autoid-2-announce").click()
            page.ele(download_entry).click()
            page.wait(1)
            # Read the report table markup.
            table_html = page.ele("#backup-report-table").html
            if table_html:
                return table_html
            log.warning("表单内容为空,刷新网页")
            page.refresh()
        except ElementNotFoundError:
            log.warning("元素未找到,刷新网页")
            page.refresh()


def export_item_read_data(invoice_id):
    """Return the backup-report rows for *invoice_id*, or ``None``.

    A cached ``spa\\<invoice_id>.xlsx`` is returned directly when it exists.
    Otherwise each link found in the invoice's backup-report table is
    downloaded in turn until one yields a ``BackupReport.xls`` whose header
    contains a rebate / vendor-funding column. Rows of that report with a
    non-empty ``Asin`` are written to the cache file via ``excel.save_xls``,
    the temporary download directory is removed, and the cache file is read
    back and returned.

    Args:
        invoice_id: Invoice identifier used in the search URL and file names.

    Returns:
        A ``pandas.DataFrame`` with the report rows, or ``None`` when no
        downloaded report is usable.
    """
    # NOTE(review): paths use Windows-style separators, as in the rest of the
    # script; they are kept unchanged here.
    file_name = f"spa\\{invoice_id}.xlsx"
    if os.path.isfile(file_name):
        return pd.read_excel(file_name)

    # Fetch the report table markup and collect every link in it.
    report_table_html = get_report_table_html(invoice_id)
    tree = etree.HTML(report_table_html)
    links = tree.xpath('//table[@id="backup-report-table"]//a/@href')
    if not links:
        return None

    # Loop-invariant values.
    host = domain.switch_domain(country)
    required_columns = ("Rebate In Agreement Currency", "Vendor Funding In Agreement Currency")

    for link in links:
        # The download directory is named after the link's `fileName` parameter.
        query_params = parse_qs(urlparse(link).query)
        filename = query_params.get('fileName', ['未找到文件名'])[0]
        report_file_tmp_dir = f"spa\\{invoice_id}\\{filename}\\"
        page.download(host + link, report_file_tmp_dir, show_msg=False)

        report_file = report_file_tmp_dir + "BackupReport.xls"
        file.wait_for_downloads(report_file)

        try:
            df = pd.read_excel(report_file)
            headers = df.columns.tolist()
            # Skip reports that carry neither amount column; try the next link.
            if not any(column in headers for column in required_columns):
                continue
            # Without an Asin column the filter below would raise KeyError
            # and abort the whole run; skip this report instead.
            if 'Asin' not in headers:
                log.warning(f"{report_file} 缺少 Asin 列,跳过")
                continue

            data = df[df['Asin'].notna()]
            excel.save_xls(data, file_name)
            shutil.rmtree(f"spa\\{invoice_id}")
            return pd.read_excel(file_name)
        except ValueError as e:
            # Still best-effort (move on to the next link), but no longer silent.
            log.warning(f"{report_file} 读取失败,跳过: {e}")

    return None


def _is_usable_amount(value):
    """Return True when *value* is present, not NaN, and truthy (non-zero)."""
    if value is None or pd.isna(value):
        return False
    return bool(value)


def main():
    """Join every invoice's report rows with SKU data and write ``SPA查询.xlsx``.

    For each invoice in the exported list the item report is loaded and each
    row's ASIN is looked up in the ASIN -> SKU relation data:

    * rows without a relation are kept on the invoice's own sheet with empty
      ``ERP SKU`` / ``Group Name``;
    * for invoices with 10 or more item rows, matched rows are also kept on
      the invoice's own sheet;
    * for smaller invoices, each matched row becomes a copy of the invoice
      row (with ``Original balance`` taken from the item's amount column
      when it holds a value) and goes to the default sheet.

    Invoice sheets with more than 5000 rows are written after all others.
    """
    output_file = 'SPA查询.xlsx'
    amount_columns = ("Rebate In Agreement Currency", "Vendor Funding In Agreement Currency")

    relation_data = api.sku_relations()  # ASIN -> SKU relation data
    coop_list = export_list_read_data()  # exported invoice list
    log.info(f"共计:{len(coop_list)} 条数据")

    new_coop_data = []
    sheet_data = {}
    for index, (_, coop) in enumerate(coop_list.iterrows(), start=1):
        invoice_id = coop.get("Invoice ID")
        log.info({"index": index, "invoice_id": invoice_id})

        # Item rows for this invoice; None means no usable report.
        item_list = export_item_read_data(invoice_id)
        if item_list is None:
            log.warning(f"{invoice_id} 暂无报告信息")
            continue

        # Invoices with 10 or more item rows keep the item rows themselves.
        keep_item_rows = len(item_list) >= 10

        item_coop_data = []
        for _, item in item_list.iterrows():
            asin = item.get("Asin")

            # The first blank/NaN ASIN ends processing for this invoice.
            if not asin or (isinstance(asin, float) and math.isnan(asin)):
                break

            relation = relation_data.get(asin)

            # No SKU match: keep the row with empty SKU fields.
            if not relation:
                item['Asin'] = asin
                item['ERP SKU'] = ""
                item['Group Name'] = ""
                item_coop_data.append(item)
                continue

            if keep_item_rows:
                new_item = item.copy()
                # Pop and re-add so that Asin sits next to the SKU columns.
                new_item.pop("Asin")
                new_item['Asin'] = asin
                new_item['ERP SKU'] = relation.get("erp_sku")
                new_item['Group Name'] = relation.get("name")
                item_coop_data.append(new_item)
            else:
                new_item = coop.copy()
                # NaN is truthy, so a plain `if amount:` would overwrite the
                # balance with NaN for empty cells; test for a real value.
                # As before, the later column wins when both hold a value.
                for column in amount_columns:
                    amount = item.get(column)
                    if _is_usable_amount(amount):
                        new_item['Original balance'] = amount

                new_item['Asin'] = asin
                new_item['ERP SKU'] = relation.get("erp_sku")
                new_item['Group Name'] = relation.get("name")
                new_coop_data.append(new_item)

        # Rows collected for this invoice go to a sheet keyed by invoice ID.
        if item_coop_data:
            sheet_data[invoice_id] = item_coop_data

    # Default sheet: invoice-level rows.
    if new_coop_data:
        excel.save_xls(new_coop_data, output_file)

    # Per-invoice sheets: write the small ones first, oversized ones last.
    max_sheet_data = {}
    for key, value in sheet_data.items():
        if len(value) > 5000:
            max_sheet_data[key] = value
            continue
        excel.save_xls(value, output_file, key)

    for key, value in max_sheet_data.items():
        excel.save_xls(value, output_file, key)


if __name__ == '__main__':
    # Script entry point: ask for the country, hand the page and country to
    # domain.domain_page, run the export, then close the page.
    try:
        # Rebinds the module-level `country` read by page_get() and
        # export_item_read_data(); "usa" is the default offered at the prompt.
        country = helper.get_input_with_default("国家", "usa")
        # NOTE(review): presumably opens / signs in to that country's site — confirm.
        domain.domain_page(page, country)
        main()
        # Only reached when main() returns without raising.
        page.close()
    except KeyboardInterrupt:
        # Ctrl+C: exit quietly.
        pass
    except Exception as e:
        # Any other failure is logged and its trace printed; the page is left open.
        log.error(e)
        helper.print_trace("main", e)
