spa查询

c46503f3 · 邱阿朋 · 5d07be2a · c46503f3
Commit c46503f3 authored Dec 21, 2024 by 邱阿朋
Hide whitespace changes
Inline Side-by-side

Showing with 27 additions and 16 deletions

spa_search.py src/spa_search.py +27 -16

No files found.
--- a/src/spa_search.py
+++ b/src/spa_search.py
@@ -64,16 +64,27 @@ def get_report_table_html(invoice_id):


 def get_report_agreement_text(invoice_id):
-    # 点击选项卡
-    page.ele("#a-autoid-2-announce").click()
-    page.wait(1)
-    # 下载报表
-    page.ele(f"#invoiceDownloads-{invoice_id}_3").click()
-    page.wait(5)
+    page.ele("#return-to-invoice-overview-announce").click()
+    while True:
+        try:
+            # 点击选项卡
+            page.ele("#a-autoid-2-announce").click()
+            # 下载报表
+            page.ele(f"#invoiceDownloads-{invoice_id}_3").click()
+            page.wait(3)
+            break
+        except ElementNotFoundError:
+            log.warning("元素未找到,刷新网页")
+            page.refresh()
+
    # 获取报表表单内容
    tree = etree.HTML(page.html)
-    # 找到包含表格的部分
-    table = tree.xpath('//table[@width="90%"]')[0]  # 获取第一个（也是唯一一个）匹配的表格元素
+    # 找到包含表格的部分,  # 获取第一个（也是唯一一个）匹配的表格元素
+    table = tree.xpath('//table[@width="90%"]')
+    if len(table) == 0:
+        return []
+
+    table = table[0]
    # 获取所有表格行（tr），跳过表头
    rows = table.xpath('.//tr[position()>1]')

@@ -118,7 +129,7 @@ def export_item_read_data(invoice_id):
    if len(links) == 0:
        page.refresh()
        data_list = get_report_agreement_text(invoice_id)
-        return {"Sheet1": pd.DataFrame(data_list)}
+        return {"Accrual For Current Period": pd.DataFrame(data_list)}

    for link in links:
        # 解析链接中的查询参数
@@ -128,12 +139,12 @@ def export_item_read_data(invoice_id):
        filename = query_params.get('fileName', ['未找到文件名'])[0]
        report_file_tmp_dir = f"spa\\{invoice_id}\\{filename}\\"
        host = domain.switch_domain(country)
-        page.download(host + link, report_file_tmp_dir, show_msg=True)
-
        report_file = report_file_tmp_dir + "BackupReport.xls"
-        is_down = file.wait_for_downloads(report_file, 60)
-        if is_down is False:
-            raise FileNotFoundError(f"{report_file},文件不存在")
+        while True:
+            page.download(host + link, report_file_tmp_dir, show_msg=False)
+            is_down = file.wait_for_downloads(report_file, 60)
+            if is_down: break
+            log.warning(f"下载 {invoice_id} 失败,重新下载")

        try:
            df = pd.read_excel(report_file)
@@ -196,7 +207,7 @@ def main():
                if processed_items:
                    large_sheet_data[invoice_id_num] = processed_items
        else:
-            item_list = item_dict['Sheet1']
+            item_list = item_dict.get('Accrual For Current Period')
            # 如果是列表且长度 >= 10 则新增sheet
            if len(item_list) >= 10:
                processed_items = process_large_items(item_list, relation_data)
@@ -270,7 +281,7 @@ def write_sheet(writer, data, sheet_name):
 def save_excel(sheet_data, large_sheet_data, new_file_name):
    """保存数据到 Excel 文件"""
    # 初始化 Excel 写入器
-    with pd.ExcelWriter(new_file_name, engine="openpyxl") as writer:
+    with pd.ExcelWriter(new_file_name) as writer:
        # 写入小数据
        if sheet_data:
            log.info(f"保存小数据，共计 {len(sheet_data)} 条")