feat(ve): 优化数据处理和日志记录

- 在 return_goods 和 spa 模块中添加了更多日志记录，以便跟踪处理进度
- 优化了 spa 模块中的数据处理逻辑，包括：
  - 读取 Excel 文件的改进
  - 添加总行数和总金额的计算
  - 更好的错误处理和日志记录
- 新增 test.py 文件用于测试和验证 Excel 文件的处理逻辑

feat(ve): 优化数据处理和日志记录
- 在 return_goods 和 spa 模块中添加了更多日志记录，以便跟踪处理进度
- 优化了 spa 模块中的数据处理逻辑，包括：
  - 读取 Excel 文件的改进
  - 添加总行数和总金额的计算
  - 更好的错误处理和日志记录
- 新增 test.py 文件用于测试和验证 Excel 文件的处理逻辑
3839cc85 · 邱阿朋 · f87f1f6a · 3839cc85 · 3839cc85 · 3839cc85
Commit 3839cc85 authored Jun 16, 2025 by 邱阿朋
Hide whitespace changes
Inline Side-by-side

Showing with 134 additions and 9 deletions

return_goods.py app/vc/return_goods.py +4 -2

spa.py app/vc/spa.py +46 -7

test.py test.py +84 -0

No files found.
--- a/app/vc/return_goods.py
+++ b/app/vc/return_goods.py
@@ -109,11 +109,13 @@ class ReturnGoods(AutoInterface):
        self.logger.info(f"文件 {self.result_file_name} 保存完成，路径：{os.path.abspath(self.result_file_name)}")

    def push_data_queue(self):
+        self.logger.info("开始读取数据....")
+        data = pd.read_excel(self.result_file_name, keep_default_na=False, na_values=[])
+        self.logger.info("开始推送消息....")
+
        rabbit.connection()
        rabbit.connect(queue='return_robot', routing_key='return_robot', exchange='reports')

-        data = pd.read_excel(self.result_file_name, keep_default_na=False, na_values=[])
-
        for _, item_row in data.iterrows():
            push_data = {
                'return_id': str(item_row.get('Return ID', '')),

--- a/app/vc/spa.py
+++ b/app/vc/spa.py
@@ -244,17 +244,53 @@ class Spa(AutoInterface):
        return round(total_amount, 2)

    def push_data_queue(self):
-        rabbit.connection()
-        rabbit.connect(queue='spa_robot', routing_key='spa_robot', exchange='reports')
        self.logger.info("开始读取数据....")
-        data_dict = pd.read_excel(self.result_file_name, sheet_name=None, keep_default_na=False, na_values=[])
+        # 读取Excel文件
+        xls = pd.ExcelFile(self.result_file_name)
        self.logger.info("开始推送消息....")
-        for sheet_name, values in data_dict.items():
-            for _, item_row in values.iterrows():
+
+        rabbit.connection()
+        rabbit.connect(queue='spa_robot', routing_key='spa_robot', exchange='reports')
+
+        total_rows = 0
+        total_amount = 0.00
+
+        for sheet_name in xls.sheet_names:
+            self.logger.info(f"开始处理 {sheet_name}")
+            df = pd.read_excel(xls, sheet_name, keep_default_na=False, na_values=[])
+
+            if sheet_name == "Sheet1":
+                # 根据Invoice ID去重，保留第一条记录
+                total_rows = len(df['Invoice ID'].drop_duplicates())
+                # 清洗Original balance列，去除$和千位分隔符
+                df['Original balance'] = (df['Original balance'].
+                                                 astype(str).
+                                                 str.
+                                                 replace(r'[\$,]', '',regex=True))
+                # 转换为数值型，处理非数值数据
+                df['Original balance'] = pd.to_numeric(df['Original balance'], errors='coerce')
+                # 计算总和，忽略NaN
+                total_amount += df['Original balance'].sum(skipna=True)
+            else:
+                total_rows += 1
+
+                target_column = None
+                if 'Rebate In Agreement Currency' in df.columns:
+                    target_column = 'Rebate In Agreement Currency'
+                elif 'Vendor Funding In Agreement Currency' in df.columns:
+                    target_column = 'Vendor Funding In Agreement Currency'
+
+                if target_column:
+                    # 转换为数值型，处理非数值数据
+                    df[target_column] = pd.to_numeric(df[target_column], errors='coerce')
+                    rebate_total = df[target_column].sum(skipna=True)
+                    total_amount += rebate_total
+
+            parent_id = sheet_name
+
+            for _, item_row in df.iterrows():
                if sheet_name == "Sheet1":
                    parent_id = item_row.get('Invoice ID', "")
-                else:
-                    parent_id = sheet_name

                # 协议类型
                agreement_type = 0
@@ -293,6 +329,9 @@ class Spa(AutoInterface):

        rabbit.close()

+        self.logger.info(f"所有sheet的总行数: {total_rows}")
+        self.logger.info(f"所有sheet的总金额: {total_amount}")
+
    def run(self, file_name: str):
        # 获取数据
        relation_data = api.sku_relations(self.country)  # 获取 ASIN 与 SKU 的对应关系数据

--- a/test.py
+++ b/test.py
+import pandas as pd
+from DrissionPage import ChromiumPage
+
+from app.vc.spa import Spa
+from app.helper.logger import ConsoleLog
+
+
+def calculate_totals(file_path):
+    """Compute the grand total amount and row count of an SPA result workbook.
+
+    ``Sheet1`` contributes the sum of its ``Original balance`` column (after
+    stripping ``$`` and thousands separators) and the number of distinct
+    ``Invoice ID`` values. Every other sheet contributes the sum of its
+    ``Rebate In Agreement Currency`` column or, when that column is absent,
+    its ``Vendor Funding In Agreement Currency`` column, and counts as one
+    row. A sheet with neither column is reported and skipped, as is any
+    non-``Sheet1`` sheet whose processing raises.
+
+    Args:
+        file_path: Path of the Excel workbook to read.
+
+    Returns:
+        A ``(total_amount, total_rows)`` tuple.
+    """
+    # Running totals across all sheets.
+    total_amount = 0
+    total_rows = 0
+
+    # Open the workbook once; the context manager releases the file handle
+    # even if reading a sheet fails.
+    with pd.ExcelFile(file_path) as xls:
+        # Sheet1: sum of the "Original balance" column.
+        sheet1_df = pd.read_excel(xls, 'Sheet1')
+
+        # Strip "$" and thousands separators before converting.
+        sheet1_df['Original balance'] = sheet1_df['Original balance'].astype(str).str.replace(r'[\$,]', '', regex=True)
+        # Convert to numeric; values that cannot be parsed become NaN.
+        sheet1_df['Original balance'] = pd.to_numeric(sheet1_df['Original balance'], errors='coerce')
+
+        # Sum while ignoring NaN.
+        sheet1_total = sheet1_df['Original balance'].sum(skipna=True)
+        print(f"Sheet1 的总金额: {sheet1_total}")
+
+        total_amount += sheet1_total
+
+        # Count each Invoice ID once (duplicates dropped, first kept).
+        unique_invoices = sheet1_df['Invoice ID'].drop_duplicates()
+        unique_count = len(unique_invoices)
+
+        total_rows += unique_count
+
+        # Remaining sheets: sum the rebate or vendor-funding column.
+        for sheet_name in xls.sheet_names:
+            if sheet_name == 'Sheet1':
+                continue
+
+            try:
+                df = pd.read_excel(xls, sheet_name)
+                # Prefer "Rebate In Agreement Currency" when both exist.
+                target_column = None
+                if 'Rebate In Agreement Currency' in df.columns:
+                    target_column = 'Rebate In Agreement Currency'
+                elif 'Vendor Funding In Agreement Currency' in df.columns:
+                    target_column = 'Vendor Funding In Agreement Currency'
+
+                if target_column:
+                    # Convert to numeric; unparseable values become NaN.
+                    df[target_column] = pd.to_numeric(df[target_column], errors='coerce')
+                    rebate_total = df[target_column].sum(skipna=True)
+                    total_amount += rebate_total
+                    total_rows += 1
+                else:
+                    print(
+                        f"{sheet_name}中既缺少'Rebate In Agreement Currency'列，也缺少'Vendor Funding In Agreement Currency'列")
+            except Exception as e:
+                # Best effort: report the failing sheet and keep going.
+                print(f"处理{sheet_name}时发生错误: {e}")
+
+    # Print the final result.
+    print("\n最终结果:")
+    print(f"所有sheet的总金额: {total_amount}")
+    print(f"所有sheet的总行数: {total_rows}")
+
+    return total_amount, total_rows
+
+
+def calculate_spa():
+    """Run ``calculate_totals`` against the sample SPA workbook.
+
+    ``calculate_totals`` prints the totals itself, so the returned tuple is
+    not kept here.
+    """
+    # Example input; replace with the path of your own Excel file.
+    file_path = '2025-06-13-16-52_US_spa.xlsx'
+    calculate_totals(file_path)
+
+def sap():
+    """Build a ``Spa`` and push the sample result workbook to the queue.
+
+    A ``ConsoleLog`` and a ``ChromiumPage`` are created and handed to
+    ``Spa``, its ``result_file_name`` is pointed at the sample workbook, and
+    ``push_data_queue`` is called.
+    """
+    robot = Spa(ConsoleLog(), ChromiumPage(), "US", "VECELO")
+    robot.result_file_name = "2025-06-13-16-52_US_spa.xlsx"
+    robot.push_data_queue()
+
+
+# Only run when executed as a script, so that merely importing this module
+# (for example during test collection) does not trigger sap().
+if __name__ == "__main__":
+    sap()
\ No newline at end of file