Commit 3839cc85 authored by 邱阿朋's avatar 邱阿朋

feat(ve): 优化数据处理和日志记录

- 在 return_goods 和 spa 模块中添加了更多日志记录,以便跟踪处理进度
- 优化了 spa 模块中的数据处理逻辑,包括:
  - 读取 Excel 文件的改进
  - 添加总行数和总金额的计算
  - 更好的错误处理和日志记录
- 新增 test.py 文件用于测试和验证 Excel 文件的处理逻辑
parent f87f1f6a
......@@ -109,11 +109,13 @@ class ReturnGoods(AutoInterface):
self.logger.info(f"文件 {self.result_file_name} 保存完成,路径:{os.path.abspath(self.result_file_name)}")
def push_data_queue(self):
self.logger.info("开始读取数据....")
data = pd.read_excel(self.result_file_name, keep_default_na=False, na_values=[])
self.logger.info("开始推送消息....")
rabbit.connection()
rabbit.connect(queue='return_robot', routing_key='return_robot', exchange='reports')
data = pd.read_excel(self.result_file_name, keep_default_na=False, na_values=[])
for _, item_row in data.iterrows():
push_data = {
'return_id': str(item_row.get('Return ID', '')),
......
......@@ -244,17 +244,53 @@ class Spa(AutoInterface):
return round(total_amount, 2)
def push_data_queue(self):
rabbit.connection()
rabbit.connect(queue='spa_robot', routing_key='spa_robot', exchange='reports')
self.logger.info("开始读取数据....")
data_dict = pd.read_excel(self.result_file_name, sheet_name=None, keep_default_na=False, na_values=[])
# 读取Excel文件
xls = pd.ExcelFile(self.result_file_name)
self.logger.info("开始推送消息....")
for sheet_name, values in data_dict.items():
for _, item_row in values.iterrows():
rabbit.connection()
rabbit.connect(queue='spa_robot', routing_key='spa_robot', exchange='reports')
total_rows = 0
total_amount = 0.00
for sheet_name in xls.sheet_names:
self.logger.info(f"开始处理 {sheet_name}")
df = pd.read_excel(xls, sheet_name, keep_default_na=False, na_values=[])
if sheet_name == "Sheet1":
# 根据Invoice ID去重,保留第一条记录
total_rows = len(df['Invoice ID'].drop_duplicates())
# 清洗Original balance列,去除$和千位分隔符
df['Original balance'] = (df['Original balance'].
astype(str).
str.
replace(r'[\$,]', '',regex=True))
# 转换为数值型,处理非数值数据
df['Original balance'] = pd.to_numeric(df['Original balance'], errors='coerce')
# 计算总和,忽略NaN
total_amount += df['Original balance'].sum(skipna=True)
else:
total_rows += 1
target_column = None
if 'Rebate In Agreement Currency' in df.columns:
target_column = 'Rebate In Agreement Currency'
elif 'Vendor Funding In Agreement Currency' in df.columns:
target_column = 'Vendor Funding In Agreement Currency'
if target_column:
# 转换为数值型,处理非数值数据
df[target_column] = pd.to_numeric(df[target_column], errors='coerce')
rebate_total = df[target_column].sum(skipna=True)
total_amount += rebate_total
parent_id = sheet_name
for _, item_row in df.iterrows():
if sheet_name == "Sheet1":
parent_id = item_row.get('Invoice ID', "")
else:
parent_id = sheet_name
# 协议类型
agreement_type = 0
......@@ -293,6 +329,9 @@ class Spa(AutoInterface):
rabbit.close()
self.logger.info(f"所有sheet的总行数: {total_rows}")
self.logger.info(f"所有sheet的总金额: {total_amount}")
def run(self, file_name: str):
# 获取数据
relation_data = api.sku_relations(self.country) # 获取 ASIN 与 SKU 的对应关系数据
......
import pandas as pd
from DrissionPage import ChromiumPage
from app.vc.spa import Spa
from app.helper.logger import ConsoleLog
def calculate_totals(file_path):
    """Sum the amounts and count the logical rows of a multi-sheet workbook.

    ``Sheet1`` contributes the sum of its ``Original balance`` column (``$``
    and thousands separators are stripped before conversion) and one row per
    distinct ``Invoice ID``.  Every other sheet contributes the sum of its
    ``Rebate In Agreement Currency`` column or, when that column is absent,
    its ``Vendor Funding In Agreement Currency`` column, and counts as a
    single row.  A sheet with neither column is reported and skipped, as is
    a sheet whose processing raises.

    Args:
        file_path: Path of the Excel workbook to read.

    Returns:
        A ``(total_amount, total_rows)`` tuple.

    Raises:
        Whatever pandas raises when the workbook cannot be opened, or when
        ``Sheet1`` or one of its two required columns is missing.
    """
    total_amount = 0
    total_rows = 0

    # Open the workbook in a context manager so the underlying file handle is
    # released even if a sheet fails to parse.
    with pd.ExcelFile(file_path) as xls:
        # --- Sheet1: invoice balances -------------------------------------
        sheet1_df = pd.read_excel(xls, 'Sheet1')
        # Strip "$" and thousands separators before numeric conversion.
        cleaned_balance = (
            sheet1_df['Original balance']
            .astype(str)
            .str.replace(r'[\$,]', '', regex=True)
        )
        # Values that are not numeric become NaN and are ignored by the sum.
        sheet1_df['Original balance'] = pd.to_numeric(cleaned_balance, errors='coerce')
        sheet1_total = sheet1_df['Original balance'].sum(skipna=True)
        print(f"Sheet1 的总金额: {sheet1_total}")
        total_amount += sheet1_total

        # Each distinct Invoice ID counts as one row.
        total_rows += len(sheet1_df['Invoice ID'].drop_duplicates())

        # --- Remaining sheets: one row each --------------------------------
        for sheet_name in xls.sheet_names:
            if sheet_name == 'Sheet1':
                continue
            try:
                df = pd.read_excel(xls, sheet_name)
                # Prefer the rebate column; fall back to vendor funding.
                target_column = None
                if 'Rebate In Agreement Currency' in df.columns:
                    target_column = 'Rebate In Agreement Currency'
                elif 'Vendor Funding In Agreement Currency' in df.columns:
                    target_column = 'Vendor Funding In Agreement Currency'

                if target_column:
                    # Values that are not numeric become NaN and are skipped.
                    df[target_column] = pd.to_numeric(df[target_column], errors='coerce')
                    total_amount += df[target_column].sum(skipna=True)
                    total_rows += 1
                else:
                    print(
                        f"{sheet_name}中既缺少'Rebate In Agreement Currency'列,也缺少'Vendor Funding In Agreement Currency'列")
            except Exception as e:
                # Deliberate best-effort: report the failing sheet and go on.
                print(f"处理{sheet_name}时发生错误: {e}")

    # Final summary
    print("\n最终结果:")
    print(f"所有sheet的总金额: {total_amount}")
    print(f"所有sheet的总行数: {total_rows}")
    return total_amount, total_rows
def calculate_spa():
    """Run ``calculate_totals`` against a hard-coded sample workbook."""
    # Usage example: replace with the path of your own Excel file.
    sample_workbook = '2025-06-13-16-52_US_spa.xlsx'
    _amount, _row_count = calculate_totals(sample_workbook)
def sap():
    """Call ``Spa.push_data_queue`` for a hard-coded result file name.

    Builds a ``ConsoleLog`` and a ``ChromiumPage``, hands both to ``Spa``
    together with the literals ``"US"`` and ``"VECELO"``, overrides the
    instance's ``result_file_name`` and then triggers the push.
    """
    console_log = ConsoleLog()
    browser_page = ChromiumPage()
    spa_job = Spa(console_log, browser_page, "US", "VECELO")
    # Point the job at a fixed workbook name instead of one it produced itself.
    spa_job.result_file_name = "2025-06-13-16-52_US_spa.xlsx"
    spa_job.push_data_queue()
# Run the push only when this file is executed as a script, so that importing
# the module does not trigger it as a side effect.
if __name__ == "__main__":
    sap()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment