Commit 8f595ff1 authored by 邱阿朋

spa

parent 028d0f9d
......
@@ -25,13 +25,13 @@ def save_xls(data, output_file, sheet_name='Sheet1', adjusted=True):
         df = pd.DataFrame(data)
         df.to_excel(writer, index=False, sheet_name=sheet_name)
+    if not adjusted:
+        return
     # Reload the workbook with openpyxl
     wb = load_workbook(output_file)
     ws = wb[sheet_name]
-    if not adjusted:
-        return
     # Auto-adjust column widths
     for column in ws.columns:
         max_length = 0
......
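Note on this hunk: with the early return moved above load_workbook, a call with adjusted=False no longer reloads the workbook it has just written; it stops right after the initial write. Below is a minimal sketch of what the whole helper plausibly looks like after the change; everything outside the visible hunk (imports, the writer block, the append mode, the width formula) is an assumption, not the committed code.

# Sketch only: reconstructed around the visible context of save_xls.
import os

import pandas as pd
from openpyxl import load_workbook


def save_xls(data, output_file, sheet_name='Sheet1', adjusted=True):
    # Assumed: append when the workbook exists so repeated calls add sheets.
    mode = 'a' if os.path.exists(output_file) else 'w'
    with pd.ExcelWriter(output_file, engine='openpyxl', mode=mode) as writer:
        df = pd.DataFrame(data)
        df.to_excel(writer, index=False, sheet_name=sheet_name)
    if not adjusted:
        return  # skip the reload entirely when no width adjustment is wanted
    wb = load_workbook(output_file)
    ws = wb[sheet_name]
    # Auto-adjust column widths
    for column in ws.columns:
        max_length = 0
        for cell in column:
            if cell.value is not None:
                max_length = max(max_length, len(str(cell.value)))
        ws.column_dimensions[column[0].column_letter].width = max_length + 2
    wb.save(output_file)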
......
@@ -399,11 +399,11 @@ def main():
     if all_normal_pay_data:
         # Merge all of the data into one DataFrame
         normal_pay_summary = pd.concat(all_normal_pay_data, ignore_index=True)
-        excel.save_xls(normal_pay_summary, new_file_name, "正常回款导出明细")
+        excel.save_xls(normal_pay_summary, new_file_name, "正常回款导出明细", False)
     if all_price_pay_data:
         price_pay_summary = pd.concat(all_price_pay_data, ignore_index=True)
-        excel.save_xls(price_pay_summary, new_file_name, "Price导出明细")
+        excel.save_xls(price_pay_summary, new_file_name, "Price导出明细", False)
 if __name__ == '__main__':
......
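The trailing False in the two updated calls maps to the adjusted parameter from the first hunk, so these two summary sheets now skip the column-width pass. Passing it as a keyword would make that intent explicit at the call site, for example:

excel.save_xls(normal_pay_summary, new_file_name, "正常回款导出明细", adjusted=False)
excel.save_xls(price_pay_summary, new_file_name, "Price导出明细", adjusted=False)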
......
@@ -3,6 +3,7 @@
 import math
 import os
 import shutil
+from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime
 from urllib.parse import urlparse, parse_qs
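The new import backs the parallel sheet writes added further down. As a generic reference for the pattern (not code from this commit): submit returns a Future, and calling result() both collects the value and re-raises any exception from the worker.

from concurrent.futures import ThreadPoolExecutor

def square(n):
    return n * n

with ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(square, n) for n in range(8)]
    print([f.result() for f in futures])  # [0, 1, 4, 9, 16, 25, 36, 49]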
......
@@ -84,7 +85,7 @@ def export_item_read_data(invoice_id):
     page.download(host + link, report_file_tmp_dir, show_msg=False)
     report_file = report_file_tmp_dir + "BackupReport.xls"
-    file.wait_for_downloads(report_file)
+    file.wait_for_downloads(report_file, 120)
     try:
         df = pd.read_excel(report_file)
......
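The second argument added to file.wait_for_downloads reads like a timeout in seconds, widening the wait to 120 s for this slow report download. The helper itself is project-internal and not shown in the commit; a hypothetical implementation consistent with that call shape:

# Hypothetical sketch of a wait_for_downloads(path, timeout) helper;
# the project's real implementation is not visible in this diff.
import os
import time

def wait_for_downloads(path, timeout=60):
    """Poll until `path` exists and its size stops changing, or raise on timeout."""
    deadline = time.time() + timeout
    last_size = -1
    while time.time() < deadline:
        if os.path.exists(path):
            size = os.path.getsize(path)
            if size == last_size and size > 0:
                return  # size is stable: download finished
            last_size = size
        time.sleep(1)
    raise TimeoutError(f"download did not finish within {timeout}s: {path}")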
@@ -106,93 +107,128 @@ def export_item_read_data(invoice_id):
 def main():
-    # Fetch the data
     relation_data = api.sku_relations(country)  # mapping from ASIN to SKU
     coop_list = export_list_read_data()  # the cooperation data list
-    log.info(f"共计:{len(coop_list)} 条数据")
+    log.info(f"共计: {len(coop_list)} 条数据")
     # Get the current date and time, formatted
-    current_datetime = datetime.now().strftime('%Y%m%d%H%M')  # formatted as 'YYYY-MM-DD_HH-MM-SS'
-    # Original file name
+    current_datetime = datetime.now().strftime('%Y%m%d%H%M')
     file_name = "spa.xlsx"
-    # Build the new file name
     new_file_name = f"{current_datetime}_{file_name}"
-    i = 0
-    new_coop_data = []
-    sheet_data = {}
-    for _, coop in coop_list.iterrows():
-        i += 1
+    sheet_data = []  # holds the small datasets
+    large_sheet_data = {}  # holds large datasets (one sheet per invoice)
+    max_sheet_data = {}  # holds very large datasets (more than 5000 rows)
+    # Iterate over the cooperation list
+    for index, coop in coop_list.iterrows():
+        index += 1
         invoice_id = coop.get("Invoice ID")  # the invoice ID
-        log.info({"index": i, "invoice_id": invoice_id})
-        item_coop_data = []
-        # Fetch the item list for the invoice ID
+        log.info({"index": index, "invoice_id": invoice_id})
+        if not invoice_id:
+            log.warning(f"Missing Invoice ID, skipping record {index}")
+            continue
+        # Fetch the item list for the current invoice
         item_list = export_item_read_data(invoice_id)
         if item_list is None:
             log.warning(f"{invoice_id} has no report data yet")
             continue
-        for _, item in item_list.iterrows():
-            asin = item.get("Asin")
-            # Check whether the ASIN is empty or invalid
-            if not asin or (isinstance(asin, float) and math.isnan(asin)):
-                break
-            relation = relation_data.get(asin)
-            # When no SKU matched, record empty values
-            if not relation:
-                item['Asin'] = asin
-                item['ERP SKU'] = ""
-                item['Group Name'] = ""
-                item_coop_data.append(item)
-                continue  # skip this entry and move to the next
-            # If item_list is longer than 10, keep the original item data
-            if len(item_list) >= 10:
-                new_item = item.copy()
-                new_item.pop("Asin")
-                new_item['Asin'] = asin
-                new_item['ERP SKU'] = relation.get("erp_sku")
-                new_item['Group Name'] = relation.get("name")
-                item_coop_data.append(new_item)
-            else:
-                new_item = coop.copy()
-                rebate_in_agreement_currency = item.get("Rebate In Agreement Currency")
-                if rebate_in_agreement_currency:
-                    new_item['Original balance'] = rebate_in_agreement_currency
-                vendor_funding_in_agreement_currency = item.get("Vendor Funding In Agreement Currency")
-                if vendor_funding_in_agreement_currency:
-                    new_item['Original balance'] = vendor_funding_in_agreement_currency
-                new_item['Asin'] = asin
-                new_item['ERP SKU'] = relation.get("erp_sku")
-                new_item['Group Name'] = relation.get("name")
-                new_coop_data.append(new_item)
-        # Save the processed item data for this invoice
-        if item_coop_data:
-            sheet_data[invoice_id] = item_coop_data
-    # Save the final cooperation data
-    if new_coop_data:
-        excel.save_xls(new_coop_data, new_file_name)
-    max_sheet_data = {}
-    if sheet_data:
-        # Loop over the sheet_data dict
-        for key, value in sheet_data.items():
-            if len(value) > 5000:
-                max_sheet_data[key] = value
-                continue
-            excel.save_xls(value, new_file_name, key)
-    if max_sheet_data:
-        for key, value in max_sheet_data.items():
-            excel.save_xls(value, new_file_name, key)
+        # Route to the small- or large-data path by item_list length
+        if len(item_list) >= 10:
+            processed_items = process_large_items(item_list, relation_data)
+            if processed_items:
+                if len(processed_items) > 5000:
+                    max_sheet_data[invoice_id] = processed_items
+                else:
+                    large_sheet_data[invoice_id] = processed_items
+        else:
+            processed_items = process_small_items(item_list, coop, relation_data)
+            sheet_data.extend(processed_items)
+    # Save the collected data to the Excel file
+    save_excel(sheet_data, large_sheet_data, max_sheet_data, new_file_name)
+
+
+def process_large_items(item_list, relation_data):
+    """Process a large item list (len(item_list) >= 10)."""
+    processed_items = []
+    for _, item in item_list.iterrows():
+        asin = item.get('Asin', None)
+        if not validate_asin(asin):
+            continue
+        relation = relation_data.get(asin, {})
+        processed_item = item.copy()
+        processed_item.pop("Asin")
+        processed_item['Asin'] = asin
+        processed_item['ERP SKU'] = relation.get("erp_sku")
+        processed_item['Group Name'] = relation.get("name")
+        processed_items.append(processed_item)
+    return processed_items
+
+
+def process_small_items(item_list, coop, relation_data):
+    """Process a small item list (len(item_list) < 10)."""
+    processed_items = []
+    for _, item in item_list.iterrows():
+        asin = item.get('Asin', None)
+        if not validate_asin(asin):
+            continue
+        relation = relation_data.get(asin, {})
+        rebate = item.get("Rebate In Agreement Currency", None)
+        vendor_funding = item.get("Vendor Funding In Agreement Currency", None)
+        processed_item = coop.copy()  # copy the coop row
+        processed_item["Asin"] = asin
+        processed_item["ERP SKU"] = relation.get("erp_sku")
+        processed_item["Group Name"] = relation.get("name")
+        processed_item["Original balance"] = rebate or vendor_funding
+        processed_items.append(processed_item)
+    return processed_items
+
+
+def validate_asin(asin):
+    """Return True when the ASIN is non-empty and not NaN."""
+    return asin and not (isinstance(asin, float) and math.isnan(asin))
+
+
+def save_excel(sheet_data, large_sheet_data, max_sheet_data, new_file_name):
+    """Save the data to the Excel file."""
+    # Writer helper shared by the branches below
+    def write_sheet(writer, data, sheet_name):
+        log.info(f"Writing sheet {sheet_name}, {len(data)} records")
+        df = pd.DataFrame(data)  # convert the data to a DataFrame
+        df.to_excel(writer, sheet_name=sheet_name, index=False)
+    # Initialise the Excel writer
+    with pd.ExcelWriter(new_file_name, engine="openpyxl") as writer:
+        # Write the small data
+        if sheet_data:
+            log.info(f"Saving small data, {len(sheet_data)} records in total")
+            write_sheet(writer, sheet_data, "Sheet1")
+        # Write the large data (threads write different sheets in parallel)
+        if large_sheet_data:
+            log.info(f"Saving large data, {sum(len(data) for data in large_sheet_data.values())} records in total")
+            with ThreadPoolExecutor() as executor:
+                for sheet_name, data in large_sheet_data.items():
+                    executor.submit(write_sheet, writer, data, sheet_name)
+        # Write the very large data
+        if max_sheet_data:
+            log.info(f"Saving very large data, {sum(len(data) for data in max_sheet_data.values())} records in total")
+            with ThreadPoolExecutor() as executor:
+                for sheet_name, data in max_sheet_data.items():
+                    executor.submit(write_sheet, writer, data, sheet_name)
+    log.info(f"File {new_file_name} saved, path: {os.path.abspath(new_file_name)}")
+
+
 if __name__ == '__main__':
......
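One caveat on the new save_excel (an observation on the pattern, not part of the commit): all write_sheet threads share a single ExcelWriter, and openpyxl workbooks are not documented as safe for concurrent writes; the submit results are also discarded, so a failed sheet write would pass silently. A more defensive sketch that keeps DataFrame construction parallel but serialises workbook access and surfaces worker errors:

# Sketch of an alternative to the committed parallel section; names are illustrative.
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from threading import Lock

def save_sheets_parallel(writer, sheets):
    """Write a {sheet_name: rows} mapping using a pool of worker threads."""
    lock = Lock()

    def write_one(sheet_name, rows):
        df = pd.DataFrame(rows)  # building the frame can run concurrently
        with lock:               # only one thread touches the workbook at a time
            df.to_excel(writer, sheet_name=sheet_name, index=False)

    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(write_one, n, r) for n, r in sheets.items()]
        for future in futures:
            future.result()  # re-raise so a failed sheet is not silently skipped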