Commit f1481184 authored by 邱阿朋's avatar 邱阿朋

spa数据查询

parent 235f7228
......@@ -11,7 +11,7 @@ from DrissionPage import ChromiumPage
from DrissionPage.errors import ElementNotFoundError
from lxml import etree
from helper import helper, file, excel
from helper import helper, excel, file
page = ChromiumPage()
page.set.load_mode.eager()
......@@ -49,69 +49,73 @@ def export_list_read_data():
return pd.read_excel(file_name, engine='xlrd')
def get_report_table_html(invoice_id):
    """Open the Vendor Central coop page for *invoice_id*, trigger the
    backup-report generation and return the report table's HTML.

    If the export button has not rendered yet (``ElementNotFoundError``),
    the page is refreshed and the whole lookup is retried.

    :param invoice_id: invoice id used in the search URL and element ids
    :return: HTML string of the ``#backup-report-table`` element
    """
    try:
        page.get(f"https://vendorcentral.amazon.com/hz/vendor/members/coop?searchText={invoice_id}")
        # Open the downloads tab.
        page.ele("#a-autoid-2-announce").click()
        # Request the backup report for this invoice.
        page.ele(f"#invoiceDownloads-{invoice_id}_2").click()
        # Brief pause so the report table has a chance to populate.
        time.sleep(1)
        # Hand the table HTML back to the caller for link extraction.
        return page.ele("#backup-report-table").html
    except ElementNotFoundError:
        # The export button renders asynchronously; refresh and retry.
        # BUGFIX: the previous code recursed into export_item_read_data()
        # and dropped its result, so this function returned None whenever
        # a retry happened; retry *this* function and return its result.
        print("导出按钮不存在刷新网页")
        page.refresh()
        return get_report_table_html(invoice_id)
def export_item_read_data(invoice_id):
    """Return the backup-report item DataFrame for ``invoice_id``.

    Results are cached as ``coop\\<invoice_id>.xlsx``; when the cache file
    exists it is read directly.  Otherwise every report link from the coop
    page is downloaded in turn until one yields a readable report with the
    expected columns; that report is filtered, persisted to the cache, and
    the cached copy is returned.

    :param invoice_id: invoice id to look up
    :return: ``pandas.DataFrame`` of rows with a non-null ``Asin`` column,
        or ``None`` when no link produced a usable report
    """
    file_name = f"coop\\{invoice_id}.xlsx"
    # Cached result from a previous run -- skip the download entirely.
    if os.path.isfile(file_name):
        return pd.read_excel(file_name)
    # Fetch the report table HTML and collect all download links from it.
    report_table_html = get_report_table_html(invoice_id)
    tree = etree.HTML(report_table_html)
    links = tree.xpath('//table[@id="backup-report-table"]//a/@href')
    for link in links:
        # Extract the fileName query parameter for the per-file temp dir.
        query_params = parse_qs(urlparse(link).query)
        filename = query_params.get('fileName', ['未找到文件名'])[0]
        # NOTE(review): temp dir uses the extracted filename -- previously
        # the extracted value was never used; confirm against the on-disk
        # layout produced by page.download().
        report_file_tmp_dir = f"coop\\{invoice_id}\\{filename}\\"
        page.download("https://vendorcentral.amazon.com" + link, report_file_tmp_dir, show_msg=True)
        report_file = report_file_tmp_dir + "BackupReport.xls"
        file.wait_for_downloads(report_file)
        try:
            df = pd.read_excel(report_file)
            # The report is only usable when it carries one of these columns.
            column_names_to_check = ["Rebate In Agreement Currency", "Vendor Funding In Agreement Currency"]
            if not any(column in df.columns for column in column_names_to_check):
                continue
            data = df[df['Asin'].notna()]
            # Persist to the cache, remove the temporary download tree, then
            # reload so callers always get the exact cached representation.
            excel.save_xls(data, file_name)
            shutil.rmtree(f"coop\\{invoice_id}")
            return pd.read_excel(file_name)
        except ValueError as err:
            # Unreadable/partial download -- report it and try the next link
            # instead of silently swallowing the error.
            print(f"{report_file} 读取失败: {err}")
            continue
    # No link produced a valid report.
    return None
def main():
relation_data = asin_sku_relations() # 获取 ASIN 与 SKU 的对应关系数据
coop_list = export_list_read_data() # 获取合作数据列表
# coop_list = coop_list[:189]
print(f"共计:{len(coop_list)},条数据")
i = 0
new_coop_data = []
sheet_data = {}
for _, coop in coop_list.iterrows():
i += 1
invoice_id = coop.get("Invoice ID") # 获取发票 ID
......@@ -120,6 +124,10 @@ def main():
item_coop_data = []
# 根据发票 ID 获取 item 列表
item_list = export_item_read_data(invoice_id)
if item_list is None:
print(f"{invoice_id} 暂无报告信息")
continue
for _, item in item_list.iterrows():
asin = item.get("Asin")
......@@ -131,7 +139,6 @@ def main():
# 如果未匹配到 SKU,记录空值
if not relation:
print(f"未匹配到 SKU:{asin}")
item['Asin'] = asin
item['ERP SKU'] = ""
item['Group Name'] = ""
......@@ -155,12 +162,27 @@ def main():
# 保存已处理的 item 数据到 Excel 文件中
if item_coop_data:
excel.save_xls(item_coop_data, 'SPA查询.xlsx', invoice_id)
sheet_data[invoice_id] = item_coop_data
# 保存最终的合作数据
if new_coop_data:
excel.save_xls(new_coop_data, 'SPA查询.xlsx')
max_sheet_data = {}
if sheet_data:
# 循环遍历 sheet_data 字典
for key, value in sheet_data.items():
if len(value) > 10000:
max_sheet_data[key] = value
continue
excel.save_xls(value, 'SPA查询.xlsx', key)
if max_sheet_data:
for key, value in max_sheet_data.items():
excel.save_xls(value, 'SPA查询.xlsx', key)
page.close() # 关闭页面
......
......@@ -14,7 +14,7 @@ def wait_for_downloads(file_name, timeout=60):
files = os.path.isfile(file_name)
if files:
return True
time.sleep(1)
time.sleep(0.5)
return False
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment