Commit 7f607f47 authored by 邱阿朋

spa数据查询

parent e9bc06f5
# coding: utf-8
# 回款明细
import math
import os
import shutil
import time
from urllib.parse import urlparse, parse_qs
import pandas as pd
from DrissionPage import ChromiumPage
from DrissionPage.errors import PageDisconnectedError, ElementNotFoundError
from DrissionPage.errors import ElementNotFoundError
from lxml import etree
from helper import helper
from helper import helper, file, excel
# Module-level browser page object; the functions in this file drive it directly.
page = ChromiumPage()
# Switch the page to the "eager" load mode.
page.set.load_mode.eager()
......@@ -20,6 +25,16 @@ helper.make_dir(download_path)
# Point browser downloads at download_path (defined in a part of the file not shown here).
page.set.download_path(download_path)
def asin_sku_relations():
    """Build an ASIN lookup table from ``relations.xlsx``.

    Reads the workbook with pandas and maps each row's ``ASIN`` value to a
    dict holding that row's ``SKU`` and ``NAME`` values. When the same ASIN
    appears on several rows, the last row wins.

    Returns:
        dict: ``{asin: {"SKU": sku, "NAME": name}}``.
    """
    sheet = pd.read_excel('relations.xlsx')
    # Turn every row into a plain dict first, then index those dicts by ASIN.
    records = (row.to_dict() for _, row in sheet.iterrows())
    return {
        record['ASIN']: {"SKU": record['SKU'], "NAME": record['NAME']}
        for record in records
    }
def export_list_read_data():
file_name = "ContraCogsInvoices.xls"
if not os.path.isfile(file_name):
......@@ -36,15 +51,52 @@ def export_list_read_data():
# Fetch the report data for one invoice from the Vendor Central coop page.
#
# The visible lines open the coop search page for invoice_id, click the
# "#a-autoid-2-announce" tab, and then either download coop\<invoice_id>.csv
# and read it with pandas, or click the "_2" download control, collect the
# links inside "#backup-report-table", download each BackupReport.xls and
# return the first sheet whose header contains one of the two expected
# columns (keeping only rows with a non-null 'Asin').
# When an element is missing (ElementNotFoundError) a message is printed and
# the page is refreshed.
#
# NOTE(review): this text has lost its indentation and looks like a diff view
# in which removed and added lines are interleaved (the file_name assignment
# and the page.get/click sequence each appear twice) -- confirm which lines
# belong to the current version before relying on this description.
def export_item_read_data(invoice_id):
try:
file_name = f"coop\\{invoice_id}.csv"
if not os.path.isfile(file_name):
page.get(f"https://vendorcentral.amazon.com/hz/vendor/members/coop?searchText={invoice_id}")
# Click the tab
page.ele("#a-autoid-2-announce").click()
# Download the report
file_name = f"coop\\{invoice_id}.csv"
page.ele(f"#invoiceDownloads-{invoice_id}_1").click.to_download(rename=file_name).wait()
return pd.read_csv(file_name, engine='python', on_bad_lines='skip')
page.get(f"https://vendorcentral.amazon.com/hz/vendor/members/coop?searchText={invoice_id}")
# Click the tab
page.ele("#a-autoid-2-announce").click()
# Download the report
page.ele(f"#invoiceDownloads-{invoice_id}_2").click()
# Fixed 2-second pause before reading the report table.
time.sleep(2)
# Grab the HTML of the report table
report_table_html = page.ele("#backup-report-table").html
tree = etree.HTML(report_table_html)
# Extract every link
links = tree.xpath('//table[@id="backup-report-table"]//a/@href')
for link in links:
# Parse the query parameters of the link
parsed_url = urlparse(link)
query_params = parse_qs(parsed_url.query)
# Extract the fileName parameter; the fallback string means "file name not found".
filename = query_params.get('fileName', ['未找到文件名'])[0]
report_file_dir = f"coop\\{invoice_id}"
# Each report gets its own sub-folder named after the fileName parameter.
report_file_tmp_dir = f"{report_file_dir}\\{filename}\\"
# The hrefs are joined onto the Vendor Central origin, so they are treated as site-relative paths.
full_url = "https://vendorcentral.amazon.com" + link
page.download(full_url, report_file_tmp_dir, show_msg=False)
report_file = report_file_tmp_dir + "BackupReport.xls"
file.wait_for_downloads(report_file)
try:
df = pd.read_excel(report_file)
# Get the header row
headers = df.columns.tolist()
# Column names to look for
column_names_to_check = ["Rebate In Agreement Currency", "Vendor Funding In Agreement Currency"]
# Check whether the headers qualify for reading; if not, delete the folder
header_is_normal = any(column in headers for column in column_names_to_check)
if not header_is_normal:
shutil.rmtree(report_file_tmp_dir)
continue
# Keep only the rows that have an 'Asin' value.
df = df[df['Asin'].notna()]
return df
except ValueError:
# Recursively delete the folder
shutil.rmtree(report_file_tmp_dir)
except ElementNotFoundError:
# The message reads "export button does not exist, refreshing the page".
print("导出按钮不存在刷新网页")
page.refresh()
......@@ -52,19 +104,67 @@ def export_item_read_data(invoice_id):
# Script driver: loads the ASIN/SKU relations and the coop list, looks up the
# item rows for each invoice, tags them with 'ERP SKU' and 'Group Name', and
# writes the results to 'SPA查询.xlsx' through excel.save_xls.
#
# NOTE(review): indentation is lost here and the text looks like a diff view
# with removed and added lines interleaved -- the first loop over coop_data
# and the second page.close() at the end may be leftovers of the other
# version; confirm against the real file.
def main():
coop_data = export_list_read_data()
for _, data in coop_data.iterrows():
# Search for and download the report by payment (invoice) id
invoice_id = data.get("Invoice ID")
print(invoice_id)
export_item_read_data(invoice_id)
relation_data = asin_sku_relations() # Load the ASIN-to-SKU relation data
coop_list = export_list_read_data() # Load the coop data list
# Prints the total number of rows in the coop list.
print(f"共计:{len(coop_list)},条数据")
i = 0
new_coop_data = []
for _, coop in coop_list.iterrows():
# Stop once 20 invoices have been processed.
if i == 20: break
i += 1
invoice_id = coop.get("Invoice ID") # Get the invoice ID
print({"index": i, "invoice_id": invoice_id})
item_coop_data = []
# Fetch the item list for this invoice ID
item_list = export_item_read_data(invoice_id)
for _, item in item_list.iterrows():
asin = item.get("Asin")
# Check whether the ASIN is empty or invalid (falsy or a float NaN); the loop is left with break, not continue
if not asin or (isinstance(asin, float) and math.isnan(asin)):
break
relation = relation_data.get(asin)
# If no SKU matches, record empty values
if not relation:
# The message reads "no SKU matched: <asin>".
print(f"未匹配到 SKU:{asin}")
item['Asin'] = asin
item['ERP SKU'] = ""
item['Group Name'] = ""
item_coop_data.append(item)
continue # Skip this entry and move on to the next
# If item_list is longer than 10, use the original item data
# NOTE(review): the condition below is actually ">= 10", not "> 10" -- confirm which is intended.
if len(item_list) >= 10:
new_item = item.copy()
new_item['Asin'] = asin
new_item['ERP SKU'] = relation.get("SKU")
new_item['Group Name'] = relation.get("NAME")
item_coop_data.append(new_item)
else:
# Otherwise create a new entry (copied from the coop row rather than the item row)
new_item = coop.copy()
new_item['Asin'] = asin
new_item['ERP SKU'] = relation.get("SKU")
new_item['Group Name'] = relation.get("NAME")
new_coop_data.append(new_item)
# Save the processed item data to the Excel file (invoice_id is passed as the third argument)
if item_coop_data:
excel.save_xls(item_coop_data, 'SPA查询.xlsx', invoice_id)
# Save the final coop data
if new_coop_data:
excel.save_xls(new_coop_data, 'SPA查询.xlsx')
page.close() # Close the page
page.close()
# Script entry point: run main() only when the file is executed directly.
# NOTE(review): indentation is lost in this view, so it is unclear whether the
# trailing main() belongs to the PageDisconnectedError handler (a retry) or is
# a leftover line from the other side of the diff -- confirm.
if __name__ == '__main__':
try:
main()
except KeyboardInterrupt:
# KeyboardInterrupt is ignored without printing anything.
pass
except PageDisconnectedError as e:
# The message reads "the connection to the page has been lost".
print("与页面的连接已断开")
main()
......@@ -55,7 +55,7 @@ def export_details_read_data(invoice_number):
# 将字典转换为 URL 查询参数
query_string = urllib.parse.urlencode(params)
page.get(
f"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-details?" + query_string)
f"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-details" + query_string)
if not os.path.isfile(file_name):
page.ele("#line-items-export-to-spreadsheet-announce", timeout=5).click.to_download(rename=file_name)
......
No preview for this file type
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment