Commit f1481184 authored by 邱阿朋's avatar 邱阿朋

spa数据查询

parent 235f7228
...@@ -11,7 +11,7 @@ from DrissionPage import ChromiumPage ...@@ -11,7 +11,7 @@ from DrissionPage import ChromiumPage
from DrissionPage.errors import ElementNotFoundError from DrissionPage.errors import ElementNotFoundError
from lxml import etree from lxml import etree
from helper import helper, file, excel from helper import helper, excel, file
page = ChromiumPage() page = ChromiumPage()
page.set.load_mode.eager() page.set.load_mode.eager()
...@@ -49,69 +49,73 @@ def export_list_read_data(): ...@@ -49,69 +49,73 @@ def export_list_read_data():
return pd.read_excel(file_name, engine='xlrd') return pd.read_excel(file_name, engine='xlrd')
def get_report_table_html(invoice_id):
    """Open the coop search page for *invoice_id* and return the HTML of the
    backup-report table.

    If the expected page elements are missing (page not finished loading),
    the page is refreshed and the whole lookup is retried.

    :param invoice_id: invoice identifier used in the search query and the
        download-widget element id.
    :return: HTML string of the ``#backup-report-table`` element.
    """
    try:
        page.get(f"https://vendorcentral.amazon.com/hz/vendor/members/coop?searchText={invoice_id}")
        # 点击选项卡 — switch to the downloads tab.
        page.ele("#a-autoid-2-announce").click()
        # 下载报表 — trigger the backup-report download widget for this invoice.
        page.ele(f"#invoiceDownloads-{invoice_id}_2").click()
        time.sleep(1)
        # 获取报表表单内容 — grab the rendered report table for link parsing.
        return page.ele("#backup-report-table").html
    except ElementNotFoundError:
        # Element missing: refresh and retry. Returning the recursive call is
        # essential — otherwise this function falls through and returns None,
        # which the caller would pass straight into etree.HTML().
        page.refresh()
        return get_report_table_html(invoice_id)
def export_item_read_data(invoice_id):
    """Return a DataFrame of backup-report rows (rows with a non-null ``Asin``)
    for *invoice_id*, or ``None`` when no usable report could be obtained.

    Results are cached in ``coop\\{invoice_id}.xlsx``; a cached file is read
    back directly without hitting the site again. Otherwise the report table
    is scraped, each linked backup report is downloaded and the first one
    carrying the expected columns is kept.

    :param invoice_id: invoice identifier to fetch the report for.
    :return: ``pandas.DataFrame`` or ``None``.
    """
    file_name = f"coop\\{invoice_id}.xlsx"
    # Reuse previously exported data if present.
    if os.path.isfile(file_name):
        return pd.read_excel(file_name)
    # 获取报表表单内容 — fetch the report table HTML and pull all links.
    report_table_html = get_report_table_html(invoice_id)
    tree = etree.HTML(report_table_html)
    # 提取所有链接
    links = tree.xpath('//table[@id="backup-report-table"]//a/@href')
    for link in links:
        # 解析链接中的查询参数 — extract the fileName query parameter so each
        # download lands in its own subdirectory.
        query_params = parse_qs(urlparse(link).query)
        filename = query_params.get('fileName', ['未找到文件名'])[0]
        # NOTE(review): placeholder reconstructed as {filename}; the scraped
        # source showed "(unknown)" here while `filename` went unused.
        report_file_tmp_dir = f"coop\\{invoice_id}\\{filename}\\"
        page.download("https://vendorcentral.amazon.com" + link, report_file_tmp_dir, show_msg=True)
        report_file = report_file_tmp_dir + "BackupReport.xls"
        file.wait_for_downloads(report_file)
        try:
            df = pd.read_excel(report_file)
        except ValueError:
            # Unreadable/corrupt download — try the next link. (Keeping the
            # try body to the read call only, so real bugs aren't swallowed.)
            continue
        # 要检查的列名 — a usable report must carry one of these columns.
        column_names_to_check = ["Rebate In Agreement Currency", "Vendor Funding In Agreement Currency"]
        headers = df.columns.tolist()
        # 判断头文件是否满足读取条件 — skip reports without the expected header.
        if not any(column in headers for column in column_names_to_check):
            continue
        data = df[df['Asin'].notna()]
        excel.save_xls(data, file_name)
        # Remove the temporary download tree now that the data is cached.
        shutil.rmtree(f"coop\\{invoice_id}")
        return pd.read_excel(file_name)
    # No link yielded a usable report; caller treats None as "no report".
    return None
def main(): def main():
relation_data = asin_sku_relations() # 获取 ASIN 与 SKU 的对应关系数据 relation_data = asin_sku_relations() # 获取 ASIN 与 SKU 的对应关系数据
coop_list = export_list_read_data() # 获取合作数据列表 coop_list = export_list_read_data() # 获取合作数据列表
# coop_list = coop_list[:189]
print(f"共计:{len(coop_list)},条数据") print(f"共计:{len(coop_list)},条数据")
i = 0 i = 0
new_coop_data = [] new_coop_data = []
sheet_data = {}
for _, coop in coop_list.iterrows(): for _, coop in coop_list.iterrows():
i += 1 i += 1
invoice_id = coop.get("Invoice ID") # 获取发票 ID invoice_id = coop.get("Invoice ID") # 获取发票 ID
...@@ -120,6 +124,10 @@ def main(): ...@@ -120,6 +124,10 @@ def main():
item_coop_data = [] item_coop_data = []
# 根据发票 ID 获取 item 列表 # 根据发票 ID 获取 item 列表
item_list = export_item_read_data(invoice_id) item_list = export_item_read_data(invoice_id)
if item_list is None:
print(f"{invoice_id} 暂无报告信息")
continue
for _, item in item_list.iterrows(): for _, item in item_list.iterrows():
asin = item.get("Asin") asin = item.get("Asin")
...@@ -131,7 +139,6 @@ def main(): ...@@ -131,7 +139,6 @@ def main():
# 如果未匹配到 SKU,记录空值 # 如果未匹配到 SKU,记录空值
if not relation: if not relation:
print(f"未匹配到 SKU:{asin}")
item['Asin'] = asin item['Asin'] = asin
item['ERP SKU'] = "" item['ERP SKU'] = ""
item['Group Name'] = "" item['Group Name'] = ""
...@@ -155,12 +162,27 @@ def main(): ...@@ -155,12 +162,27 @@ def main():
# 保存已处理的 item 数据到 Excel 文件中 # 保存已处理的 item 数据到 Excel 文件中
if item_coop_data: if item_coop_data:
excel.save_xls(item_coop_data, 'SPA查询.xlsx', invoice_id) sheet_data[invoice_id] = item_coop_data
# 保存最终的合作数据 # 保存最终的合作数据
if new_coop_data: if new_coop_data:
excel.save_xls(new_coop_data, 'SPA查询.xlsx') excel.save_xls(new_coop_data, 'SPA查询.xlsx')
max_sheet_data = {}
if sheet_data:
# 循环遍历 sheet_data 字典
for key, value in sheet_data.items():
if len(value) > 10000:
max_sheet_data[key] = value
continue
excel.save_xls(value, 'SPA查询.xlsx', key)
if max_sheet_data:
for key, value in max_sheet_data.items():
excel.save_xls(value, 'SPA查询.xlsx', key)
page.close() # 关闭页面 page.close() # 关闭页面
......
...@@ -14,7 +14,7 @@ def wait_for_downloads(file_name, timeout=60): ...@@ -14,7 +14,7 @@ def wait_for_downloads(file_name, timeout=60):
files = os.path.isfile(file_name) files = os.path.isfile(file_name)
if files: if files:
return True return True
time.sleep(1) time.sleep(0.5)
return False return False
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment