Commit 7690d798 authored by 邱阿朋

爬虫处理

parent 131c9a08
......@@ -20,7 +20,21 @@ helper.make_dir(download_path)
page.set.download_path(download_path)
def export_list(invoice_id):
def export_list_read_data():
    """Return the contents of ``ContraCogsInvoices.xls`` as a DataFrame.

    When the workbook is not already present on disk, the co-op page is
    opened in the browser, every row is selected and the report is
    downloaded through the invoice-actions dropdown before reading.

    NOTE(review): the diff view this was recovered from carries no
    indentation; the navigation and click steps are assumed to run only
    when the file is missing -- confirm against the repository.
    """
    report_file = "ContraCogsInvoices.xls"

    # Reuse a previously downloaded report instead of hitting the site again.
    if os.path.isfile(report_file):
        return pd.read_excel(report_file, engine='xlrd')

    page.get("https://vendorcentral.amazon.com/hz/vendor/members/coop?ref_=vc_xx_subNav")
    # Select all rows
    select_all_box = page.ele("#select-all")
    select_all_box.click()
    # Open the actions dropdown
    actions_dropdown = page.ele("#cc-invoice-actions-dropdown")
    actions_dropdown.click()
    # Click the download-report entry and block until the download finishes
    download_entry = page.ele("#cc-invoice-actions-dropdown_2")
    download_entry.click.to_download().wait()

    return pd.read_excel(report_file, engine='xlrd')
def export_item_read_data(invoice_id):
try:
file_name = f"coop\\{invoice_id}.csv"
if not os.path.isfile(file_name):
......@@ -30,30 +44,20 @@ def export_list(invoice_id):
# 下载报表
file_name = f"coop\\{invoice_id}.csv"
page.ele(f"#invoiceDownloads-{invoice_id}_1").click.to_download(rename=file_name).wait()
return pd.read_csv(file_name, engine='python', on_bad_lines='skip')
except ElementNotFoundError:
print("导出按钮不存在刷新网页")
page.refresh()
export_list(invoice_id)
export_item_read_data(invoice_id)
def main():
page.get("https://vendorcentral.amazon.com/hz/vendor/members/coop?ref_=vc_xx_subNav")
file_name = "ContraCogsInvoices.xls"
if not os.path.isfile(file_name):
# 全选
page.ele("#select-all").click()
# 点击选项卡
page.ele("#cc-invoice-actions-dropdown").click()
# 点击下载报表
page.ele("#cc-invoice-actions-dropdown_2").click.to_download().wait()
file_name = "ContraCogsInvoices.xls"
coop_data = pd.read_excel(file_name, engine='xlrd')
coop_data = export_list_read_data()
for _, data in coop_data.iterrows():
# 根据回款id搜索下载报表
invoice_id = data.get("Invoice ID")
print(invoice_id)
export_list(invoice_id)
export_item_read_data(invoice_id)
if __name__ == '__main__':
......
......@@ -24,14 +24,14 @@ page.set.download_path(download_path)
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")
def export_list_filter_data():
def export_list_read_data():
file_name = 'Payments.xlsx'
if not os.path.isfile(file_name):
page.get(f"https://vendorcentral.amazon.com/hz/vendor/members/remittance/home")
page.ele("#remittance-home-select-all").click()
page.ele("#remittance-home-export-link").click.to_download().wait()
df = pd.read_excel('Payments.xlsx', skiprows=22)
df = pd.read_excel(file_name, skiprows=22)
# 定义正则表达式模式,匹配包含 'Price' 或 'PCR' 或 'XXXXXXXX/XXXX/' 的描述
pattern = r'Price|PCR|Missed|Shortage|^[A-Z0-9]{8}/[A-Z0-9]{4}/'
# 过滤符合条件的行
......@@ -65,7 +65,7 @@ def export_details_read_data(invoice_number):
def main():
list_data = export_list_filter_data()
list_data = export_list_read_data()
excel.save_xls(list_data, "回款数据.xlsx", "Remittance payments")
all_normal_pay_data = []
......
......@@ -57,20 +57,19 @@ def asin_sku_relations():
return relations_dict
def export_list():
def export_list_read_data():
file_name = "Return_Summary.xls"
if not os.path.isfile(file_name):
# 访问网页
open_url("https://vendorcentral.amazon.com/hz/vendor/members/returns?ref_=vc_xx_subNav")
# 导出退货单
page.ele("#file-download-button").click.to_download().wait()
return pd.read_excel('Return_Summary.xls', engine='xlrd')
return pd.read_excel(file_name, engine='xlrd')
def export_item(return_id):
returns_dir = "returns"
helper.make_dir(returns_dir)
file_name = f"{returns_dir}\\{return_id}.xls"
def export_item_read_data(return_id):
file_name = f"returns\\{return_id}.xls"
if not os.path.isfile(file_name):
# 打开退回详情下载明细
open_url(f"https://vendorcentral.amazon.com/katalmonsapp/vendor/members/returns/{return_id}")
......@@ -85,13 +84,13 @@ def main():
relations_dict = asin_sku_relations()
# 下载并读取list数据
list_data = export_list()
list_data = export_list_read_data()
new_list_data = []
for _, data in list_data.iterrows():
return_id = data.get('Return ID')
# 下载退货详情表格读取数据
item_data = export_item(return_id)
item_data = export_item_read_data(return_id)
# 按 'Purchase order' 和 'ASIN' 分组,并对 'Quantity' 和 Total amount 进行求和
item_data_result = item_data.groupby(['Purchase order', 'ASIN'], as_index=False).agg({
'Quantity': 'sum',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment