Commit 7690d798 authored by 邱阿朋's avatar 邱阿朋

爬虫处理

parent 131c9a08
...@@ -20,7 +20,21 @@ helper.make_dir(download_path) ...@@ -20,7 +20,21 @@ helper.make_dir(download_path)
page.set.download_path(download_path) page.set.download_path(download_path)
def export_list_read_data():
    """Fetch the co-op invoice list report and return it as a DataFrame.

    The browser is only driven when ``ContraCogsInvoices.xls`` is not
    already present in the working directory; otherwise the cached file
    is read directly.
    """
    report = "ContraCogsInvoices.xls"
    if not os.path.isfile(report):
        page.get("https://vendorcentral.amazon.com/hz/vendor/members/coop?ref_=vc_xx_subNav")
        # Select every invoice on the page.
        page.ele("#select-all").click()
        # Open the actions dropdown.
        page.ele("#cc-invoice-actions-dropdown").click()
        # Trigger the report download and block until it completes.
        page.ele("#cc-invoice-actions-dropdown_2").click.to_download().wait()
    return pd.read_excel(report, engine='xlrd')
def export_item_read_data(invoice_id):
try: try:
file_name = f"coop\\{invoice_id}.csv" file_name = f"coop\\{invoice_id}.csv"
if not os.path.isfile(file_name): if not os.path.isfile(file_name):
...@@ -30,30 +44,20 @@ def export_list(invoice_id): ...@@ -30,30 +44,20 @@ def export_list(invoice_id):
# 下载报表 # 下载报表
file_name = f"coop\\{invoice_id}.csv" file_name = f"coop\\{invoice_id}.csv"
page.ele(f"#invoiceDownloads-{invoice_id}_1").click.to_download(rename=file_name).wait() page.ele(f"#invoiceDownloads-{invoice_id}_1").click.to_download(rename=file_name).wait()
return pd.read_csv(file_name, engine='python', on_bad_lines='skip')
except ElementNotFoundError: except ElementNotFoundError:
print("导出按钮不存在刷新网页") print("导出按钮不存在刷新网页")
page.refresh() page.refresh()
export_list(invoice_id) export_item_read_data(invoice_id)
def main():
page.get("https://vendorcentral.amazon.com/hz/vendor/members/coop?ref_=vc_xx_subNav")
file_name = "ContraCogsInvoices.xls"
if not os.path.isfile(file_name):
# 全选
page.ele("#select-all").click()
# 点击选项卡
page.ele("#cc-invoice-actions-dropdown").click()
# 点击下载报表
page.ele("#cc-invoice-actions-dropdown_2").click.to_download().wait()
def main():
    """Iterate the invoice list and download each invoice's detail report."""
    invoice_list = export_list_read_data()
    for _, row in invoice_list.iterrows():
        # Search for and download the report for this remittance id.
        invoice_id = row.get("Invoice ID")
        print(invoice_id)
        export_item_read_data(invoice_id)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -24,14 +24,14 @@ page.set.download_path(download_path) ...@@ -24,14 +24,14 @@ page.set.download_path(download_path)
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl") warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")
def export_list_filter_data(): def export_list_read_data():
file_name = 'Payments.xlsx' file_name = 'Payments.xlsx'
if not os.path.isfile(file_name): if not os.path.isfile(file_name):
page.get(f"https://vendorcentral.amazon.com/hz/vendor/members/remittance/home") page.get(f"https://vendorcentral.amazon.com/hz/vendor/members/remittance/home")
page.ele("#remittance-home-select-all").click() page.ele("#remittance-home-select-all").click()
page.ele("#remittance-home-export-link").click.to_download().wait() page.ele("#remittance-home-export-link").click.to_download().wait()
df = pd.read_excel('Payments.xlsx', skiprows=22) df = pd.read_excel(file_name, skiprows=22)
# 定义正则表达式模式,匹配包含 'Price' 或 'PCR' 或 'XXXXXXXX/XXXX/' 的描述 # 定义正则表达式模式,匹配包含 'Price' 或 'PCR' 或 'XXXXXXXX/XXXX/' 的描述
pattern = r'Price|PCR|Missed|Shortage|^[A-Z0-9]{8}/[A-Z0-9]{4}/' pattern = r'Price|PCR|Missed|Shortage|^[A-Z0-9]{8}/[A-Z0-9]{4}/'
# 过滤符合条件的行 # 过滤符合条件的行
...@@ -65,7 +65,7 @@ def export_details_read_data(invoice_number): ...@@ -65,7 +65,7 @@ def export_details_read_data(invoice_number):
def main(): def main():
list_data = export_list_filter_data() list_data = export_list_read_data()
excel.save_xls(list_data, "回款数据.xlsx", "Remittance payments") excel.save_xls(list_data, "回款数据.xlsx", "Remittance payments")
all_normal_pay_data = [] all_normal_pay_data = []
......
...@@ -57,20 +57,19 @@ def asin_sku_relations(): ...@@ -57,20 +57,19 @@ def asin_sku_relations():
return relations_dict return relations_dict
def export_list_read_data():
    """Fetch the returns summary report and return it as a DataFrame.

    Skips the browser round-trip when ``Return_Summary.xls`` already
    exists locally.
    """
    report = "Return_Summary.xls"
    if not os.path.isfile(report):
        # Open the returns page.
        open_url("https://vendorcentral.amazon.com/hz/vendor/members/returns?ref_=vc_xx_subNav")
        # Export the return orders report and wait for the download.
        page.ele("#file-download-button").click.to_download().wait()
    return pd.read_excel(report, engine='xlrd')
def export_item(return_id):
returns_dir = "returns"
helper.make_dir(returns_dir)
file_name = f"{returns_dir}\\{return_id}.xls" def export_item_read_data(return_id):
file_name = f"returns\\{return_id}.xls"
if not os.path.isfile(file_name): if not os.path.isfile(file_name):
# 打开退回详情下载明细 # 打开退回详情下载明细
open_url(f"https://vendorcentral.amazon.com/katalmonsapp/vendor/members/returns/{return_id}") open_url(f"https://vendorcentral.amazon.com/katalmonsapp/vendor/members/returns/{return_id}")
...@@ -85,13 +84,13 @@ def main(): ...@@ -85,13 +84,13 @@ def main():
relations_dict = asin_sku_relations() relations_dict = asin_sku_relations()
# 下载并读取list数据 # 下载并读取list数据
list_data = export_list() list_data = export_list_read_data()
new_list_data = [] new_list_data = []
for _, data in list_data.iterrows(): for _, data in list_data.iterrows():
return_id = data.get('Return ID') return_id = data.get('Return ID')
# 下载退货详情表格读取数据 # 下载退货详情表格读取数据
item_data = export_item(return_id) item_data = export_item_read_data(return_id)
# 按 'Purchase order' 和 'ASIN' 分组,并对 'Quantity' 和 Total amount 进行求和 # 按 'Purchase order' 和 'ASIN' 分组,并对 'Quantity' 和 Total amount 进行求和
item_data_result = item_data.groupby(['Purchase order', 'ASIN'], as_index=False).agg({ item_data_result = item_data.groupby(['Purchase order', 'ASIN'], as_index=False).agg({
'Quantity': 'sum', 'Quantity': 'sum',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment