Commit e9bc06f5 authored by 邱阿朋

回款数据

parent 3fa0dfb5
...@@ -59,6 +59,7 @@ def main(): ...@@ -59,6 +59,7 @@ def main():
print(invoice_id) print(invoice_id)
export_item_read_data(invoice_id) export_item_read_data(invoice_id)
page.close()
if __name__ == '__main__': if __name__ == '__main__':
try: try:
......
...@@ -58,7 +58,7 @@ def export_details_read_data(invoice_number): ...@@ -58,7 +58,7 @@ def export_details_read_data(invoice_number):
f"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-details?" + query_string) f"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-details?" + query_string)
if not os.path.isfile(file_name): if not os.path.isfile(file_name):
page.ele("#line-items-export-to-spreadsheet-announce", timeout=2).click.to_download(rename=file_name) page.ele("#line-items-export-to-spreadsheet-announce", timeout=5).click.to_download(rename=file_name)
file.wait_for_downloads(file_name) file.wait_for_downloads(file_name)
excel.remove_last_comma(file_name) excel.remove_last_comma(file_name)
...@@ -80,8 +80,6 @@ def get_content(tree, row_index: int, cell_index: int) -> str: ...@@ -80,8 +80,6 @@ def get_content(tree, row_index: int, cell_index: int) -> str:
def get_po_code(index, po_id) -> dict: def get_po_code(index, po_id) -> dict:
page.get("https://vendorcentral.amazon.com/po/vendor/members/po-mgmt/dashboard", timeout=3)
result = { result = {
"index": index, "index": index,
"po_id": po_id "po_id": po_id
...@@ -89,7 +87,7 @@ def get_po_code(index, po_id) -> dict: ...@@ -89,7 +87,7 @@ def get_po_code(index, po_id) -> dict:
page.get(f"https://vendorcentral.amazon.com/po/vendor/members/po-mgmt/order?poId={po_id}", timeout=3) page.get(f"https://vendorcentral.amazon.com/po/vendor/members/po-mgmt/order?poId={po_id}", timeout=3)
po_table = page.ele("#po-header", timeout=2).html po_table = page.ele("#po-header", timeout=5).html
# 使用 lxml 解析 HTML # 使用 lxml 解析 HTML
tree = etree.HTML(po_table) tree = etree.HTML(po_table)
...@@ -107,6 +105,9 @@ def get_po_code(index, po_id) -> dict: ...@@ -107,6 +105,9 @@ def get_po_code(index, po_id) -> dict:
def price_extract_data(html_content): def price_extract_data(html_content):
# 使用正则表达式删除所有 HTML 注释
html_content = re.sub(r'<!--.*?-->', '', html_content)
# 使用 lxml 解析 HTML # 使用 lxml 解析 HTML
tree = etree.HTML(html_content) tree = etree.HTML(html_content)
...@@ -116,26 +117,25 @@ def price_extract_data(html_content): ...@@ -116,26 +117,25 @@ def price_extract_data(html_content):
data_list = [] data_list = []
for row in rows: for row in rows:
# 确保在提取数据之前定义data为字典 # 定义 data 字典,提取并去除多余字符
data = { data = {
'PO_NUMBER': row.xpath('./td[@data-column="PO_NUMBER"]/span/span/a/text()'), 'PO_NUMBER': row.xpath('string(./td[@data-column="PO_NUMBER"]/span/span/a)').strip(),
'ASIN': row.xpath('./td[@data-column="ASIN"]/span/span/a/text()'), 'ASIN': row.xpath('string(./td[@data-column="ASIN"]/span/span/a)').strip(),
'EXTERNAL_ID': row.xpath('./td[@data-column="EXTERNAL_ID"]/span/span/a/text()'), 'EXTERNAL_ID': row.xpath('string(./td[@data-column="EXTERNAL_ID"]/span/span/a)').strip(),
'TITLE': row.xpath('./td[@data-column="TITLE"]/text()'), 'TITLE': row.xpath('string(./td[@data-column="TITLE"])').strip(),
'QUANTITY': row.xpath('./td[@data-column="QUANTITY"]/text()'), 'QUANTITY': row.xpath('string(./td[@data-column="QUANTITY"])').strip(),
'INVOICE_COST': row.xpath('./td[@data-column="INVOICE_COST"]/text()'), 'INVOICE_COST': row.xpath('string(./td[@data-column="INVOICE_COST"])').strip().replace('$', ''),
'PO_COST': row.xpath('./td[@data-column="PO_COST"]/text()'), 'PO_COST': row.xpath('string(./td[@data-column="PO_COST"])').strip().replace('$', ''),
'INITIAL_RESEARCH_COST': row.xpath('./td[@data-column="INITIAL_RESEARCH_COST"]/text()'), 'INITIAL_RESEARCH_COST': row.xpath('string(./td[@data-column="INITIAL_RESEARCH_COST"])').strip().replace(
'RESOLUTION_DECISION': row.xpath('./td[@data-column="RESOLUTION_DECISION"]/text()'), '$', ''),
'RESOLUTION_COST': row.xpath('./td[@data-column="RESOLUTION_COST"]/text()') 'RESOLUTION_DECISION': row.xpath('string(./td[@data-column="RESOLUTION_DECISION"])').strip(),
'RESOLUTION_COST': row.xpath('string(./td[@data-column="RESOLUTION_COST"])').strip().replace('$', '')
} }
# 使用.get()方法安全地获取列表中的第一个元素 # 如果字段为空则设为空字符串
for key in data: for key in data:
if data[key]: # 检查列表是否非空 if not data[key]:
data[key] = data[key][0].strip() # 访问第一个元素并去除空格 data[key] = "" # 将 None 转为 ""
else:
data[key] = None # 或者赋值为空字符串或其他适当的默认值
data_list.append(data) data_list.append(data)
...@@ -145,20 +145,24 @@ def price_extract_data(html_content): ...@@ -145,20 +145,24 @@ def price_extract_data(html_content):
def click_get_price_data(): def click_get_price_data():
try: try:
# 获取 Amounts 表格html # 获取 Amounts 表格html
page_html = page.ele(".a-column a-span4", timeout=2).html page_html = page.ele(".a-box-inner", timeout=5).html
# 使用 lxml 解析 HTML # 使用 lxml 解析 HTML
tree = etree.HTML(page_html) tree = etree.HTML(page_html)
# 使用 XPath 查找第三个 span class="a-color-base invoice-property-field" # 使用 XPath 查找第三个 span class="a-color-base invoice-property-field"
# price_variance_amount = tree.xpath('(//div[@class="a-column a-span4"][2]//span[@class="a-color-base invoice-property-field"])[3]/text()')
price_variance_amount = tree.xpath( price_variance_amount = tree.xpath(
'//span[contains(text(),"Price variance amount (price claim)")]/../../div[@class="a-column a-span6 a-span-last"]/span/text()') '//span[contains(text(),"Price variance amount (price claim)")]/../../div[@class="a-column a-span6 a-span-last"]/span/text()')
# 检查内容是否有效 # 检查内容是否有效
if price_variance_amount and price_variance_amount[0].strip() != "-": if price_variance_amount and price_variance_amount[0].strip() != "-":
page.ele("#pd", timeout=2).click() page.ele("#pd", timeout=5).click()
print("等在加载数据,15秒后执行") while True:
time.sleep(15) print("等待争议数据加载,5秒后获取表单数据")
table_html = page.ele(".a-bordered a-horizontal-stripes mt-table", timeout=2).html time.sleep(5)
return price_extract_data(table_html) try:
table_html = page.ele("#priceDiscrepancyWithDMSGridForm", timeout=5).html
price_data = price_extract_data(table_html)
return price_data
except ElementNotFoundError:
print("未获取到表数据")
return [] return []
except ElementNotFoundError: except ElementNotFoundError:
...@@ -169,14 +173,17 @@ def click_get_price_data(): ...@@ -169,14 +173,17 @@ def click_get_price_data():
def handle_price_data(price_data, detail_data): def handle_price_data(price_data, detail_data):
result = {} result = {}
"""处理争议数据""" """处理争议数据"""
for _, price in price_data: for price in price_data:
if price['ASIN'] == detail_data['ASIN']: if price['ASIN'] == detail_data.get('ASIN'):
result['Quantity received'] = price['Quantity'] result = detail_data.to_dict()
result['Amount'] = price['RESOLUTION_COST'] result['Quantity received'] = price['QUANTITY']
if price['RESOLUTION_DECISION'] == "Approved": if price['RESOLUTION_DECISION'] == "Approved":
result['Shortage quantity'] = 0 result['Shortage quantity'] = 0
result['Amount'] = price['RESOLUTION_COST']
else: else:
result['Shortage quantity'] = 1 result['Shortage quantity'] = 1
amount = (float(price['INVOICE_COST']) - float(price['INITIAL_RESEARCH_COST'])) * int(price['QUANTITY'])
result['Amount'] = f"${amount:.2f}"
break break
return result return result
...@@ -206,8 +213,8 @@ def handle_data(detail_datum, vendor, deduction_points): ...@@ -206,8 +213,8 @@ def handle_data(detail_datum, vendor, deduction_points):
def main(): def main():
list_data = export_list_read_data() list_data = export_list_read_data()
excel.save_xls(list_data, "回款数据.xlsx", "Remittance payments") excel.save_xls(list_data, "回款数据.xlsx", "Remittance payments")
# list_data = list_data[62:]
print(f"共计:{len(list_data)} 订单") print(f"共计:{len(list_data)} 订单")
all_normal_pay_data = [] all_normal_pay_data = []
all_price_pay_data = [] all_price_pay_data = []
i = 0 i = 0
...@@ -217,12 +224,9 @@ def main(): ...@@ -217,12 +224,9 @@ def main():
# 取订单前8位后面的没用 # 取订单前8位后面的没用
invoice_number = invoice_number[:8] invoice_number = invoice_number[:8]
# invoice_search_page = page.new_tab(f"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-po-search?searchByNumberToken={invoice_number}")
# invoice_search_page.close()
# 获取当前订单的Payee和优惠比例 # 获取当前订单的Payee和优惠比例
vendor_payment_terms = get_po_code(i, invoice_number) vendor_payment_terms = get_po_code(i, invoice_number)
time.sleep(1) time.sleep(2)
print(vendor_payment_terms) print(vendor_payment_terms)
vendor = vendor_payment_terms['vendor'] vendor = vendor_payment_terms['vendor']
...@@ -230,9 +234,10 @@ def main(): ...@@ -230,9 +234,10 @@ def main():
# 下载excel文件并读取数据 # 下载excel文件并读取数据
detail_data = export_details_read_data(invoice_number) detail_data = export_details_read_data(invoice_number)
time.sleep(2)
# 获取争议数据 # 获取争议数据
price_data = click_get_price_data() all_price_data = click_get_price_data()
# 初始化列表存储新字段数据 # 初始化列表存储新字段数据
normal_pay_data = [] normal_pay_data = []
...@@ -244,13 +249,13 @@ def main(): ...@@ -244,13 +249,13 @@ def main():
# 将处理后的记录添加到临时列表 # 将处理后的记录添加到临时列表
normal_pay_data.append(success_data) normal_pay_data.append(success_data)
if price_data: if all_price_data:
# 争议回款 # 争议回款
price_data = handle_price_data(price_data, detail_datum) handle_after_price_data = handle_price_data(all_price_data, detail_datum)
price_data = handle_data(price_data, vendor, deduction_points) if handle_after_price_data:
price_data = handle_data(handle_after_price_data, vendor, deduction_points)
# 将处理后的记录添加到临时列表 # 将处理后的记录添加到临时列表
price_pay_data.append(price_data) price_pay_data.append(price_data)
# 添加到汇总列表 # 添加到汇总列表
all_normal_pay_data.append(pd.DataFrame(normal_pay_data)) all_normal_pay_data.append(pd.DataFrame(normal_pay_data))
...@@ -265,6 +270,7 @@ def main(): ...@@ -265,6 +270,7 @@ def main():
excel.save_xls(normal_pay_summary, "回款数据.xlsx", "正常回款导出明细") excel.save_xls(normal_pay_summary, "回款数据.xlsx", "正常回款导出明细")
excel.save_xls(price_pay_summary, "回款数据.xlsx", "Price导出明细") excel.save_xls(price_pay_summary, "回款数据.xlsx", "Price导出明细")
page.close()
if __name__ == '__main__': if __name__ == '__main__':
try: try:
......
...@@ -26,7 +26,8 @@ page.set.download_path(download_path) ...@@ -26,7 +26,8 @@ page.set.download_path(download_path)
def open_url(url): def open_url(url):
# 访问网页 # 访问网页
page.get(url) page.get(url, timeout=5)
time.sleep(2)
element = page.ele('#ap_email', timeout=1) element = page.ele('#ap_email', timeout=1)
if element: if element:
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment