Commit e61bb4a6 authored by 邱阿朋

回款数据

parent 7690d798
# coding: utf-8
import re
import pandas as pd
import xlrd
from openpyxl.reader.excel import load_workbook
......@@ -46,3 +48,28 @@ def save_xls(data, output_file, sheet_name='Sheet1', adjusted=True):
ws.column_dimensions[column_letter].width = adjusted_width
wb.save(output_file)
def remove_last_comma(csv_file, skip_rows=2):
    """
    Clean an exported CSV file in place.

    The first ``skip_rows`` lines are discarded. Every remaining line has its
    stray quotes repaired and its trailing commas / newline removed, and the
    result is written back to the same path with ``\\n`` line endings.

    :param csv_file: path of the CSV file to rewrite
    :param skip_rows: number of leading lines to drop (default 2)
    """
    cleaned_lines = []
    with open(csv_file, 'r', encoding='utf-8') as file:
        for _ in range(skip_rows):
            # Passing a default keeps a file shorter than ``skip_rows`` from
            # raising StopIteration; there is simply nothing left to clean.
            if next(file, None) is None:
                break
        for line in file:
            # Drop the quote that directly follows "<whitespace><digits>".
            cleaned_line = re.sub(r'(\s\d+)"', r'\1 ', line)
            # Turn "<whitespace>,<optional whitespace>\"" into a proper
            # closing-quote / comma / opening-quote sequence.
            cleaned_line = re.sub(r'\s+,\s*"', r'", "', cleaned_line)
            # Strip trailing commas and the newline; newlines are re-added
            # uniformly when writing.
            cleaned_lines.append(cleaned_line.rstrip(',\n'))
    # Overwrite the original file with the cleaned content.
    with open(csv_file, 'w', encoding='utf-8', newline='') as cleaned_file:
        cleaned_file.write('\n'.join(cleaned_lines) + '\n')
......@@ -3,16 +3,16 @@ import os
import time
def wait_for_downloads(file_name, timeout=60, poll_interval=1):
    """
    Wait until the expected download exists on disk.

    :param file_name: path of the file that the download should produce
    :param timeout: maximum time to wait, in seconds
    :param poll_interval: pause between two existence checks, in seconds
    :return: True as soon as the file exists, False if the timeout elapsed
    """
    # A monotonic clock keeps the deadline correct if the system time changes.
    deadline = time.monotonic() + timeout
    while True:
        # Check before sleeping so an already-present file (or timeout <= 0)
        # is still detected.
        if os.path.isfile(file_name):
            return True
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        time.sleep(min(poll_interval, remaining))
......
# coding: utf-8
# 回款明细
import os
import re
import time
import urllib.parse
import warnings
import pandas as pd
from DrissionPage import ChromiumPage
from DrissionPage.errors import PageDisconnectedError, ElementNotFoundError
from lxml import etree
from helper import helper, excel
from helper import helper, excel, file
# Module-level browser page object shared by the functions in this script.
page = ChromiumPage()
# Select the "normal" load mode on the page.
# NOTE(review): presumably this makes page.get() wait for the full page load; confirm in the DrissionPage docs.
page.set.load_mode.normal()
......@@ -27,13 +30,14 @@ warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")
def export_list_read_data():
    """
    Load the remittance payment list and keep only the rows of interest.

    When ``Payments.xlsx`` is not on disk yet, the remittance home page is
    opened, all entries are selected and the export link is clicked; the
    function then waits for the file to appear before reading it.

    :return: DataFrame with the rows whose ``Description`` matches a price
             claim, PCR, missed adjustment, shortage claim, or starts with an
             ``XXXXXXXX/XXXX/`` style reference
    :raises FileNotFoundError: if the export did not appear before the wait
             timed out
    """
    file_name = 'Payments.xlsx'
    if not os.path.isfile(file_name):
        page.get("https://vendorcentral.amazon.com/hz/vendor/members/remittance/home", timeout=3)
        page.ele("#remittance-home-select-all", timeout=2).click()
        page.ele("#remittance-home-export-link", timeout=2).click.to_download()
        # Fail with a clear message here instead of a bare read error below.
        if not file.wait_for_downloads(file_name):
            raise FileNotFoundError(f"download of {file_name} did not finish in time")

    # The sheet's first 22 rows are skipped before the header row is read.
    df = pd.read_excel(file_name, skiprows=22)
    # Descriptions containing 'Price Claim', 'PCR', 'Missed Adjustment',
    # 'Shortage Claim', or starting with 'XXXXXXXX/XXXX/'.
    pattern = r'Price Claim|PCR|Missed Adjustment|Shortage Claim|^[A-Z0-9]{8}/[A-Z0-9]{4}/'
    # Rows with a missing description are treated as non-matching.
    return df[df['Description'].str.contains(pattern, na=False, regex=True)]
......@@ -41,18 +45,23 @@ def export_list_read_data():
def export_details_read_data(invoice_number):
# Purpose: make sure the line-items CSV export for one invoice exists on disk
# (downloading it through the shared browser page when it does not) and return
# its content loaded with pandas.
# NOTE(review): this span looks like a rendered diff with indentation lost and
# both the pre-change and post-change lines present (two "try:" lines, two
# download statements, two return statements, a repeated "invoiceNumber" key);
# confirm the intended variant against the real file before editing the code.
# Read the invoice detail content
file_name = f"invoices\\{invoice_number}.csv"
if not os.path.isfile(file_name):
try:
params = {
"invoiceNumber": invoice_number,
"payeeCode": "VECET",
"activeTab": "lineItems",
"invoiceNumber": invoice_number,
}
# Convert the dict into URL query parameters
query_string = urllib.parse.urlencode(params)
page.get(
f"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-details?" + query_string)
try:
page.ele("#line-items-export-to-spreadsheet-announce").click.to_download(rename=file_name).wait()
if not os.path.isfile(file_name):
page.ele("#line-items-export-to-spreadsheet-announce", timeout=2).click.to_download(rename=file_name)
# Wait for the renamed export to appear, then clean it in place.
file.wait_for_downloads(file_name)
excel.remove_last_comma(file_name)
except ElementNotFoundError:
# The export button was not found: report it and reload the page.
print("导出按钮不存在刷新网页")
page.refresh()
# NOTE(review): the next line is a diff hunk header, not code; lines between the two hunks may be hidden here.
......@@ -61,52 +70,180 @@ def export_details_read_data(invoice_number):
# If the file is still missing, call this function again; the result of that
# recursive call is not used, execution falls through to the read below.
if not os.path.isfile(file_name):
export_details_read_data(invoice_number)
return pd.read_csv(file_name, skiprows=2, engine='python', on_bad_lines='skip')
return pd.read_csv(file_name)
def get_content(tree, row_index: int, cell_index: int) -> "str | None":
    """
    Return the first text node of one table cell, or None when absent.

    :param tree: parsed document exposing an ``xpath`` method
    :param row_index: 1-based position among the ``role="row"`` elements
    :param cell_index: 1-based position among that row's ``role="cell"`` children
    :return: the first matching text node, or None if nothing matched
    """
    content = tree.xpath(f'//*[@role="row"][{row_index}]/*[@role="cell"][{cell_index}]/text()')
    return content[0] if content else None
def get_po_code(index, po_id) -> dict:
    """
    Open a purchase-order page and collect its vendor and payment-terms value.

    :param index: caller-supplied counter, echoed back under ``"index"``
    :param po_id: purchase-order id used in the page URL, echoed under ``"po_id"``
    :return: dict with keys ``index``, ``po_id``, ``vendor`` and
             ``payment_terms`` (digits before the first ``%`` that follows
             "Payment Terms" on the same line, or None when not found)
    """
    page.get(f"https://vendorcentral.amazon.com/po/vendor/members/po-mgmt/order?poId={po_id}", timeout=3)
    header_html = page.ele("#po-header", timeout=2).html

    # Vendor: second cell of the second row in the parsed header markup.
    header_tree = etree.HTML(header_html)
    vendor = get_content(header_tree, 2, 2)

    # Payment terms: the matched "<digits>%" with its trailing percent sign removed.
    terms_match = re.search(r'Payment Terms.*?(\d+%)', header_html)
    payment_terms = terms_match.group(1)[:-1] if terms_match else None

    return {
        "index": index,
        "po_id": po_id,
        "vendor": vendor,
        "payment_terms": payment_terms,
    }
def price_extract_data(html_content):
    """
    Parse the price-dispute table markup into a list of row dicts.

    :param html_content: HTML containing ``<tr>`` elements whose class
                         contains ``mt-row``
    :return: one dict per row, keyed by the ``data-column`` names; each value
             is the stripped first text node of that cell, or None when the
             cell yields no text
    """
    # Columns whose text sits inside a nested span/span/a link.
    linked_columns = ('PO_NUMBER', 'ASIN', 'EXTERNAL_ID')
    # Columns whose text sits directly inside the cell.
    plain_columns = (
        'TITLE',
        'QUANTITY',
        'INVOICE_COST',
        'PO_COST',
        'INITIAL_RESEARCH_COST',
        'RESOLUTION_DECISION',
        'RESOLUTION_COST',
    )
    queries = {name: f'./td[@data-column="{name}"]/span/span/a/text()' for name in linked_columns}
    queries.update({name: f'./td[@data-column="{name}"]/text()' for name in plain_columns})

    def first_text(row, query):
        # First text node, stripped; None when the query matches nothing.
        hits = row.xpath(query)
        return hits[0].strip() if hits else None

    document = etree.HTML(html_content)
    return [
        {name: first_text(row, query) for name, query in queries.items()}
        for row in document.xpath('//tr[contains(@class, "mt-row")]')
    ]
def click_get_price_data():
    """
    Open the price-dispute table on the current invoice page and parse it.

    The third invoice property field in the amounts column is read first; the
    dispute table is only opened when that field holds something other than
    "-". If an element is missing, the page is refreshed and the whole
    procedure is retried.

    :return: list of row dicts from ``price_extract_data``, or None when the
             field is empty or "-"
    """
    try:
        # HTML of the amounts column.
        page_html = page.ele(".a-column a-span4", timeout=2).html
        tree = etree.HTML(page_html)
        # Third span with class "a-color-base invoice-property-field".
        price_variance_amount = tree.xpath(
            '(//div[@class="a-column a-span4"]//span[@class="a-color-base invoice-property-field"])[3]/text()')
        if price_variance_amount and price_variance_amount[0].strip() != "-":
            page.ele("#pd", timeout=2).click()
            print("等在加载数据,15秒后执行")
            # Fixed pause to give the table time to load after the click.
            time.sleep(15)
            table_html = page.ele(".a-bordered a-horizontal-stripes mt-table", timeout=2).html
            return price_extract_data(table_html)
        return None
    except ElementNotFoundError:
        page.refresh()
        # Propagate the retry's result; previously it was dropped, so a
        # successful retry still returned None.
        return click_get_price_data()
def handle_price_data(price_data, detail_data):
    """
    Build the disputed-payment record for one invoice line.

    The first dispute row whose ``ASIN`` equals the line's ``ASIN`` is used: a
    copy of the line gets its received quantity and amount replaced by the
    dispute values, and ``Shortage quantity`` is set to 0 for an approved
    dispute and 1 otherwise.

    :param price_data: iterable of dispute rows (dicts as produced by
                       ``price_extract_data``; ``(index, row)`` pairs are
                       accepted too), or None
    :param detail_data: the invoice line, a mapping with an ``ASIN`` entry and
                        a ``copy`` method
    :return: the adjusted copy, or None when there is no matching dispute row
    """
    if price_data is None:
        return None
    for entry in price_data:
        # Rows are plain mappings; unwrap (index, row) pairs if given.
        price = entry[1] if isinstance(entry, tuple) else entry
        if price['ASIN'] != detail_data['ASIN']:
            continue
        result = detail_data.copy()
        # The extractor names the column 'QUANTITY'; keep 'Quantity' as a
        # fallback for rows keyed the other way.
        quantity_key = 'QUANTITY' if 'QUANTITY' in price else 'Quantity'
        result['Quantity received'] = price[quantity_key]
        result['Amount'] = price['RESOLUTION_COST']
        result['Shortage quantity'] = 0 if price['RESOLUTION_DECISION'] == "Approved" else 1
        return result
    return None
def _parse_amount(raw):
    """Convert a currency value such as '$1,234.50' (or a plain number) to float."""
    if raw is None:
        return 0.0
    if isinstance(raw, str):
        cleaned = raw.replace('$', '').replace(',', '').strip()
        return float(cleaned) if cleaned else 0.0
    value = float(raw)
    # NaN is the only value unequal to itself; treat it like a missing amount.
    return 0.0 if value != value else value


def handle_data(detail_datum, vendor, deduction_points):
    """
    Add the payment bookkeeping fields to one invoice line.

    :param detail_datum: the invoice line (mapping with ``get`` and ``copy``),
                         or None
    :param vendor: vendor code stored under ``Code``
    :param deduction_points: deduction percentage, e.g. ``2`` for 2%
    :return: a copy of the line with ``IsFinished``, ``DeductionPoints``,
             ``Code`` and ``AmountAfterDeduction`` added; None when
             ``detail_datum`` is None
    """
    # A missing record (no matching dispute row) has nothing to compute.
    if detail_datum is None:
        return None

    # A missing 'Amount' defaults to '$0.00'.
    amount = _parse_amount(detail_datum.get('Amount', '$0.00'))

    # A shortage quantity of 0 marks the payment as complete.
    is_finished = "是" if detail_datum.get('Shortage quantity', -1) == 0 else "否"

    # Amount left after the deduction.
    amount_after_deduction = amount - (amount * (deduction_points / 100))

    # Work on a copy so the caller's row is left untouched.
    record = detail_datum.copy()
    record["IsFinished"] = is_finished
    record["DeductionPoints"] = f"{deduction_points}%"
    record["Code"] = vendor
    record["AmountAfterDeduction"] = amount_after_deduction
    return record
def main():
list_data = export_list_read_data()
excel.save_xls(list_data, "回款数据.xlsx", "Remittance payments")
print(f"共计:{len(list_data)} 订单")
all_normal_pay_data = []
all_price_pay_data = []
i = 0
for _, data in list_data.iterrows():
i += 1
invoice_number = data.get("Invoice Number")
description = data.get("Description")
if "Price" in description or "PCR" in description or "Missed" in description or "Shortage" in description:
# 获取前8位
# 取订单前8位后面的没用
invoice_number = invoice_number[:8]
print(invoice_number)
# invoice_search_page = page.new_tab(f"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-po-search?searchByNumberToken={invoice_number}")
# invoice_search_page.close()
# 获取当前订单的Payee和优惠比例
vendor_payment_terms = get_po_code(i, invoice_number)
time.sleep(1)
vendor = vendor_payment_terms['vendor']
deduction_points = int(vendor_payment_terms['payment_terms'])
# 下载excel文件并读取数据
detail_data = export_details_read_data(invoice_number)
# 获取争议数据
price_data = click_get_price_data()
# 初始化列表存储新字段数据
normal_pay_data = []
price_pay_data = []
for index, detail_datum in detail_data.iterrows():
amount = detail_datum.get('Amount', 0) # 使用默认值 0 防止 None
code = "VECET"
deduction_points = 1
# 正常回款数据
success_data = handle_data(detail_datum, vendor, deduction_points)
# 计算扣除后的金额
amount_after_deduction = amount - (amount * deduction_points / 100)
# 复制原始行数据,避免直接修改
new_record = detail_datum.copy()
new_record["IsFinished"] = "是"
new_record["DeductionPoints"] = f"{deduction_points}%" # 拼接百分号
new_record["Code"] = code
new_record["AmountAfterDeduction"] = amount_after_deduction
# 争议回款
price_data = handle_price_data(price_data, detail_datum)
price_data = handle_data(price_data, vendor, deduction_points)
# 将处理后的记录添加到临时列表
normal_pay_data.append(new_record)
normal_pay_data.append(success_data)
# 将处理后的记录添加到临时列表
price_pay_data.append(new_record)
price_pay_data.append(price_data)
# 添加到汇总列表
all_normal_pay_data.append(pd.DataFrame(normal_pay_data))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment