Commit 306d49db authored by 邱阿朋

feat(helper): 增加德国站点支持并优化数据处理

- 在 domain.py 中添加了对德国站点的支持
- 重构了 payment_erp.py 中的数据读取和处理逻辑,提高了代码可读性和灵活性
- 修改了 rabbitmq.py 中的死信交换机和路由键命名规则,提高了代码一致性
- 在 spa_search.py 中优化了写入函数,增加了对 sheet 名称的类型检查,并添加了消息推送数量的日志输出
parent 5e0347e7
......@@ -13,6 +13,8 @@ def switch_domain(country):
domain = "https://vendorcentral.amazon.co.uk/"
if country == "FR":
domain = "https://vendorcentral.amazon.fr/"
if country == "DE":
domain = "https://vendorcentral.amazon.de/"
if country == "CA":
domain = "https://vendorcentral.amazon.ca/"
if country == "mx":
......
......@@ -52,13 +52,13 @@ class RabbitMQClient:
self.channel.exchange_declare(exchange=exchange, exchange_type=exchange_type, durable=True)
# 死信交换机和路由键配置
dead_letter_exchange = queue + "_dead_letter_exchange" # 死信交换机名称
dead_letter_routing_key = queue + "_dead_letter" # 死信路由键
dead_exchange = exchange + "_dead" # 死信交换机名称
dead_queue = queue + "_dead" # 死信路由键
# 队列声明的参数
queue_arguments = {
"x-dead-letter-exchange": dead_letter_exchange, # 设置死信交换机
"x-dead-letter-routing-key": dead_letter_routing_key, # 设置死信路由键
"x-dead-letter-exchange": dead_exchange, # 设置死信交换机
"x-dead-letter-routing-key": dead_queue, # 设置死信路由键
}
# 声明队列
......
......@@ -41,40 +41,47 @@ def page_get(url):
def _download_payments_export(file_name):
    """Drive the remittance page until ``file_name`` has been downloaded.

    Each attempt opens the remittance home page, picks the first date-range
    option, runs the search, selects all rows and clicks the export link.
    The attempt is repeated when the download does not complete, and also
    when an element is missing (after refreshing the page).
    """
    while True:
        # The try lives inside the loop so that a missing element leads to a
        # real retry instead of falling through to reading a missing file.
        try:
            page_get("hz/vendor/members/remittance/home")

            # Open the date-range drop-down and pick its first option.
            page.ele("#date-range-option-wrap").click()
            page.wait(1)
            page.ele("#date-range-option_0").click()
            page.wait(1)

            # Run the search.
            page.ele("#remittanceSearchForm-submit-aui-button").click()
            page.wait(2)

            # Select every row and trigger the export download.
            page.ele("#remittance-home-select-all").click()
            page.ele("#remittance-home-export-link").click.to_download()
            downloaded = file.wait_for_downloads(file_name, 30)
        except ElementNotFoundError:
            log.error("页面加载失败,刷新重新加载")
            page.refresh()
            continue

        if downloaded:
            return
        log.warning("下载失败,重新下载")


def _find_title_row(first_col, keyword, after=-1):
    """Return the index of the first row past ``after`` whose first cell contains ``keyword``.

    Raises:
        IndexError: no such row exists. The type matches what indexing an
            empty result raised before, so existing handlers keep working.
    """
    matches = first_col.str.contains(keyword, case=False, na=False)
    hits = first_col.index[matches & (first_col.index > after)]
    if len(hits) == 0:
        raise IndexError(f"title row containing {keyword!r} not found")
    return hits[0]


def export_list_read_data():
    """Download the payments export and split it into its two tables.

    The workbook is scanned for a title row containing 'Remittance payments'
    and a later one containing 'Invoices'. The first non-empty row after each
    title is used as that table's header.

    Returns:
        list: ``[payments, invoices]`` as DataFrames with all-empty rows dropped.

    Raises:
        IndexError: one of the two title rows is missing from the workbook.
    """
    file_name = 'Payments.xlsx'
    _download_payments_export(file_name)

    raw = pd.read_excel(file_name, header=None)

    # Cast to str so the search also works when column 0 is not string-typed.
    first_col = raw[0].astype(str)
    pay_title = _find_title_row(first_col, 'Remittance payments')
    inv_title = _find_title_row(first_col, 'Invoices', after=pay_title)

    # Header of each table = first non-empty row after its title.
    non_empty = raw.notna().any(axis=1)
    pay_header = non_empty.loc[pay_title + 1:].idxmax()
    inv_header = non_empty.loc[inv_title + 1:].idxmax()

    # The payments table ends at the last blank row before the invoices title.
    # Without a blank row it runs right up to the title, hence the default of
    # inv_title (inv_title - 1 would drop the final payment row).
    blank_rows = [row for row in raw.index[~non_empty] if pay_header < row < inv_title]
    separator = max(blank_rows, default=inv_title)
    payment_rows = separator - pay_header - 1

    payments = pd.read_excel(file_name, header=pay_header, nrows=payment_rows).dropna(how='all')
    invoices = pd.read_excel(file_name, header=inv_header).dropna(how='all')
    return [payments, invoices]
......
......@@ -283,6 +283,9 @@ def validate_asin(asin):
# 创建一个写入函数
def write_sheet(writer, data, sheet_name):
    """Write ``data`` to the worksheet ``sheet_name`` of ``writer``.

    Args:
        writer: target passed straight to ``DataFrame.to_excel``.
        data: anything ``pd.DataFrame`` accepts.
        sheet_name: worksheet name; non-string values are converted with ``str``.
    """
    if not isinstance(sheet_name, str):
        sheet_name = str(sheet_name)
    df = pd.DataFrame(data)
    # Log the row count of the frame actually written: len(data) would be the
    # column count for dict-of-columns input and fails for inputs without len().
    log.info(f"开始写入 {sheet_name}, 共计 {len(df)} 条")
    df.to_excel(writer, sheet_name=sheet_name, index=False)
......@@ -314,7 +317,7 @@ def push_data_queue(file_name):
rabbit.connect(queue='spa_robot', routing_key='spa_robot', exchange='reports')
log.info("开始读取数据....")
data_dict = pd.read_excel(file_name, sheet_name=None, keep_default_na=False, na_values=[])
log.info("开始推送消息....")
log.info(f"开始推送消息....{len(data_dict)}")
for sheet_name, values in data_dict.items():
for _, item_row in values.iterrows():
if sheet_name == "Sheet1":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment