common / amazon_reports · Commits

Commit 8f595ff1, authored Dec 18, 2024 by 邱阿朋
Commit message: spa
Parent: 028d0f9d

Showing 3 changed files with 114 additions and 78 deletions (+114 −78):

    src/helper/excel.py    +4    −4
    src/payment.py         +2    −2
    src/spa_search.py      +108  −72
src/helper/excel.py  (view file @ 8f595ff1)

```diff
@@ -25,14 +25,14 @@ def save_xls(data, output_file, sheet_name='Sheet1', adjusted=True):
     df = pd.DataFrame(data)
     df.to_excel(writer, index=False, sheet_name=sheet_name)
+    if not adjusted:
+        return
+    # Reload the workbook with openpyxl
     wb = load_workbook(output_file)
     ws = wb[sheet_name]
-    if not adjusted:
-        return
-    # Auto-adjust column widths
+    # Auto-adjust column widths
     for column in ws.columns:
         max_length = 0
         column_letter = column[0].column_letter
```

The `if not adjusted: return` guard moves above the `load_workbook` call, so that when no width adjustment is requested the workbook is never reloaded.
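For context, here is a minimal self-contained sketch of what `save_xls` plausibly looks like after this change. The `ExcelWriter` append handling and the width heuristic (`max_length + 2`) are assumptions for illustration, not taken from the repository:

```python
import os

import pandas as pd
from openpyxl import load_workbook


def save_xls(data, output_file, sheet_name='Sheet1', adjusted=True):
    # Assumption: the real helper appends sheets to an existing workbook,
    # since payment.py calls it twice against the same file.
    mode = "a" if os.path.exists(output_file) else "w"
    kwargs = {"engine": "openpyxl", "mode": mode}
    if mode == "a":
        kwargs["if_sheet_exists"] = "replace"
    with pd.ExcelWriter(output_file, **kwargs) as writer:
        df = pd.DataFrame(data)
        df.to_excel(writer, index=False, sheet_name=sheet_name)
    if not adjusted:
        return  # skip the openpyxl reload entirely (the point of this commit)
    # Reload the workbook and auto-fit each column to its longest cell value.
    wb = load_workbook(output_file)
    ws = wb[sheet_name]
    for column in ws.columns:
        max_length = 0
        column_letter = column[0].column_letter
        for cell in column:
            if cell.value is not None:
                max_length = max(max_length, len(str(cell.value)))
        ws.column_dimensions[column_letter].width = max_length + 2
    wb.save(output_file)
```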
src/payment.py  (view file @ 8f595ff1)

```diff
@@ -399,11 +399,11 @@ def main():
     if all_normal_pay_data:
         # Merge all the data into one DataFrame
         normal_pay_summary = pd.concat(all_normal_pay_data, ignore_index=True)
-        excel.save_xls(normal_pay_summary, new_file_name, "正常回款导出明细")
+        excel.save_xls(normal_pay_summary, new_file_name, "正常回款导出明细", False)
     if all_price_pay_data:
         price_pay_summary = pd.concat(all_price_pay_data, ignore_index=True)
-        excel.save_xls(price_pay_summary, new_file_name, "Price导出明细")
+        excel.save_xls(price_pay_summary, new_file_name, "Price导出明细", False)


 if __name__ == '__main__':
```
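Both call sites now pass `False` for `save_xls`'s `adjusted` parameter. Combined with the relocated early return in `src/helper/excel.py` above, these two summary sheets are written without the openpyxl reload and column-width pass, presumably to avoid a second full traversal of what can be large workbooks.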
src/spa_search.py  (view file @ 8f595ff1)

```diff
@@ -3,6 +3,7 @@
 import math
 import os
 import shutil
+from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime
 from urllib.parse import urlparse, parse_qs
```
```diff
@@ -84,7 +85,7 @@ def export_item_read_data(invoice_id):
     page.download(host + link, report_file_tmp_dir, show_msg=False)
     report_file = report_file_tmp_dir + "BackupReport.xls"
-    file.wait_for_downloads(report_file)
+    file.wait_for_downloads(report_file, 120)
     try:
         df = pd.read_excel(report_file)
```
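`file.wait_for_downloads` is a project-internal helper, so its exact semantics are not visible in this diff; the change just passes an explicit timeout (120 seconds, presumably because large reports outlived the default). A hypothetical equivalent of such a helper, for illustration only:

```python
import os
import time


def wait_for_downloads(path, timeout=60, poll=0.5):
    """Poll until `path` exists, is non-empty, and its size has stopped
    growing, or raise TimeoutError after `timeout` seconds.
    (Hypothetical sketch; the real helper's behaviour may differ.)"""
    deadline = time.monotonic() + timeout
    last_size = -1
    while time.monotonic() < deadline:
        if os.path.exists(path):
            size = os.path.getsize(path)
            if size > 0 and size == last_size:
                return True  # size stable across two polls: download finished
            last_size = size
        time.sleep(poll)
    raise TimeoutError(f"{path} not downloaded within {timeout}s")
```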
```diff
@@ -106,93 +107,128 @@ def export_item_read_data(invoice_id):
 def main():
-    # Fetch the data
-    relation_data = api.sku_relations(country)
-    coop_list = export_list_read_data()
+    relation_data = api.sku_relations(country)  # ASIN-to-SKU relation data
+    coop_list = export_list_read_data()  # list of cooperation records
     log.info(f"共计:{len(coop_list)} 条数据")
     # Get the current date and time, formatted
-    current_datetime = datetime.now().strftime('%Y%m%d%H%M')  # i.e. 'YYYYMMDDHHMM'
-    # Original file name
+    current_datetime = datetime.now().strftime('%Y%m%d%H%M')
     file_name = "spa.xlsx"
     # Build the new file name
     new_file_name = f"{current_datetime}_{file_name}"
-    i = 0
     new_coop_data = []
-    sheet_data = {}
-    for _, coop in coop_list.iterrows():
-        i += 1
+    sheet_data = []  # holds the small data
+    large_sheet_data = {}  # holds the large data (one sheet per invoice)
+    max_sheet_data = {}  # holds the oversized data (> 5000 rows)
+    # Walk the cooperation list
+    for index, coop in coop_list.iterrows():
+        index += 1
         invoice_id = coop.get("Invoice ID")
-        log.info({"index": i, "invoice_id": invoice_id})
-        item_coop_data = []
-        # Fetch the item list for this invoice ID
+        log.info({"index": index, "invoice_id": invoice_id})
+        if not invoice_id:
+            log.warning(f"缺少 Invoice ID,跳过第 {index} 条数据")
+            continue
+        # Fetch the item list for the current invoice
         item_list = export_item_read_data(invoice_id)
         if item_list is None:
             log.warning(f"{invoice_id} 暂无报告信息")
             continue
-        for _, item in item_list.iterrows():
-            asin = item.get("Asin")
-            # Skip empty or invalid ASINs
-            if not asin or (isinstance(asin, float) and math.isnan(asin)):
-                break
-            relation = relation_data.get(asin)
-            # No SKU matched: record empty values
-            if not relation:
-                item['Asin'] = asin
-                item['ERP SKU'] = ""
-                item['Group Name'] = ""
-                item_coop_data.append(item)
-                continue  # skip this entry and move on
-            # If item_list has 10 or more rows, keep the original row data
-            if len(item_list) >= 10:
-                new_item = item.copy()
-                new_item.pop("Asin")
-                new_item['Asin'] = asin
-                new_item['ERP SKU'] = relation.get("erp_sku")
-                new_item['Group Name'] = relation.get("name")
-                item_coop_data.append(new_item)
-            else:
-                new_item = coop.copy()
-                rebate_in_agreement_currency = item.get("Rebate In Agreement Currency")
-                if rebate_in_agreement_currency:
-                    new_item['Original balance'] = rebate_in_agreement_currency
-                vendor_funding_in_agreement_currency = item.get("Vendor Funding In Agreement Currency")
-                if vendor_funding_in_agreement_currency:
-                    new_item['Original balance'] = vendor_funding_in_agreement_currency
-                new_item['Asin'] = asin
-                new_item['ERP SKU'] = relation.get("erp_sku")
-                new_item['Group Name'] = relation.get("name")
-                new_coop_data.append(new_item)
-        # Stash the processed item data for this invoice
-        if item_coop_data:
-            sheet_data[invoice_id] = item_coop_data
-    # Save the final cooperation data
-    if new_coop_data:
-        excel.save_xls(new_coop_data, new_file_name)
-    max_sheet_data = {}
-    if sheet_data:
-        # Loop over the sheet_data dict
-        for key, value in sheet_data.items():
-            if len(value) > 5000:
-                max_sheet_data[key] = value
-                continue
-            excel.save_xls(value, new_file_name, key)
-    if max_sheet_data:
-        for key, value in max_sheet_data.items():
-            excel.save_xls(value, new_file_name, key)
+        # Route by item_list length: small data vs. large data
+        if len(item_list) >= 10:
+            processed_items = process_large_items(item_list, relation_data)
+            if processed_items:
+                if len(processed_items) > 5000:
+                    max_sheet_data[invoice_id] = processed_items
+                else:
+                    large_sheet_data[invoice_id] = processed_items
+        else:
+            processed_items = process_small_items(item_list, coop, relation_data)
+            sheet_data.extend(processed_items)
+    # Save everything to the Excel file
+    save_excel(sheet_data, large_sheet_data, max_sheet_data, new_file_name)
+
+
+def process_large_items(item_list, relation_data):
+    """Process a large item list (len(item_list) >= 10)."""
+    processed_items = []
+    for _, item in item_list.iterrows():
+        asin = item.get('Asin', None)
+        if not validate_asin(asin):
+            continue
+        relation = relation_data.get(asin, {})
+        processed_item = item.copy()
+        processed_item.pop("Asin")
+        processed_item['Asin'] = asin
+        processed_item['ERP SKU'] = relation.get("erp_sku")
+        processed_item['Group Name'] = relation.get("name")
+        processed_items.append(processed_item)
+    return processed_items
+
+
+def process_small_items(item_list, coop, relation_data):
+    """Process a small item list (len(item_list) < 10)."""
+    processed_items = []
+    for _, item in item_list.iterrows():
+        asin = item.get('Asin', None)
+        if not validate_asin(asin):
+            continue
+        relation = relation_data.get(asin, {})
+        rebate = item.get("Rebate In Agreement Currency", None)
+        vendor_funding = item.get("Vendor Funding In Agreement Currency", None)
+        processed_item = coop.copy()  # copy the coop row
+        processed_item["Asin"] = asin
+        processed_item["ERP SKU"] = relation.get("erp_sku")
+        processed_item["Group Name"] = relation.get("name")
+        processed_item["Original balance"] = rebate or vendor_funding
+        processed_items.append(processed_item)
+    return processed_items
+
+
+def validate_asin(asin):
+    """Check that an ASIN is present and not NaN."""
+    return asin and not (isinstance(asin, float) and math.isnan(asin))
+
+
+def save_excel(sheet_data, large_sheet_data, max_sheet_data, new_file_name):
+    """Save the collected data to the Excel file."""
+    # Writer for a single sheet
+    def write_sheet(writer, data, sheet_name):
+        log.info(f"开始写入 {sheet_name}, 共计 {len(data)} 条")
+        df = pd.DataFrame(data)  # convert the rows to a DataFrame
+        df.to_excel(writer, sheet_name=sheet_name, index=False)
+
+    # Initialise the Excel writer
+    with pd.ExcelWriter(new_file_name, engine="openpyxl") as writer:
+        # Write the small data
+        if sheet_data:
+            log.info(f"保存小数据,共计 {len(sheet_data)} 条")
+            write_sheet(writer, sheet_data, "Sheet1")
+        # Write the large data (one sheet per invoice, submitted to a thread pool)
+        if large_sheet_data:
+            log.info(f"保存大数据,共计 {sum(len(data) for data in large_sheet_data.values())} 条")
+            with ThreadPoolExecutor() as executor:
+                for sheet_name, data in large_sheet_data.items():
+                    executor.submit(write_sheet, writer, data, sheet_name)
+        # Write the oversized data
+        if max_sheet_data:
+            log.info(f"保存超大数据,共计 {sum(len(data) for data in max_sheet_data.values())} 条")
+            with ThreadPoolExecutor() as executor:
+                for sheet_name, data in max_sheet_data.items():
+                    executor.submit(write_sheet, writer, data, sheet_name)
+    log.info(f"文件 {new_file_name} 保存完成,路径:{os.path.abspath(new_file_name)}")


 if __name__ == '__main__':
```
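One caveat worth flagging on the new `save_excel`: both `ThreadPoolExecutor` blocks submit concurrent `write_sheet` calls against a single shared `ExcelWriter`, and openpyxl workbooks are not documented as thread-safe, so the parallelism mostly buys DataFrame-construction time while risking races on the shared workbook. A variant that keeps the construction parallel but serializes the actual writes (a sketch, with `sheets` standing in for any of the dicts above):

```python
from concurrent.futures import ThreadPoolExecutor

import pandas as pd


def save_excel_serial_writes(sheets, new_file_name):
    """sheets: dict mapping sheet_name -> list of row mappings/Series."""
    # Build the DataFrames in worker threads; exiting the context
    # waits for every submitted task to finish.
    with ThreadPoolExecutor() as executor:
        futures = {name: executor.submit(pd.DataFrame, data)
                   for name, data in sheets.items()}
    # Only this thread touches the shared openpyxl writer.
    with pd.ExcelWriter(new_file_name, engine="openpyxl") as writer:
        for name, future in futures.items():
            future.result().to_excel(writer, sheet_name=name, index=False)
```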