amazon_reports · commit f1481184
Authored Oct 22, 2024 by 邱阿朋
Parent: 235f7228

spa数据查询 (SPA data query)

Showing 2 changed files with 70 additions and 48 deletions:

  coop.py          +69  -47
  helper/file.py    +1   -1

coop.py
@@ -11,7 +11,7 @@ from DrissionPage import ChromiumPage
 from DrissionPage.errors import ElementNotFoundError
 from lxml import etree
-from helper import helper, file, excel
+from helper import helper, excel, file

 page = ChromiumPage()
 page.set.load_mode.eager()
@@ -49,69 +49,73 @@ def export_list_read_data():
     return pd.read_excel(file_name, engine='xlrd')

-def export_item_read_data(invoice_id):
+def get_report_table_html(invoice_id):
-    page.get(f"https://vendorcentral.amazon.com/hz/vendor/members/coop?searchText={invoice_id}")
     try:
+        page.get(f"https://vendorcentral.amazon.com/hz/vendor/members/coop?searchText={invoice_id}")
         # Click the tab
         page.ele("#a-autoid-2-announce").click()
         # Download the report
         page.ele(f"#invoiceDownloads-{invoice_id}_2").click()
-        time.sleep(2)
+        time.sleep(1)
         # Get the report table content
         report_table_html = page.ele("#backup-report-table").html
-        tree = etree.HTML(report_table_html)
-        # Extract all links
-        links = tree.xpath('//table[@id="backup-report-table"]//a/@href')
-        for link in links:
-            # Parse the query parameters in the link
-            parsed_url = urlparse(link)
-            query_params = parse_qs(parsed_url.query)
-            # Extract the filename parameter
-            filename = query_params.get('fileName', ['未找到文件名'])[0]
-            report_file_dir = f"coop\\{invoice_id}"
-            report_file_tmp_dir = f"{report_file_dir}\\{filename}\\"
-            full_url = "https://vendorcentral.amazon.com" + link
-            page.download(full_url, report_file_tmp_dir, show_msg=False)
-            report_file = report_file_tmp_dir + "BackupReport.xls"
-            file.wait_for_downloads(report_file)
-            try:
-                df = pd.read_excel(report_file)
-                # Get the table headers
-                headers = df.columns.tolist()
-                # Column names to check
-                column_names_to_check = ["Rebate In Agreement Currency", "Vendor Funding In Agreement Currency"]
-                # Check whether the header meets the read condition; delete the folder if not
-                header_is_normal = any(column in headers for column in column_names_to_check)
-                if not header_is_normal:
-                    shutil.rmtree(report_file_tmp_dir)
-                    continue
-                df = df[df['Asin'].notna()]
-                return df
-            except ValueError:
-                # Recursively delete the folder
-                shutil.rmtree(report_file_tmp_dir)
+        return report_table_html
     except ElementNotFoundError:
         print("导出按钮不存在刷新网页")
         page.refresh()
         export_item_read_data(invoice_id)

+def export_item_read_data(invoice_id):
+    file_name = f"coop\\{invoice_id}.xlsx"
+    if os.path.isfile(file_name):
+        df = pd.read_excel(file_name)
+        return df
+    # Get the report table content
+    report_table_html = get_report_table_html(invoice_id)
+    tree = etree.HTML(report_table_html)
+    # Extract all links
+    links = tree.xpath('//table[@id="backup-report-table"]//a/@href')
+    for link in links:
+        # Parse the query parameters in the link
+        parsed_url = urlparse(link)
+        query_params = parse_qs(parsed_url.query)
+        # Extract the filename parameter
+        filename = query_params.get('fileName', ['未找到文件名'])[0]
+        report_file_tmp_dir = f"coop\\{invoice_id}\\{filename}\\"
+        page.download("https://vendorcentral.amazon.com" + link, report_file_tmp_dir, show_msg=True)
+        report_file = report_file_tmp_dir + "BackupReport.xls"
+        file.wait_for_downloads(report_file)
+        try:
+            df = pd.read_excel(report_file)
+            # Get the table headers
+            headers = df.columns.tolist()
+            # Column names to check
+            column_names_to_check = ["Rebate In Agreement Currency", "Vendor Funding In Agreement Currency"]
+            # Check whether the header meets the read condition; delete the folder if not
+            header_is_normal = any(column in headers for column in column_names_to_check)
+            if not header_is_normal:
+                continue
+            data = df[df['Asin'].notna()]
+            excel.save_xls(data, file_name)
+            shutil.rmtree(f"coop\\{invoice_id}")
+            return pd.read_excel(file_name)
+        except ValueError:
+            pass

 def main():
     relation_data = asin_sku_relations()  # get the ASIN-to-SKU mapping data
     coop_list = export_list_read_data()  # get the co-op data list
     # coop_list = coop_list[:189]
     print(f"共计:{len(coop_list)},条数据")
     i = 0
     new_coop_data = []
+    sheet_data = {}
     for _, coop in coop_list.iterrows():
         i += 1
         invoice_id = coop.get("Invoice ID")  # get the invoice ID
@@ -120,6 +124,10 @@ def main():
         item_coop_data = []
         # Get the item list for this invoice ID
         item_list = export_item_read_data(invoice_id)
+        if item_list is None:
+            print(f"{invoice_id} 暂无报告信息")
+            continue
         for _, item in item_list.iterrows():
             asin = item.get("Asin")
@@ -131,7 +139,6 @@ def main():
             # If no SKU was matched, record empty values
             if not relation:
                 print(f"未匹配到 SKU:{asin}")
                 item['Asin'] = asin
                 item['ERP SKU'] = ""
                 item['Group Name'] = ""
@@ -155,12 +162,27 @@ def main():
         # Save the processed item data to the Excel file
         if item_coop_data:
-            excel.save_xls(item_coop_data, 'SPA查询.xlsx', invoice_id)
+            sheet_data[invoice_id] = item_coop_data
     # Save the final co-op data
     if new_coop_data:
         excel.save_xls(new_coop_data, 'SPA查询.xlsx')
+    max_sheet_data = {}
+    if sheet_data:
+        # Iterate over the sheet_data dict
+        for key, value in sheet_data.items():
+            if len(value) > 10000:
+                max_sheet_data[key] = value
+                continue
+            excel.save_xls(value, 'SPA查询.xlsx', key)
+    if max_sheet_data:
+        for key, value in max_sheet_data.items():
+            excel.save_xls(value, 'SPA查询.xlsx', key)
     page.close()  # close the page
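Side note on the download loop in the new export_item_read_data: the filename extraction relies only on urllib.parse, so it can be exercised in isolation. A minimal sketch with a made-up href (the real values come from //table[@id="backup-report-table"]//a/@href; the path and invoiceId parameter below are purely illustrative):

from urllib.parse import urlparse, parse_qs

# Hypothetical backup-report link in the shape the loop expects (not a real URL).
link = "/hz/vendor/members/coop/download?fileName=BackupReport.xls&invoiceId=12345"

parsed_url = urlparse(link)                # splits path and query string
query_params = parse_qs(parsed_url.query)  # {'fileName': ['BackupReport.xls'], 'invoiceId': ['12345']}
filename = query_params.get('fileName', ['未找到文件名'])[0]
full_url = "https://vendorcentral.amazon.com" + link

print(filename)   # -> BackupReport.xls
print(full_url)   # -> https://vendorcentral.amazon.com/hz/vendor/members/coop/download?...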
helper/file.py
@@ -14,7 +14,7 @@ def wait_for_downloads(file_name, timeout=60):
         files = os.path.isfile(file_name)
         if files:
             return True
-        time.sleep(1)
+        time.sleep(0.5)
     return False
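Only the inside of the polling loop appears in this hunk; the enclosing loop and timeout handling in helper/file.py are not shown. A minimal sketch of how wait_for_downloads plausibly fits together, assuming a deadline-based while loop (the deadline variable and loop structure are assumptions, not part of the diff):

import os
import time

def wait_for_downloads(file_name, timeout=60):
    # Poll until the downloaded file exists or the timeout elapses.
    deadline = time.time() + timeout   # assumed: not visible in the hunk
    while time.time() < deadline:      # assumed: not visible in the hunk
        files = os.path.isfile(file_name)
        if files:
            return True
        time.sleep(0.5)                # the interval this commit shortens from 1s
    return False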