Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
amazon_reports
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
common
amazon_reports
Commits
7f607f47
Commit
7f607f47
authored
Oct 21, 2024
by
邱阿朋
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
spa数据查询
parent
e9bc06f5
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
125 additions
and
25 deletions
+125
-25
coop.py
coop.py
+124
-24
invoices.py
invoices.py
+1
-1
relations.xlsx
relations.xlsx
+0
-0
No files found.
coop.py
View file @
7f607f47
# coding: utf-8
# 回款明细
import
math
import
os
import
shutil
import
time
from
urllib.parse
import
urlparse
,
parse_qs
import
pandas
as
pd
from
DrissionPage
import
ChromiumPage
from
DrissionPage.errors
import
PageDisconnectedError
,
ElementNotFoundError
from
DrissionPage.errors
import
ElementNotFoundError
from
lxml
import
etree
from
helper
import
helper
from
helper
import
helper
,
file
,
excel
page
=
ChromiumPage
()
page
.
set
.
load_mode
.
eager
()
...
...
@@ -20,6 +25,16 @@ helper.make_dir(download_path)
page
.
set
.
download_path
(
download_path
)
def asin_sku_relations():
    """Load the ASIN lookup table from ``relations.xlsx``.

    The workbook is read with ``pd.read_excel`` from a relative path, so it
    is resolved against the current working directory.

    Returns:
        dict: one entry per distinct ``ASIN`` cell value, each mapping to
        ``{"SKU": ..., "NAME": ...}`` taken from the ``SKU`` and ``NAME``
        columns of the same row. When an ASIN occurs on several rows, the
        last row wins.
    """
    sheet = pd.read_excel('relations.xlsx')
    # Convert each row to a plain dict once, then key the result by ASIN.
    records = (entry.to_dict() for _, entry in sheet.iterrows())
    return {
        record['ASIN']: {"SKU": record['SKU'], "NAME": record['NAME']}
        for record in records
    }
def
export_list_read_data
():
file_name
=
"ContraCogsInvoices.xls"
if
not
os
.
path
.
isfile
(
file_name
):
...
...
@@ -36,15 +51,52 @@ def export_list_read_data():
def
export_item_read_data
(
invoice_id
):
try
:
file_name
=
f
"coop
\\
{invoice_id}.csv"
if
not
os
.
path
.
isfile
(
file_name
):
page
.
get
(
f
"https://vendorcentral.amazon.com/hz/vendor/members/coop?searchText={invoice_id}"
)
# 点击选项卡
page
.
ele
(
"#a-autoid-2-announce"
)
.
click
()
# 下载报表
file_name
=
f
"coop
\\
{invoice_id}.csv"
page
.
ele
(
f
"#invoiceDownloads-{invoice_id}_1"
)
.
click
.
to_download
(
rename
=
file_name
)
.
wait
()
return
pd
.
read_csv
(
file_name
,
engine
=
'python'
,
on_bad_lines
=
'skip'
)
page
.
get
(
f
"https://vendorcentral.amazon.com/hz/vendor/members/coop?searchText={invoice_id}"
)
# 点击选项卡
page
.
ele
(
"#a-autoid-2-announce"
)
.
click
()
# 下载报表
page
.
ele
(
f
"#invoiceDownloads-{invoice_id}_2"
)
.
click
()
time
.
sleep
(
2
)
# 获取报表表单内容
report_table_html
=
page
.
ele
(
"#backup-report-table"
)
.
html
tree
=
etree
.
HTML
(
report_table_html
)
# 提取所有链接
links
=
tree
.
xpath
(
'//table[@id="backup-report-table"]//a/@href'
)
for
link
in
links
:
# 解析链接中的查询参数
parsed_url
=
urlparse
(
link
)
query_params
=
parse_qs
(
parsed_url
.
query
)
# 提取 filename 参数
filename
=
query_params
.
get
(
'fileName'
,
[
'未找到文件名'
])[
0
]
report_file_dir
=
f
"coop
\\
{invoice_id}"
report_file_tmp_dir
=
f
"{report_file_dir}
\\
{filename}
\\
"
full_url
=
"https://vendorcentral.amazon.com"
+
link
page
.
download
(
full_url
,
report_file_tmp_dir
,
show_msg
=
False
)
report_file
=
report_file_tmp_dir
+
"BackupReport.xls"
file
.
wait_for_downloads
(
report_file
)
try
:
df
=
pd
.
read_excel
(
report_file
)
# 获取表头
headers
=
df
.
columns
.
tolist
()
# 要检查的列名
column_names_to_check
=
[
"Rebate In Agreement Currency"
,
"Vendor Funding In Agreement Currency"
]
# 判断头文件是否满足读取条件,不满足删除文件夹
header_is_normal
=
any
(
column
in
headers
for
column
in
column_names_to_check
)
if
not
header_is_normal
:
shutil
.
rmtree
(
report_file_tmp_dir
)
continue
df
=
df
[
df
[
'Asin'
]
.
notna
()]
return
df
except
ValueError
:
# 递归删除文件夹
shutil
.
rmtree
(
report_file_tmp_dir
)
except
ElementNotFoundError
:
print
(
"导出按钮不存在刷新网页"
)
page
.
refresh
()
...
...
@@ -52,19 +104,67 @@ def export_item_read_data(invoice_id):
def
main
():
coop_data
=
export_list_read_data
()
for
_
,
data
in
coop_data
.
iterrows
():
# 根据回款id搜索下载报表
invoice_id
=
data
.
get
(
"Invoice ID"
)
print
(
invoice_id
)
export_item_read_data
(
invoice_id
)
relation_data
=
asin_sku_relations
()
# 获取 ASIN 与 SKU 的对应关系数据
coop_list
=
export_list_read_data
()
# 获取合作数据列表
print
(
f
"共计:{len(coop_list)},条数据"
)
i
=
0
new_coop_data
=
[]
for
_
,
coop
in
coop_list
.
iterrows
():
if
i
==
20
:
break
i
+=
1
invoice_id
=
coop
.
get
(
"Invoice ID"
)
# 获取发票 ID
print
({
"index"
:
i
,
"invoice_id"
:
invoice_id
})
item_coop_data
=
[]
# 根据发票 ID 获取 item 列表
item_list
=
export_item_read_data
(
invoice_id
)
for
_
,
item
in
item_list
.
iterrows
():
asin
=
item
.
get
(
"Asin"
)
# 判断 ASIN 是否为空或无效
if
not
asin
or
(
isinstance
(
asin
,
float
)
and
math
.
isnan
(
asin
)):
break
relation
=
relation_data
.
get
(
asin
)
# 如果未匹配到 SKU,记录空值
if
not
relation
:
print
(
f
"未匹配到 SKU:{asin}"
)
item
[
'Asin'
]
=
asin
item
[
'ERP SKU'
]
=
""
item
[
'Group Name'
]
=
""
item_coop_data
.
append
(
item
)
continue
# 跳过当前条目,继续下一个
# 如果 item_list 长度大于 10,使用原有数据
if
len
(
item_list
)
>=
10
:
new_item
=
item
.
copy
()
new_item
[
'Asin'
]
=
asin
new_item
[
'ERP SKU'
]
=
relation
.
get
(
"SKU"
)
new_item
[
'Group Name'
]
=
relation
.
get
(
"NAME"
)
item_coop_data
.
append
(
new_item
)
else
:
# 否则新建一个条目
new_item
=
coop
.
copy
()
new_item
[
'Asin'
]
=
asin
new_item
[
'ERP SKU'
]
=
relation
.
get
(
"SKU"
)
new_item
[
'Group Name'
]
=
relation
.
get
(
"NAME"
)
new_coop_data
.
append
(
new_item
)
# 保存已处理的 item 数据到 Excel 文件中
if
item_coop_data
:
excel
.
save_xls
(
item_coop_data
,
'SPA查询.xlsx'
,
invoice_id
)
# 保存最终的合作数据
if
new_coop_data
:
excel
.
save_xls
(
new_coop_data
,
'SPA查询.xlsx'
)
page
.
close
()
# 关闭页面
page
.
close
()
if
__name__
==
'__main__'
:
try
:
main
()
except
KeyboardInterrupt
:
pass
except
PageDisconnectedError
as
e
:
print
(
"与页面的连接已断开"
)
main
()
invoices.py
View file @
7f607f47
...
...
@@ -55,7 +55,7 @@ def export_details_read_data(invoice_number):
# 将字典转换为 URL 查询参数
query_string
=
urllib
.
parse
.
urlencode
(
params
)
page
.
get
(
f
"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-details
?
"
+
query_string
)
f
"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-details"
+
query_string
)
if
not
os
.
path
.
isfile
(
file_name
):
page
.
ele
(
"#line-items-export-to-spreadsheet-announce"
,
timeout
=
5
)
.
click
.
to_download
(
rename
=
file_name
)
...
...
relations.xlsx
View file @
7f607f47
No preview for this file type
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment