Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
amazon_reports
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
common
amazon_reports
Commits
7690d798
Commit
7690d798
authored
Oct 17, 2024
by
邱阿朋
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
爬虫处理
parent
131c9a08
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
35 additions
and
32 deletions
+35
-32
coop.py
coop.py
+20
-16
invoices.py
invoices.py
+3
-3
returns.py
returns.py
+12
-13
No files found.
coop.py
View file @
7690d798
...
@@ -20,7 +20,21 @@ helper.make_dir(download_path)
...
@@ -20,7 +20,21 @@ helper.make_dir(download_path)
page
.
set
.
download_path
(
download_path
)
page
.
set
.
download_path
(
download_path
)
def
export_list
(
invoice_id
):
def
export_list_read_data
():
file_name
=
"ContraCogsInvoices.xls"
if
not
os
.
path
.
isfile
(
file_name
):
page
.
get
(
"https://vendorcentral.amazon.com/hz/vendor/members/coop?ref_=vc_xx_subNav"
)
# 全选
page
.
ele
(
"#select-all"
)
.
click
()
# 点击选项卡
page
.
ele
(
"#cc-invoice-actions-dropdown"
)
.
click
()
# 点击下载报表
page
.
ele
(
"#cc-invoice-actions-dropdown_2"
)
.
click
.
to_download
()
.
wait
()
return
pd
.
read_excel
(
file_name
,
engine
=
'xlrd'
)
def
export_item_read_data
(
invoice_id
):
try
:
try
:
file_name
=
f
"coop
\\
{invoice_id}.csv"
file_name
=
f
"coop
\\
{invoice_id}.csv"
if
not
os
.
path
.
isfile
(
file_name
):
if
not
os
.
path
.
isfile
(
file_name
):
...
@@ -30,30 +44,20 @@ def export_list(invoice_id):
...
@@ -30,30 +44,20 @@ def export_list(invoice_id):
# 下载报表
# 下载报表
file_name
=
f
"coop
\\
{invoice_id}.csv"
file_name
=
f
"coop
\\
{invoice_id}.csv"
page
.
ele
(
f
"#invoiceDownloads-{invoice_id}_1"
)
.
click
.
to_download
(
rename
=
file_name
)
.
wait
()
page
.
ele
(
f
"#invoiceDownloads-{invoice_id}_1"
)
.
click
.
to_download
(
rename
=
file_name
)
.
wait
()
return
pd
.
read_csv
(
file_name
,
engine
=
'python'
,
on_bad_lines
=
'skip'
)
except
ElementNotFoundError
:
except
ElementNotFoundError
:
print
(
"导出按钮不存在刷新网页"
)
print
(
"导出按钮不存在刷新网页"
)
page
.
refresh
()
page
.
refresh
()
export_list
(
invoice_id
)
export_item_read_data
(
invoice_id
)
def
main
():
page
.
get
(
"https://vendorcentral.amazon.com/hz/vendor/members/coop?ref_=vc_xx_subNav"
)
file_name
=
"ContraCogsInvoices.xls"
if
not
os
.
path
.
isfile
(
file_name
):
# 全选
page
.
ele
(
"#select-all"
)
.
click
()
# 点击选项卡
page
.
ele
(
"#cc-invoice-actions-dropdown"
)
.
click
()
# 点击下载报表
page
.
ele
(
"#cc-invoice-actions-dropdown_2"
)
.
click
.
to_download
()
.
wait
()
file_name
=
"ContraCogsInvoices.xls"
def
main
():
coop_data
=
pd
.
read_excel
(
file_name
,
engine
=
'xlrd'
)
coop_data
=
export_list_read_data
(
)
for
_
,
data
in
coop_data
.
iterrows
():
for
_
,
data
in
coop_data
.
iterrows
():
# 根据回款id搜索下载报表
# 根据回款id搜索下载报表
invoice_id
=
data
.
get
(
"Invoice ID"
)
invoice_id
=
data
.
get
(
"Invoice ID"
)
print
(
invoice_id
)
print
(
invoice_id
)
export_
list
(
invoice_id
)
export_
item_read_data
(
invoice_id
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
invoices.py
View file @
7690d798
...
@@ -24,14 +24,14 @@ page.set.download_path(download_path)
...
@@ -24,14 +24,14 @@ page.set.download_path(download_path)
warnings
.
filterwarnings
(
"ignore"
,
category
=
UserWarning
,
module
=
"openpyxl"
)
warnings
.
filterwarnings
(
"ignore"
,
category
=
UserWarning
,
module
=
"openpyxl"
)
def
export_list_
filter
_data
():
def
export_list_
read
_data
():
file_name
=
'Payments.xlsx'
file_name
=
'Payments.xlsx'
if
not
os
.
path
.
isfile
(
file_name
):
if
not
os
.
path
.
isfile
(
file_name
):
page
.
get
(
f
"https://vendorcentral.amazon.com/hz/vendor/members/remittance/home"
)
page
.
get
(
f
"https://vendorcentral.amazon.com/hz/vendor/members/remittance/home"
)
page
.
ele
(
"#remittance-home-select-all"
)
.
click
()
page
.
ele
(
"#remittance-home-select-all"
)
.
click
()
page
.
ele
(
"#remittance-home-export-link"
)
.
click
.
to_download
()
.
wait
()
page
.
ele
(
"#remittance-home-export-link"
)
.
click
.
to_download
()
.
wait
()
df
=
pd
.
read_excel
(
'Payments.xlsx'
,
skiprows
=
22
)
df
=
pd
.
read_excel
(
file_name
,
skiprows
=
22
)
# 定义正则表达式模式,匹配包含 'Price' 或 'PCR' 或 'XXXXXXXX/XXXX/' 的描述
# 定义正则表达式模式,匹配包含 'Price' 或 'PCR' 或 'XXXXXXXX/XXXX/' 的描述
pattern
=
r'Price|PCR|Missed|Shortage|^[A-Z0-9]{8}/[A-Z0-9]{4}/'
pattern
=
r'Price|PCR|Missed|Shortage|^[A-Z0-9]{8}/[A-Z0-9]{4}/'
# 过滤符合条件的行
# 过滤符合条件的行
...
@@ -65,7 +65,7 @@ def export_details_read_data(invoice_number):
...
@@ -65,7 +65,7 @@ def export_details_read_data(invoice_number):
def
main
():
def
main
():
list_data
=
export_list_
filter
_data
()
list_data
=
export_list_
read
_data
()
excel
.
save_xls
(
list_data
,
"回款数据.xlsx"
,
"Remittance payments"
)
excel
.
save_xls
(
list_data
,
"回款数据.xlsx"
,
"Remittance payments"
)
all_normal_pay_data
=
[]
all_normal_pay_data
=
[]
...
...
returns.py
View file @
7690d798
...
@@ -57,20 +57,19 @@ def asin_sku_relations():
...
@@ -57,20 +57,19 @@ def asin_sku_relations():
return
relations_dict
return
relations_dict
def
export_list
():
def
export_list
_read_data
():
# 访问网页
file_name
=
"Return_Summary.xls"
open_url
(
"https://vendorcentral.amazon.com/hz/vendor/members/returns?ref_=vc_xx_subNav"
)
if
not
os
.
path
.
isfile
(
file_name
):
# 访问网页
# 导出退货单
open_url
(
"https://vendorcentral.amazon.com/hz/vendor/members/returns?ref_=vc_xx_subNav"
)
page
.
ele
(
"#file-download-button"
)
.
click
.
to_download
()
.
wait
()
# 导出退货单
return
pd
.
read_excel
(
'Return_Summary.xls'
,
engine
=
'xlrd'
)
page
.
ele
(
"#file-download-button"
)
.
click
.
to_download
()
.
wait
(
)
return
pd
.
read_excel
(
file_name
,
engine
=
'xlrd'
)
def
export_item
(
return_id
):
returns_dir
=
"returns"
helper
.
make_dir
(
returns_dir
)
file_name
=
f
"{returns_dir}
\\
{return_id}.xls"
def
export_item_read_data
(
return_id
):
file_name
=
f
"returns
\\
{return_id}.xls"
if
not
os
.
path
.
isfile
(
file_name
):
if
not
os
.
path
.
isfile
(
file_name
):
# 打开退回详情下载明细
# 打开退回详情下载明细
open_url
(
f
"https://vendorcentral.amazon.com/katalmonsapp/vendor/members/returns/{return_id}"
)
open_url
(
f
"https://vendorcentral.amazon.com/katalmonsapp/vendor/members/returns/{return_id}"
)
...
@@ -85,13 +84,13 @@ def main():
...
@@ -85,13 +84,13 @@ def main():
relations_dict
=
asin_sku_relations
()
relations_dict
=
asin_sku_relations
()
# 下载并读取list数据
# 下载并读取list数据
list_data
=
export_list
()
list_data
=
export_list
_read_data
()
new_list_data
=
[]
new_list_data
=
[]
for
_
,
data
in
list_data
.
iterrows
():
for
_
,
data
in
list_data
.
iterrows
():
return_id
=
data
.
get
(
'Return ID'
)
return_id
=
data
.
get
(
'Return ID'
)
# 下载退货详情表格读取数据
# 下载退货详情表格读取数据
item_data
=
export_item
(
return_id
)
item_data
=
export_item
_read_data
(
return_id
)
# 按 'Purchase order' 和 'ASIN' 分组,并对 'Quantity' 和 Total amount 进行求和
# 按 'Purchase order' 和 'ASIN' 分组,并对 'Quantity' 和 Total amount 进行求和
item_data_result
=
item_data
.
groupby
([
'Purchase order'
,
'ASIN'
],
as_index
=
False
)
.
agg
({
item_data_result
=
item_data
.
groupby
([
'Purchase order'
,
'ASIN'
],
as_index
=
False
)
.
agg
({
'Quantity'
:
'sum'
,
'Quantity'
:
'sum'
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment