Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
amazon_reports
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
common
amazon_reports
Commits
131c9a08
Commit
131c9a08
authored
Oct 17, 2024
by
邱阿朋
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
爬虫处理
parent
ccca3f9a
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
160 additions
and
106 deletions
+160
-106
.gitignore
.gitignore
+2
-1
coop.py
coop.py
+27
-18
excel.py
helper/excel.py
+23
-13
invoices.py
invoices.py
+74
-38
returns.py
returns.py
+34
-36
No files found.
.gitignore
View file @
131c9a08
...
...
@@ -8,3 +8,4 @@ Payments.xlsx
returns
invoices
coop
*.xlsx
\ No newline at end of file
coop.py
View file @
131c9a08
...
...
@@ -4,7 +4,7 @@ import os
import
pandas
as
pd
from
DrissionPage
import
ChromiumPage
from
DrissionPage.errors
import
PageDisconnectedError
from
DrissionPage.errors
import
PageDisconnectedError
,
ElementNotFoundError
from
helper
import
helper
...
...
@@ -20,31 +20,40 @@ helper.make_dir(download_path)
page
.
set
.
download_path
(
download_path
)
def export_list(invoice_id, max_retries=3):
    """Download the co-op report CSV for one invoice unless it already exists.

    The report is searched for by ``invoice_id`` on the Vendor Central co-op
    page and downloaded under the name ``coop\\{invoice_id}.csv``.

    Args:
        invoice_id: Invoice identifier used both in the search URL and in the
            download element id.
        max_retries: How many times to refresh and retry when an expected
            page element is missing. The previous implementation retried by
            unbounded recursion, which could never terminate on a page that
            permanently lacks the export button.

    Raises:
        ElementNotFoundError: If the element is still missing after
            ``max_retries`` retries.
    """
    file_name = f"coop\\{invoice_id}.csv"
    if os.path.isfile(file_name):
        # Already downloaded on a previous run; nothing to do.
        return

    search_url = f"https://vendorcentral.amazon.com/hz/vendor/members/coop?searchText={invoice_id}"
    retries_left = max_retries
    while True:
        try:
            page.get(search_url)
            # Open the actions dropdown.
            page.ele("#a-autoid-2-announce").click()
            # Start the report download and wait for it to finish.
            page.ele(f"#invoiceDownloads-{invoice_id}_1").click.to_download(rename=file_name).wait()
            return
        except ElementNotFoundError:
            if retries_left <= 0:
                raise
            retries_left -= 1
            print("导出按钮不存在刷新网页")
            page.refresh()
def main():
    """Export the co-op invoice list, then download the report of every invoice.

    The invoice list is downloaded as ``ContraCogsInvoices.xls`` only when that
    file is not already present. Each row's ``"Invoice ID"`` is then passed to
    ``export_list``, which owns the per-invoice navigation and download (the
    same steps were previously repeated inline here as well).
    """
    page.get("https://vendorcentral.amazon.com/hz/vendor/members/coop?ref_=vc_xx_subNav")

    file_name = "ContraCogsInvoices.xls"
    if not os.path.isfile(file_name):
        # Select all invoices.
        page.ele("#select-all").click()
        # Open the actions dropdown.
        page.ele("#cc-invoice-actions-dropdown").click()
        # Click the download entry once and wait for that download to finish.
        page.ele("#cc-invoice-actions-dropdown_2").click.to_download().wait()

    coop_data = pd.read_excel(file_name, engine='xlrd')
    for _, data in coop_data.iterrows():
        # Download the report for this invoice id.
        invoice_id = data.get("Invoice ID")
        print(invoice_id)
        export_list(invoice_id)
if
__name__
==
'__main__'
:
...
...
helper/excel.py
View file @
131c9a08
...
...
@@ -11,28 +11,38 @@ def open_xls(file_path):
return
workbook
.
sheet_by_index
(
0
)
# 选择第一个工作表
def save_xls(data, output_file, sheet_name='Sheet1', adjusted=True):
    """Write ``data`` to one sheet of an Excel workbook and optionally fit column widths.

    If ``output_file`` already exists the sheet is added to it (replacing a
    sheet of the same name); otherwise a new workbook is created.

    Args:
        data: Anything accepted by ``pd.DataFrame(...)``.
        output_file: Path of the workbook to create or extend.
        sheet_name: Name of the sheet to write. Defaults to ``'Sheet1'``.
        adjusted: When true, each column of the written sheet is widened to
            its longest cell text plus a small margin.
    """
    df = pd.DataFrame(data)
    try:
        # Append to an existing workbook, replacing the sheet if present.
        with pd.ExcelWriter(output_file, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
            df.to_excel(writer, index=False, sheet_name=sheet_name)
    except FileNotFoundError:
        # The workbook does not exist yet: create it.
        with pd.ExcelWriter(output_file, engine='openpyxl', mode='w') as writer:
            df.to_excel(writer, index=False, sheet_name=sheet_name)

    if not adjusted:
        return

    # Reload with openpyxl to adjust the column widths of the written sheet.
    wb = load_workbook(output_file)
    ws = wb[sheet_name]

    for column in ws.columns:
        column_letter = column[0].column_letter
        # Longest textual cell in the column; empty cells are ignored.
        max_length = max(
            (len(str(cell.value)) for cell in column if cell.value is not None),
            default=0,
        )
        ws.column_dimensions[column_letter].width = max_length + 3

    # Persist the adjusted workbook.
    wb.save(output_file)
invoices.py
View file @
131c9a08
# coding: utf-8
# 回款明细
import
os
import
time
import
urllib.parse
import
warnings
import
pandas
as
pd
from
DrissionPage
import
ChromiumPage
from
DrissionPage.errors
import
PageDisconnectedError
from
DrissionPage.errors
import
PageDisconnectedError
,
ElementNotFoundError
from
helper
import
helper
from
helper
import
helper
,
excel
page
=
ChromiumPage
()
page
.
set
.
load_mode
.
normal
()
...
...
@@ -25,24 +24,12 @@ page.set.download_path(download_path)
warnings
.
filterwarnings
(
"ignore"
,
category
=
UserWarning
,
module
=
"openpyxl"
)
# 对过滤后的数据,进一步处理 Description 列
def process_description(description):
    """Return the first eight characters of the last '/'-separated segment.

    Args:
        description: Description text. Non-string values (for example ``None``
            or a NaN read from a spreadsheet) are treated as having no code.

    Returns:
        The first 8 characters of the text after the final ``'/'`` when that
        segment has at least 8 characters, otherwise ``None``.

    NOTE(review): the original comment said the prefix should be an 8-digit
    number, but only the length is checked here -- confirm whether a digit
    check is wanted.
    """
    if not isinstance(description, str):
        return None
    last_segment = description.rsplit('/', 1)[-1]
    if len(last_segment) >= 8:
        return last_segment[:8]
    return None
def
download_filter_data
():
def
export_list_filter_data
():
file_name
=
'Payments.xlsx'
if
not
os
.
path
.
isfile
(
file_name
):
page
.
get
(
f
"https://vendorcentral.amazon.com/hz/vendor/members/remittance/home"
)
page
.
ele
(
"#remittance-home-select-all"
)
.
click
()
page
.
ele
(
"#remittance-home-export-link"
)
.
click
.
to_download
()
page
.
download
.
wait
()
page
.
ele
(
"#remittance-home-export-link"
)
.
click
.
to_download
()
.
wait
()
df
=
pd
.
read_excel
(
'Payments.xlsx'
,
skiprows
=
22
)
# 定义正则表达式模式,匹配包含 'Price' 或 'PCR' 或 'XXXXXXXX/XXXX/' 的描述
...
...
@@ -51,8 +38,38 @@ def download_filter_data():
return
df
[
df
[
'Description'
]
.
str
.
contains
(
pattern
,
na
=
False
,
regex
=
True
)]
def export_details_read_data(invoice_number, max_retries=3):
    """Download (if needed) and load the line-item CSV of one invoice.

    The CSV is stored as ``invoices\\{invoice_number}.csv``. When it is not on
    disk yet, the invoice-details page is opened and its line items are
    exported to that file.

    Args:
        invoice_number: Invoice number placed in the ``invoiceNumber`` query
            parameter and in the file name.
        max_retries: Number of additional attempts after the first one when
            the export button is missing or the file did not appear. The
            previous implementation retried through unbounded recursion.

    Returns:
        The CSV parsed by ``pd.read_csv`` with the first two rows skipped and
        malformed lines dropped.

    Raises:
        FileNotFoundError: If the CSV still does not exist after all attempts.
    """
    file_name = f"invoices\\{invoice_number}.csv"

    if not os.path.isfile(file_name):
        params = {
            "invoiceNumber": invoice_number,
            "payeeCode": "VECET",
            "activeTab": "lineItems",
        }
        # Turn the dict into a URL query string.
        query_string = urllib.parse.urlencode(params)
        details_url = "https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-details?" + query_string

        for _ in range(max_retries + 1):
            page.get(details_url)
            try:
                page.ele("#line-items-export-to-spreadsheet-announce").click.to_download(rename=file_name).wait()
            except ElementNotFoundError:
                print("导出按钮不存在刷新网页")
                page.refresh()
            if os.path.isfile(file_name):
                break
        else:
            raise FileNotFoundError(
                f"could not export line items for invoice {invoice_number} to {file_name}"
            )

    return pd.read_csv(file_name, skiprows=2, engine='python', on_bad_lines='skip')
def
main
():
list_data
=
download_filter_data
()
list_data
=
export_list_filter_data
()
excel
.
save_xls
(
list_data
,
"回款数据.xlsx"
,
"Remittance payments"
)
all_normal_pay_data
=
[]
all_price_pay_data
=
[]
for
_
,
data
in
list_data
.
iterrows
():
invoice_number
=
data
.
get
(
"Invoice Number"
)
description
=
data
.
get
(
"Description"
)
...
...
@@ -62,26 +79,45 @@ def main():
print
(
invoice_number
)
page
.
get
(
f
"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-po-search?searchByNumberToken={invoice_number}"
)
params
=
{
"invoiceNumber"
:
invoice_number
,
"payeeCode"
:
"VECET"
,
"activeTab"
:
"lineItems"
,
}
# 将字典转换为 URL 查询参数
query_string
=
urllib
.
parse
.
urlencode
(
params
)
page
.
get
(
f
"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-details?"
+
query_string
)
# 读取详情内容
file_name
=
f
"invoices
\\
{invoice_number}.csv"
page
.
ele
(
"#line-items-export-to-spreadsheet-announce"
)
.
click
.
to_download
(
rename
=
file_name
)
time
.
sleep
(
3
)
detail_data
=
pd
.
read_csv
(
file_name
,
skiprows
=
2
,
engine
=
'python'
,
on_bad_lines
=
'skip'
)
# for _, detail, in detail_data.iterrows():
# print(f"{detail.to_dict()}")
# invoice_search_page = page.new_tab(f"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-po-search?searchByNumberToken={invoice_number}")
# invoice_search_page.close()
# 下载excel文件并读取数据
detail_data
=
export_details_read_data
(
invoice_number
)
# 初始化列表存储新字段数据
normal_pay_data
=
[]
price_pay_data
=
[]
for
index
,
detail_datum
in
detail_data
.
iterrows
():
amount
=
detail_datum
.
get
(
'Amount'
,
0
)
# 使用默认值 0 防止 None
code
=
"VECET"
deduction_points
=
1
# 计算扣除后的金额
amount_after_deduction
=
amount
-
(
amount
*
deduction_points
/
100
)
# 复制原始行数据,避免直接修改
new_record
=
detail_datum
.
copy
()
new_record
[
"IsFinished"
]
=
"是"
new_record
[
"DeductionPoints"
]
=
f
"{deduction_points}
%
"
# 拼接百分号
new_record
[
"Code"
]
=
code
new_record
[
"AmountAfterDeduction"
]
=
amount_after_deduction
# 将处理后的记录添加到临时列表
normal_pay_data
.
append
(
new_record
)
# 将处理后的记录添加到临时列表
price_pay_data
.
append
(
new_record
)
# 添加到汇总列表
all_normal_pay_data
.
append
(
pd
.
DataFrame
(
normal_pay_data
))
all_price_pay_data
.
append
(
pd
.
DataFrame
(
price_pay_data
))
# 将所有数据合并为一个 DataFrame
normal_pay_summary
=
pd
.
concat
(
all_normal_pay_data
,
ignore_index
=
True
)
price_pay_summary
=
pd
.
concat
(
all_price_pay_data
,
ignore_index
=
True
)
excel
.
save_xls
(
normal_pay_summary
,
"回款数据.xlsx"
,
"正常回款导出明细"
)
excel
.
save_xls
(
price_pay_summary
,
"回款数据.xlsx"
,
"Price导出明细"
)
if
__name__
==
'__main__'
:
...
...
returns.py
View file @
131c9a08
...
...
@@ -46,6 +46,40 @@ def open_url(url):
# todo 识别图形码
def asin_sku_relations():
    """Build an ASIN -> SKU lookup from ``relations.xlsx``.

    Returns:
        A dict keyed by each row's ``'ASIN'`` value with the row's ``'SKU'``
        value; when an ASIN occurs more than once the last row wins.
    """
    mapping_frame = pd.read_excel('relations.xlsx')
    # Convert each row to a plain dict before picking the two columns.
    records = (row.to_dict() for _, row in mapping_frame.iterrows())
    return {record['ASIN']: record['SKU'] for record in records}
def export_list():
    """Export the returns summary from Vendor Central and load it.

    Returns:
        The contents of ``Return_Summary.xls`` read with the ``xlrd`` engine.
    """
    # Open the returns overview page.
    open_url("https://vendorcentral.amazon.com/hz/vendor/members/returns?ref_=vc_xx_subNav")

    # Click the export button and block until that download has finished.
    download_button = page.ele("#file-download-button")
    mission = download_button.click.to_download()
    mission.wait()

    return pd.read_excel('Return_Summary.xls', engine='xlrd')
def export_item(return_id):
    """Download (if not already on disk) and load the detail sheet of one return.

    Args:
        return_id: Identifier used in the detail-page URL and in the file name.

    Returns:
        The contents of ``returns\\{return_id}.xls`` read with the ``xlrd`` engine.
    """
    returns_dir = "returns"
    helper.make_dir(returns_dir)
    file_name = f"{returns_dir}\\{return_id}.xls"

    already_downloaded = os.path.isfile(file_name)
    if not already_downloaded:
        # Open the return's detail page and export its line items.
        open_url(f"https://vendorcentral.amazon.com/katalmonsapp/vendor/members/returns/{return_id}")
        download_button = page.ele("#file-download-button")
        mission = download_button.click.to_download(rename=file_name)
        mission.wait()

    # Load the returned-item details.
    return pd.read_excel(file_name, engine='xlrd')
def
main
():
# 读取asin和sku映射关系
relations_dict
=
asin_sku_relations
()
...
...
@@ -84,42 +118,6 @@ def main():
page
.
close
()
def asin_sku_relations():
    """Read ``relations.xlsx`` and return its ASIN -> SKU mapping as a dict.

    Rows are processed in order, so a repeated ASIN keeps the SKU of its
    last occurrence.
    """
    sheet = pd.read_excel('relations.xlsx')
    asin_to_sku = {}
    for _, row in sheet.iterrows():
        record = row.to_dict()
        asin_to_sku.update({record['ASIN']: record['SKU']})
    return asin_to_sku
def export_list():
    """Export the returns summary from Vendor Central and load it.

    Returns:
        The contents of ``Return_Summary.xls`` read with the ``xlrd`` engine.
    """
    # Open the returns overview page.
    open_url("https://vendorcentral.amazon.com/hz/vendor/members/returns?ref_=vc_xx_subNav")

    # Start the export, then wait on the page's download manager.
    export_button = page.ele("#file-download-button")
    export_button.click.to_download()
    page.download.wait()

    summary = pd.read_excel('Return_Summary.xls', engine='xlrd')
    return summary
def export_item(return_id):
    """Download (if not already on disk) and load the detail sheet of one return.

    Args:
        return_id: Identifier used in the detail-page URL and in the file name.

    Returns:
        The contents of ``returns\\{return_id}.xls`` read with the ``xlrd`` engine.
    """
    returns_dir = "returns"
    helper.make_dir(returns_dir)
    file_name = f"{returns_dir}\\{return_id}.xls"

    needs_download = not os.path.isfile(file_name)
    if needs_download:
        # Open the return's detail page and export its line items.
        open_url(f"https://vendorcentral.amazon.com/katalmonsapp/vendor/members/returns/{return_id}")
        export_button = page.ele("#file-download-button")
        export_button.click.to_download(rename=file_name)
        # Wait on the page's download manager before reading the file.
        page.download.wait()

    # Load the returned-item details.
    details = pd.read_excel(file_name, engine='xlrd')
    return details
if
__name__
==
'__main__'
:
try
:
email
=
helper
.
get_input_with_default
(
"请输入账户"
,
"us-cs001@khdtek.com"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment