Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
amazon_reports
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
common
amazon_reports
Commits
ccca3f9a
Commit
ccca3f9a
authored
Oct 16, 2024
by
邱阿朋
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
爬虫处理
parent
82a37d66
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
139 additions
and
71 deletions
+139
-71
.gitignore
.gitignore
+1
-0
coop.py
coop.py
+4
-8
excel.py
helper/excel.py
+38
-0
file.py
helper/file.py
+27
-0
helper.py
helper/helper.py
+0
-38
invoices.py
invoices.py
+60
-13
returns.py
returns.py
+9
-12
No files found.
.gitignore
View file @
ccca3f9a
...
...
@@ -4,6 +4,7 @@
*.pyc
Return_Summary.xls
ContraCogsInvoices.xls
Payments.xlsx
returns
invoices
coop
\ No newline at end of file
coop.py
View file @
ccca3f9a
...
...
@@ -3,16 +3,12 @@
import
os
import
pandas
as
pd
from
DrissionPage
import
ChromiumPage
,
ChromiumOptions
from
DrissionPage
import
ChromiumPage
from
DrissionPage.errors
import
PageDisconnectedError
from
helper
import
helper
# 创建配置对象(默认从 ini 文件中读取配置)
co
=
ChromiumOptions
()
# 设置不加载图片、静音
co
.
no_imgs
(
True
)
.
mute
(
True
)
page
=
ChromiumPage
(
addr_or_opts
=
co
)
page
=
ChromiumPage
()
page
.
set
.
load_mode
.
eager
()
page
.
set
.
when_download_file_exists
(
'overwrite'
)
...
...
@@ -31,8 +27,8 @@ def main():
# 点击选项卡
page
.
ele
(
"#cc-invoice-actions-dropdown"
)
.
click
()
# 点击下载报表
mission
=
page
.
ele
(
"#cc-invoice-actions-dropdown_2"
)
.
click
.
to_download
()
mission
.
wait
()
page
.
ele
(
"#cc-invoice-actions-dropdown_2"
)
.
click
.
to_download
()
page
.
download
.
wait
()
file_name
=
"ContraCogsInvoices.xls"
coop_data
=
pd
.
read_excel
(
file_name
,
engine
=
'xlrd'
)
...
...
helper/excel.py
0 → 100644
View file @
ccca3f9a
# coding: utf-8
import
pandas
as
pd
import
xlrd
from
openpyxl.reader.excel
import
load_workbook
def open_xls(file_path):
    """Open the workbook at ``file_path`` with xlrd and return its first sheet.

    :param file_path: path of the Excel file to open
    :return: the worksheet at index 0 of the workbook
    """
    book = xlrd.open_workbook(filename=file_path)
    # Only the first worksheet (index 0) is handed back to the caller.
    first_sheet = book.sheet_by_index(0)
    return first_sheet
def save_xls(data, output_file):
    """Write ``data`` to an Excel file and widen each column to fit its content.

    :param data: input passed straight to ``pd.DataFrame`` (e.g. a list of dicts)
    :param output_file: path of the workbook to write; it is written twice,
        once by pandas and once more after the column widths are adjusted
    """
    # index=False keeps the DataFrame row index out of the sheet.
    pd.DataFrame(data).to_excel(output_file, index=False)

    # Reload the file with openpyxl so the column dimensions can be edited.
    wb = load_workbook(output_file)
    ws = wb.active

    for column in ws.columns:
        column_letter = column[0].column_letter
        # Longest textual representation of any cell in this column.
        # str() is applied to every value, so no exception guard is needed.
        max_length = max((len(str(cell.value)) for cell in column), default=0)
        # Add a little padding so the text does not touch the cell border.
        ws.column_dimensions[column_letter].width = max_length + 2

    # Persist the adjusted widths.
    wb.save(output_file)
helper/file.py
0 → 100644
View file @
ccca3f9a
# coding: utf-8
import
os
import
time
def wait_for_downloads(download_dir, timeout=60):
    """Poll a download directory until it contains a finished file.

    The directory is listed about once per second. Entries that look like
    in-progress browser downloads (``.crdownload``, ``.part``, ``.tmp``) are
    ignored, so the function does not report success while a file is still
    being written. A directory that does not exist yet is treated as empty.

    :param download_dir: directory the files are downloaded into
    :param timeout: maximum time to wait, in seconds
    :return: True as soon as a finished file is present, False on timeout
    """
    partial_suffixes = ('.crdownload', '.part', '.tmp')
    # monotonic() is immune to wall-clock adjustments during the wait.
    deadline = time.monotonic() + timeout

    while True:
        try:
            entries = os.listdir(download_dir)
        except FileNotFoundError:
            # The directory may be created lazily by the downloader.
            entries = []

        if any(not name.endswith(partial_suffixes) for name in entries):
            return True

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        # Never sleep past the deadline.
        time.sleep(min(1, remaining))
def make_dir(path):
    """Ensure that ``path`` exists, creating it (and its parents) if needed.

    The directory is created directly and an "already exists" error is
    handled afterwards, which avoids the race between checking for the path
    and creating it.

    :param path: directory path to create
    :return: True if the path already existed, False if it was just created
    """
    try:
        os.makedirs(path)
    except FileExistsError:
        return True
    return False
helper/helper.py
View file @
ccca3f9a
# coding: utf-8
import
os
import
pandas
as
pd
import
xlrd
from
openpyxl.reader.excel
import
load_workbook
def open_xls(file_path):
    """Open the workbook at ``file_path`` with xlrd and return its first sheet.

    :param file_path: path of the Excel file to open
    :return: the worksheet at index 0 of the workbook
    """
    book = xlrd.open_workbook(filename=file_path)
    # Only the first worksheet (index 0) is handed back to the caller.
    first_sheet = book.sheet_by_index(0)
    return first_sheet
def save_xls(data, output_file):
    """Write ``data`` to an Excel file and widen each column to fit its content.

    :param data: input passed straight to ``pd.DataFrame`` (e.g. a list of dicts)
    :param output_file: path of the workbook to write; it is written twice,
        once by pandas and once more after the column widths are adjusted
    """
    # index=False keeps the DataFrame row index out of the sheet.
    pd.DataFrame(data).to_excel(output_file, index=False)

    # Reload the file with openpyxl so the column dimensions can be edited.
    wb = load_workbook(output_file)
    ws = wb.active

    for column in ws.columns:
        column_letter = column[0].column_letter
        # Longest textual representation of any cell in this column.
        # str() is applied to every value, so no exception guard is needed.
        max_length = max((len(str(cell.value)) for cell in column), default=0)
        # Add a little padding so the text does not touch the cell border.
        ws.column_dimensions[column_letter].width = max_length + 2

    # Persist the adjusted widths.
    wb.save(output_file)
def
make_dir
(
path
):
# 检查下载目录是否存在,如果不存在则创建
...
...
invoices.py
View file @
ccca3f9a
# coding: utf-8
# 回款明细
import
os
import
time
import
urllib.parse
import
warnings
import
pandas
as
pd
from
DrissionPage
import
ChromiumPage
...
...
@@ -18,23 +21,67 @@ helper.make_dir(download_path)
# 设置下载路径,确保在打开浏览器前设置
page
.
set
.
download_path
(
download_path
)
# 忽略 openpyxl 样式警告
warnings
.
filterwarnings
(
"ignore"
,
category
=
UserWarning
,
module
=
"openpyxl"
)
# 对过滤后的数据,进一步处理 Description 列
def process_description(description):
    """Extract the first 8 characters of the last ``/``-separated segment.

    :param description: value of a Description cell; anything that is not a
        string (e.g. NaN from an empty cell, or None) yields None
    :return: the leading 8 characters of the text after the final ``/`` when
        that text is at least 8 characters long, otherwise None
    """
    if not isinstance(description, str):
        return None

    # Text after the last '/' (the whole string when there is no '/').
    tail = description.rsplit('/', 1)[-1]

    # Only the length is checked; the characters are not required to be digits.
    if len(tail) >= 8:
        return tail[:8]
    return None
def download_filter_data():
    """Load the payments export and return only the rows of interest.

    When ``Payments.xlsx`` is not found, the Vendor Central remittance page
    is opened with the module-level ``page`` object, all rows are selected
    and the export link is clicked to download the file before reading it.

    :return: DataFrame with the rows whose ``Description`` contains
        ``Price``, ``PCR``, ``Missed`` or ``Shortage``, or starts with an
        ``XXXXXXXX/XXXX/`` style prefix (8 then 4 upper-case letters/digits)
    """
    file_name = 'Payments.xlsx'

    if not os.path.isfile(file_name):
        page.get("https://vendorcentral.amazon.com/hz/vendor/members/remittance/home")
        page.ele("#remittance-home-select-all").click()
        page.ele("#remittance-home-export-link").click.to_download()
        # NOTE(review): presumably blocks until the export has finished downloading - confirm.
        page.download.wait()

    # NOTE(review): the first 22 rows are skipped, presumably a report preamble - confirm.
    df = pd.read_excel(file_name, skiprows=22)

    # Keywords match anywhere; the code-style prefix must be at the start.
    pattern = r'Price|PCR|Missed|Shortage|^[A-Z0-9]{8}/[A-Z0-9]{4}/'

    # na=False drops rows whose Description is empty instead of propagating NaN.
    return df[df['Description'].str.contains(pattern, na=False, regex=True)]
def
main
():
po
=
"74HDTI2S"
list_data
=
download_filter_data
()
for
_
,
data
in
list_data
.
iterrows
():
invoice_number
=
data
.
get
(
"Invoice Number"
)
description
=
data
.
get
(
"Description"
)
if
"Price"
in
description
or
"PCR"
in
description
or
"Missed"
in
description
or
"Shortage"
in
description
:
# 获取前8位
invoice_number
=
invoice_number
[:
8
]
print
(
invoice_number
)
page
.
get
(
f
"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-details?invoiceNumber={po}&payeeCode=VECET&activeTab=lineItems"
)
try
:
invoice_dir
=
"invoices"
file_name
=
f
"{invoice_dir}
\\
{po}.csv"
mission
=
page
.
ele
(
"#line-items-export-to-spreadsheet-announce"
)
.
click
.
to_download
(
rename
=
file_name
)
mission
.
wait
()
f
"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-po-search?searchByNumberToken={invoice_number}"
)
params
=
{
"invoiceNumber"
:
invoice_number
,
"payeeCode"
:
"VECET"
,
"activeTab"
:
"lineItems"
,
}
# 将字典转换为 URL 查询参数
query_string
=
urllib
.
parse
.
urlencode
(
params
)
page
.
get
(
f
"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-details?"
+
query_string
)
detail_data
=
pd
.
read_csv
(
file_name
,
skiprows
=
2
)
for
_
,
data
,
in
detail_data
.
iterrows
():
print
(
f
"{data.to_dict()}"
)
# 读取详情内容
file_name
=
f
"invoices
\\
{invoice_number}.csv"
page
.
ele
(
"#line-items-export-to-spreadsheet-announce"
)
.
click
.
to_download
(
rename
=
file_name
)
time
.
sleep
(
3
)
except
Exception
as
err
:
print
(
f
"读取 CSV 文件时出错: {err}"
)
detail_data
=
pd
.
read_csv
(
file_name
,
skiprows
=
2
,
engine
=
'python'
,
on_bad_lines
=
'skip'
)
# for _, detail, in detail_data.iterrows():
# print(f"{detail.to_dict()}")
if
__name__
==
'__main__'
:
...
...
returns.py
View file @
ccca3f9a
...
...
@@ -4,19 +4,15 @@ import os
import
time
import
pandas
as
pd
from
DrissionPage
import
ChromiumPage
,
ChromiumOptions
from
DrissionPage
import
ChromiumPage
from
DrissionPage.errors
import
PageDisconnectedError
from
helper
import
helper
from
helper
import
helper
,
excel
email
=
None
password
=
None
# 创建配置对象(默认从 ini 文件中读取配置)
co
=
ChromiumOptions
()
# 设置不加载图片、静音
co
.
no_imgs
(
True
)
.
mute
(
True
)
page
=
ChromiumPage
(
addr_or_opts
=
co
)
page
=
ChromiumPage
()
page
.
set
.
load_mode
.
eager
()
page
.
set
.
when_download_file_exists
(
'overwrite'
)
...
...
@@ -84,7 +80,8 @@ def main():
# 追加数据
new_list_data
.
append
(
data_dict
)
helper
.
save_xls
(
new_list_data
,
'退货明细.xlsx'
)
excel
.
save_xls
(
new_list_data
,
'退货明细.xlsx'
)
page
.
close
()
def
asin_sku_relations
():
...
...
@@ -103,8 +100,8 @@ def export_list():
open_url
(
"https://vendorcentral.amazon.com/hz/vendor/members/returns?ref_=vc_xx_subNav"
)
# 导出退货单
mission
=
page
.
ele
(
"#file-download-button"
)
.
click
.
to_download
()
mission
.
wait
()
page
.
ele
(
"#file-download-button"
)
.
click
.
to_download
()
page
.
download
.
wait
()
return
pd
.
read_excel
(
'Return_Summary.xls'
,
engine
=
'xlrd'
)
...
...
@@ -116,8 +113,8 @@ def export_item(return_id):
if
not
os
.
path
.
isfile
(
file_name
):
# 打开退回详情下载明细
open_url
(
f
"https://vendorcentral.amazon.com/katalmonsapp/vendor/members/returns/{return_id}"
)
mission
=
page
.
ele
(
"#file-download-button"
)
.
click
.
to_download
(
rename
=
file_name
)
mission
.
wait
()
page
.
ele
(
"#file-download-button"
)
.
click
.
to_download
(
rename
=
file_name
)
page
.
download
.
wait
()
# 读取回退商品详情
return
pd
.
read_excel
(
file_name
,
engine
=
'xlrd'
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment