Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
amazon_reports
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
common
amazon_reports
Commits
34e8dcb0
Commit
34e8dcb0
authored
Oct 16, 2024
by
邱阿朋
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
退款明细爬虫处理
parent
a8d37c4a
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
274 additions
and
2 deletions
+274
-2
.gitignore
.gitignore
+3
-1
fetch_email.py
example/fetch_email.py
+85
-0
relations.xlsx
relations.xlsx
+0
-0
requirements.txt
requirements.txt
+5
-1
return_reports.py
return_reports.py
+181
-0
No files found.
.gitignore
View file @
34e8dcb0
...
...
@@ -2,3 +2,5 @@
.vscode
.venv
*.pyc
Return_Summary.xls
items
\ No newline at end of file
example/fetch_email.py
0 → 100644
View file @
34e8dcb0
import
email
import
imaplib
from
email.header
import
decode_header
# Mailbox account settings read by get_latest_unread_email().
# NOTE(review): these look like real credentials committed to the repository;
# consider loading them from the environment or a secrets store instead.
username = 'us-cs001@khdtek.com'  # replace with your mailbox address
password = 'khd=20221208'  # replace with your mailbox password
imap_server = 'imap.qiye.aliyun.com'
def _decode_header_value(raw):
    """Decode an RFC 2047 encoded header into ``str``; a missing header gives ``""``."""
    if raw is None:
        return ""
    pieces = []
    for value, charset in decode_header(raw):
        if isinstance(value, bytes):
            try:
                value = value.decode(charset or "utf-8", errors="replace")
            except LookupError:
                # The header names a charset Python does not know.
                value = value.decode("utf-8", errors="replace")
        pieces.append(value)
    return "".join(pieces)


def _decode_part_text(part):
    """Return the decoded text payload of *part* using its declared charset."""
    payload = part.get_payload(decode=True)
    if payload is None:
        return ""
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="replace")
    except LookupError:
        return payload.decode("utf-8", errors="replace")


def _print_message(msg):
    """Print subject, sender, date and the text/HTML bodies of *msg*."""
    subject = _decode_header_value(msg["Subject"])
    from_ = msg.get("From")
    date_ = msg.get("Date")
    print(f"最新未读邮件主题: {subject}")
    print(f"发件人: {from_}")
    print(f"发送时间: {date_}")

    if msg.is_multipart():
        for part in msg.walk():
            content_type = part.get_content_type()
            content_disposition = str(part.get("Content-Disposition"))
            # Only inline text or HTML parts are printed; attachments are skipped.
            if "attachment" in content_disposition:
                continue
            if content_type == "text/plain":
                print("邮件正文(纯文本):", _decode_part_text(part))
            elif content_type == "text/html":
                print("邮件正文(HTML):", _decode_part_text(part))
    else:
        print("邮件正文:", _decode_part_text(msg))


def get_latest_unread_email():
    """Print the newest unread message in the inbox.

    Connects to ``imap_server`` over SSL, logs in with the module-level
    ``username`` / ``password``, searches the inbox for ``UNSEEN`` messages
    and fetches the last ID of the search result with ``(RFC822)``.
    All output goes through ``print``; the function returns ``None``.

    The connection is logged out on every exit path, including the early
    returns and any exception raised while talking to the server.
    """
    mail = imaplib.IMAP4_SSL(imap_server)
    try:
        mail.login(username, password)
        mail.select("inbox")

        # Search for unread messages.
        status, messages = mail.search(None, 'UNSEEN')
        if status != 'OK':
            print("没有找到未读邮件")
            return

        email_ids = messages[0].split()
        if not email_ids:
            print("没有未读邮件")
            return

        # The last ID in the search result is treated as the newest message.
        latest_email_id = email_ids[-1]

        status, msg_data = mail.fetch(latest_email_id, '(RFC822)')
        if status != 'OK':
            print("无法获取邮件")
            return

        for response_part in msg_data:
            # Only tuple items are parsed; their second element holds the raw bytes.
            if isinstance(response_part, tuple):
                _print_message(email.message_from_bytes(response_part[1]))
    finally:
        try:
            mail.logout()
        except (imaplib.IMAP4.error, OSError):
            # The connection is already unusable; do not mask the real error.
            pass
# 执行获取最新未读邮件的操作
get_latest_unread_email
()
relations.xlsx
0 → 100644
View file @
34e8dcb0
File added
requirements.txt
View file @
34e8dcb0
...
...
@@ -7,3 +7,7 @@ pymysql==1.1.1
huey==2.5.1
redis==5.0.8
selenium==4.25.0
requests==2.32.3
xlrd==2.0.1
pandas==2.2.3
openpyxl==3.1.5
\ No newline at end of file
return_reports.py
0 → 100644
View file @
34e8dcb0
# coding: utf-8
# 导出退款记录
import
os
import
time
import
pandas
as
pd
import
xlrd
from
DrissionPage
import
ChromiumPage
from
DrissionPage.errors
import
PageDisconnectedError
from
openpyxl.reader.excel
import
load_workbook
# Login credentials typed into the sign-in form by open_url(). They start as
# None and are assigned in the __main__ block before main() is called.
email = None
password = None
def open_url(page, url):
    """Load *url* in *page* and sign in when the login form is shown.

    If no ``#ap_email`` element is found within one second the function
    returns right after loading the page. Otherwise it types the module-level
    ``email`` and ``password`` into the form, submits it, and then polls once
    per second for as long as the captcha element is present, printing a
    reminder each time.
    """
    page.get(url)

    if not page.ele('#ap_email', timeout=1):
        return

    page.ele('#ap_email').input(email)
    page.ele('#continue').click()
    page.ele('#ap_password').input(password)
    page.ele('#signInSubmit').click()

    # NOTE(review): the source lost its indentation; the captcha wait is
    # assumed to belong to the login branch. Confirm against the repository.
    captcha_locator = '.a-section a-text-center cvf-captcha-img'
    time.sleep(1)
    while page.ele(captcha_locator):
        print("请填入图形码内容")
        time.sleep(1)
    # TODO: recognise the captcha automatically
def main():
    """Build the per-PO/ASIN return detail workbook.

    Opens a ``ChromiumPage``, downloads the returns summary, downloads the
    item sheet of every ``Return ID`` in it, sums ``Quantity`` and
    ``Total amount`` per (``Purchase order``, ``ASIN``), and writes one output
    row per group to ``退货明细.xlsx``.
    """
    browser = ChromiumPage()
    browser.set.load_mode.normal()
    browser.set.when_download_file_exists('overwrite')

    # Downloads go to the current working directory; the path is set right
    # after the page object is configured.
    target_dir = os.getcwd()
    make_dir(target_dir)
    browser.set.download_path(target_dir)

    sku_by_asin = asin_sku_relations()
    summary = export_list(browser)

    rows = []
    for _, summary_row in summary.iterrows():
        details = export_item(browser, summary_row.get('Return ID'))

        # One aggregated line per purchase order and ASIN.
        totals = details.groupby(['Purchase order', 'ASIN'], as_index=False).agg(
            {'Quantity': 'sum', 'Total amount': 'sum'}
        )

        for _, line in totals.iterrows():
            record = summary_row.to_dict()
            record.update({
                'Return Date': record['Return Date'].strftime('%m/%d/%Y'),
                'Return ID': str(record['Return ID']),
                'PO': line.get('Purchase order'),
                'ASIN': line.get('ASIN'),
                'SKU': sku_by_asin[line.get('ASIN')],
                'Quantity': line.get('Quantity'),
                # The summary's quantity and cost are replaced by the detail values.
                'Return quantity': line.get('Quantity'),
                'Total cost': line.get('Total amount'),
            })
            rows.append(record)

    save_xls(rows, '退货明细.xlsx')
def asin_sku_relations(path='relations.xlsx'):
    """Load the ASIN -> SKU mapping from an Excel sheet.

    Args:
        path: Workbook to read. It must provide ``ASIN`` and ``SKU`` columns.
            Defaults to ``'relations.xlsx'``, the file name that used to be
            hard-coded.

    Returns:
        A dict mapping each ``ASIN`` cell to the ``SKU`` cell of the same row.
        When an ASIN appears more than once, the last row wins. A sheet
        without data rows yields an empty dict.
    """
    df = pd.read_excel(path)
    if df.empty:
        return {}
    # Pair the two columns directly instead of walking iterrows(): iterrows()
    # builds one Series per row and upcasts mixed numeric rows, which can turn
    # an integer value into a float.
    return dict(zip(df['ASIN'], df['SKU']))
def export_list(page):
    """Download the returns summary and load it as a DataFrame.

    Opens the Vendor Central returns page through ``open_url``, clicks
    ``#file-download-button``, waits for the download to finish, and reads
    ``Return_Summary.xls`` with the ``xlrd`` engine.
    """
    open_url(page, "https://vendorcentral.amazon.com/hz/vendor/members/returns?ref_=vc_xx_subNav")

    download_button = page.ele("#file-download-button")
    download = download_button.click.to_download()
    download.wait()

    return pd.read_excel('Return_Summary.xls', engine='xlrd')
def export_item(page, return_id):
    """Return the item details of one return as a DataFrame.

    The sheet is stored as ``items/<return_id>.xls``. When that file already
    exists it is reused; otherwise the return's detail page is opened via
    ``open_url`` and the sheet is downloaded through ``#file-download-button``.

    Args:
        page: Browser page object used for navigation and download.
        return_id: Identifier of the return; used in the URL and file name.

    Returns:
        The sheet read with ``pd.read_excel`` using the ``xlrd`` engine.
    """
    items_dir = "items"
    make_dir(items_dir)

    # os.path.join instead of a hard-coded backslash: the result is the same
    # on Windows, and the file also lands inside items/ on other platforms.
    file_name = os.path.join(items_dir, f"{return_id}.xls")

    if not os.path.isfile(file_name):
        open_url(page, f"https://vendorcentral.amazon.com/katalmonsapp/vendor/members/returns/{return_id}")
        mission = page.ele("#file-download-button").click.to_download(rename=file_name)
        mission.wait()

    return pd.read_excel(file_name, engine='xlrd')
def open_xls(file_path):
    """Open the workbook at *file_path* with xlrd and return its first sheet."""
    book = xlrd.open_workbook(filename=file_path)
    first_sheet = book.sheet_by_index(0)
    return first_sheet
def save_xls(data, output_file):
    """Write *data* to an Excel file and widen each column to fit its content.

    Args:
        data: Anything ``pd.DataFrame`` accepts; ``main`` passes a list of dicts.
        output_file: Path of the workbook to create.

    The frame is written without the row index. The file is then reopened with
    openpyxl, every column of the active sheet is set to the length of its
    longest stringified cell plus two, and the workbook is saved again.
    """
    df = pd.DataFrame(data)
    df.to_excel(output_file, index=False)

    # Reload with openpyxl to adjust the column widths.
    wb = load_workbook(output_file)
    ws = wb.active

    for column in ws.columns:
        column_letter = column[0].column_letter
        # The old bare "except: pass" around this measurement is gone, so a
        # real error is no longer hidden.
        max_length = max((len(str(cell.value)) for cell in column), default=0)
        # Two extra characters of padding for readability.
        ws.column_dimensions[column_letter].width = max_length + 2

    wb.save(output_file)
def make_dir(path):
    """Create *path* (including parents) when it does not exist yet.

    Args:
        path: Directory path to ensure.

    Returns:
        ``True`` when *path* already existed before the call, ``False`` when
        it had to be created.
    """
    if os.path.exists(path):
        return True
    # exist_ok=True avoids a FileExistsError when something else creates the
    # directory between the check above and this call.
    os.makedirs(path, exist_ok=True)
    return False
def get_input_with_default(prompt, default):
    """Ask the user for a value and fall back to *default* on empty input.

    The prompt shown is *prompt* followed by the default in parentheses.
    Returns what the user typed, or *default* when nothing was entered.
    """
    answer = input(f"{prompt}(默认为 '{default}'):")
    if answer:
        return answer
    return default
if __name__ == '__main__':
    # Script entry point: ask for the credentials that open_url() reads from
    # the module-level email/password, then run the export.
    # NOTE(review): the default account and password are hard-coded below;
    # consider moving them out of the source.
    try:
        email = get_input_with_default("请输入账户", "us-cs001@khdtek.com")
        print(f"您输入的账户是:{email}")
        password = get_input_with_default("请输入密码", "khd=20221208")
        # This line used to be labelled "账户" (account) and echoed the
        # password in clear text; the label is fixed and the value masked.
        print(f"您输入的密码是:{'*' * len(password)}")
        main()
    except KeyboardInterrupt:
        # Ctrl+C ends the run quietly.
        pass
    except PageDisconnectedError:
        print("与页面的连接已断开")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment