Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
amazon_reports
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
common
amazon_reports
Commits
8d5adcae
Commit
8d5adcae
authored
Oct 16, 2024
by
邱阿朋
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
爬虫处理
parent
34e8dcb0
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
188 additions
and
78 deletions
+188
-78
.gitignore
.gitignore
+4
-1
coop.py
coop.py
+55
-0
helper.py
helper/helper.py
+54
-0
invoices.py
invoices.py
+46
-0
models.py
models/models.py
+0
-0
returns.py
returns.py
+28
-76
tasks.py
tasks.py
+1
-1
No files found.
.gitignore
View file @
8d5adcae
...
@@ -3,4 +3,7 @@
...
@@ -3,4 +3,7 @@
.venv
.venv
*.pyc
*.pyc
Return_Summary.xls
Return_Summary.xls
items
ContraCogsInvoices.xls
\ No newline at end of file
returns
invoices
coop
\ No newline at end of file
coop.py
0 → 100644
View file @
8d5adcae
# coding: utf-8
# 回款明细
import
os
import
pandas
as
pd
from
DrissionPage
import
ChromiumPage
from
DrissionPage.errors
import
PageDisconnectedError
from
helper
import
helper
# Module-level browser page shared by main() below.
page = ChromiumPage()
page.set.load_mode.normal()
page.set.when_download_file_exists('overwrite')
# Download directory (the current working directory)
download_path = os.getcwd()
# Check whether the download directory exists; create it if it does not
helper.make_dir(download_path)
# Set the download path; make sure this is done before the browser is opened
page.set.download_path(download_path)
def main():
    """Download the co-op invoice summary, then one detail report per invoice.

    The summary workbook ("ContraCogsInvoices.xls") is downloaded from the
    co-op page and read with pandas. For every row, the value of its
    "Invoice ID" column is used to search the co-op page and download that
    invoice's report into the ``coop`` directory as ``<invoice_id>.csv``.
    """
    page.get("https://vendorcentral.amazon.com/hz/vendor/members/coop?ref_=vc_xx_subNav")
    # Select all rows
    page.ele("#select-all").click()
    # Open the actions dropdown
    page.ele("#cc-invoice-actions-dropdown").click()
    # Click the "download report" entry and wait for the file to finish
    mission = page.ele("#cc-invoice-actions-dropdown_2").click.to_download()
    mission.wait()

    coop_data = pd.read_excel("ContraCogsInvoices.xls", engine='xlrd')

    # Make sure the per-invoice target directory exists before downloading into it
    coop_dir = "coop"
    helper.make_dir(coop_dir)

    for _, data in coop_data.iterrows():
        # Search the co-op page by invoice id
        invoice_id = data.get("Invoice ID")
        page.get(f"https://vendorcentral.amazon.com/hz/vendor/members/coop?searchText={invoice_id}")
        # Open the row's download dropdown
        page.ele("#a-autoid-2-announce").click()
        # Download the report; os.path.join keeps the separator platform-correct
        file_name = os.path.join(coop_dir, f"{invoice_id}.csv")
        mission = page.ele(f"#invoiceDownloads-{invoice_id}_1").click.to_download(rename=file_name)
        # Wait for completion so the next page.get() does not interrupt the download
        mission.wait()
# Script entry point: run main() and handle the two expected ways it can stop.
if __name__ == '__main__':
    try:
        main()
    except PageDisconnectedError:
        # The connection to the browser page was lost; report it and exit.
        print("与页面的连接已断开")
    except KeyboardInterrupt:
        # Ctrl+C: exit without a traceback.
        pass
helper/helper.py
0 → 100644
View file @
8d5adcae
# coding: utf-8
import
os
import
pandas
as
pd
import
xlrd
from
openpyxl.reader.excel
import
load_workbook
def open_xls(file_path):
    """Open the workbook at ``file_path`` with xlrd and return its first sheet."""
    # Load the workbook, then hand back the sheet at index 0.
    return xlrd.open_workbook(filename=file_path).sheet_by_index(0)
def save_xls(data, output_file):
    """Write ``data`` to an Excel file and size each column to its content.

    Args:
        data: Passed straight to ``pandas.DataFrame(...)``.
        output_file: Path of the workbook to write.
    """
    # index=False: do not write the row index
    pd.DataFrame(data).to_excel(output_file, index=False)

    # Reload the workbook with openpyxl so column widths can be adjusted
    wb = load_workbook(output_file)
    ws = wb.active  # active worksheet

    for column in ws.columns:
        column_letter = column[0].column_letter
        # Longest rendered cell value in this column. The previous bare
        # ``except: pass`` around this computation was dropped so that an
        # unexpected error is no longer silently hidden.
        max_length = max((len(str(cell.value)) for cell in column), default=0)
        # Add a little padding for readability
        ws.column_dimensions[column_letter].width = max_length + 2

    # Save the adjusted workbook
    wb.save(output_file)
def make_dir(path):
    """Ensure the directory ``path`` exists, creating parents as needed.

    Args:
        path: Directory path to check or create.

    Returns:
        True if ``path`` already existed, False if this call created it.
    """
    if os.path.exists(path):
        return True
    # exist_ok=True tolerates the directory being created by someone else
    # between the existence check above and this call.
    os.makedirs(path, exist_ok=True)
    return False
def get_input_with_default(prompt, default):
    """Ask the user for a value, falling back to ``default``.

    Args:
        prompt: Text shown to the user; the default is appended to it.
        default: Value returned when nothing is entered.

    Returns:
        The entered text, or ``default`` when the reply is empty or standard
        input is at end-of-file (for example when run non-interactively).
    """
    try:
        user_input = input(f"{prompt}(默认为 '{default}'):")
    except EOFError:
        # No more input available: behave as if the user just pressed Enter.
        return default
    return user_input or default
invoices.py
0 → 100644
View file @
8d5adcae
# coding: utf-8
# 回款明细
import
os
import
pandas
as
pd
from
DrissionPage
import
ChromiumPage
from
DrissionPage.errors
import
PageDisconnectedError
from
helper
import
helper
# Module-level browser page shared by main() below.
page = ChromiumPage()
page.set.load_mode.normal()
page.set.when_download_file_exists('overwrite')
# Download directory (the current working directory)
download_path = os.getcwd()
# Check whether the download directory exists; create it if it does not
helper.make_dir(download_path)
# Set the download path; make sure this is done before the browser is opened
page.set.download_path(download_path)
def main(po="74HDTI2S"):
    """Export the line items of one invoice to CSV and print each row.

    Args:
        po: Value used as ``invoiceNumber`` in the invoice-details URL and as
            the base name of the downloaded CSV. Defaults to the value that
            was previously hard-coded.
    """
    page.get(f"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-details?invoiceNumber={po}&payeeCode=VECET&activeTab=lineItems")
    try:
        # Make sure the target directory exists before downloading into it
        invoice_dir = "invoices"
        helper.make_dir(invoice_dir)
        # os.path.join keeps the separator platform-correct
        file_name = os.path.join(invoice_dir, f"{po}.csv")

        mission = page.ele("#line-items-export-to-spreadsheet-announce").click.to_download(rename=file_name)
        mission.wait()

        # The first two lines of the export are skipped before the header row
        detail_data = pd.read_csv(file_name, skiprows=2)
        for _, data in detail_data.iterrows():
            print(f"{data.to_dict()}")
    except Exception as err:
        print(f"读取 CSV 文件时出错: {err}")
# Script entry point: run main() and handle the two expected ways it can stop.
if __name__ == '__main__':
    try:
        main()
    except PageDisconnectedError:
        # The connection to the browser page was lost; report it and exit.
        print("与页面的连接已断开")
    except KeyboardInterrupt:
        # Ctrl+C: exit without a traceback.
        pass
models.py
→
models
/models
.py
View file @
8d5adcae
File moved
return
_report
s.py
→
returns.py
View file @
8d5adcae
...
@@ -4,16 +4,27 @@ import os
...
@@ -4,16 +4,27 @@ import os
import
time
import
time
import
pandas
as
pd
import
pandas
as
pd
import
xlrd
from
DrissionPage
import
ChromiumPage
from
DrissionPage
import
ChromiumPage
from
DrissionPage.errors
import
PageDisconnectedError
from
DrissionPage.errors
import
PageDisconnectedError
from
openpyxl.reader.excel
import
load_workbook
from
helper
import
helper
email
=
None
email
=
None
password
=
None
password
=
None
page
=
ChromiumPage
()
page
.
set
.
load_mode
.
normal
()
# 设置为normal模式
page
.
set
.
when_download_file_exists
(
'overwrite'
)
# 下载目录
download_path
=
os
.
getcwd
()
# 检查下载目录是否存在,如果不存在则创建
helper
.
make_dir
(
download_path
)
# 设置下载路径,确保在打开浏览器前设置
page
.
set
.
download_path
(
download_path
)
def
open_url
(
page
,
url
):
def
open_url
(
url
):
# 访问网页
# 访问网页
page
.
get
(
url
)
page
.
get
(
url
)
...
@@ -36,28 +47,17 @@ def open_url(page, url):
...
@@ -36,28 +47,17 @@ def open_url(page, url):
def
main
():
def
main
():
page
=
ChromiumPage
()
page
.
set
.
load_mode
.
normal
()
# 设置为normal模式
page
.
set
.
when_download_file_exists
(
'overwrite'
)
# 下载目录
download_path
=
os
.
getcwd
()
# 检查下载目录是否存在,如果不存在则创建
make_dir
(
download_path
)
# 设置下载路径,确保在打开浏览器前设置
page
.
set
.
download_path
(
download_path
)
# 读取asin和sku映射关系
# 读取asin和sku映射关系
relations_dict
=
asin_sku_relations
()
relations_dict
=
asin_sku_relations
()
# 下载并读取list数据
# 下载并读取list数据
list_data
=
export_list
(
page
)
list_data
=
export_list
()
new_list_data
=
[]
new_list_data
=
[]
for
_
,
data
in
list_data
.
iterrows
():
for
_
,
data
in
list_data
.
iterrows
():
return_id
=
data
.
get
(
'Return ID'
)
return_id
=
data
.
get
(
'Return ID'
)
# 下载退货详情表格读取数据
# 下载退货详情表格读取数据
item_data
=
export_item
(
page
,
return_id
)
item_data
=
export_item
(
return_id
)
# 按 'Purchase order' 和 'ASIN' 分组,并对 'Quantity' 和 Total amount 进行求和
# 按 'Purchase order' 和 'ASIN' 分组,并对 'Quantity' 和 Total amount 进行求和
item_data_result
=
item_data
.
groupby
([
'Purchase order'
,
'ASIN'
],
as_index
=
False
)
.
agg
({
item_data_result
=
item_data
.
groupby
([
'Purchase order'
,
'ASIN'
],
as_index
=
False
)
.
agg
({
'Quantity'
:
'sum'
,
'Quantity'
:
'sum'
,
...
@@ -80,7 +80,7 @@ def main():
...
@@ -80,7 +80,7 @@ def main():
# 追加数据
# 追加数据
new_list_data
.
append
(
data_dict
)
new_list_data
.
append
(
data_dict
)
save_xls
(
new_list_data
,
'退货明细.xlsx'
)
helper
.
save_xls
(
new_list_data
,
'退货明细.xlsx'
)
def
asin_sku_relations
():
def
asin_sku_relations
():
...
@@ -94,9 +94,9 @@ def asin_sku_relations():
...
@@ -94,9 +94,9 @@ def asin_sku_relations():
return
relations_dict
return
relations_dict
def
export_list
(
page
):
def
export_list
():
# 访问网页
# 访问网页
open_url
(
page
,
"https://vendorcentral.amazon.com/hz/vendor/members/returns?ref_=vc_xx_subNav"
)
open_url
(
"https://vendorcentral.amazon.com/hz/vendor/members/returns?ref_=vc_xx_subNav"
)
# 导出退货单
# 导出退货单
mission
=
page
.
ele
(
"#file-download-button"
)
.
click
.
to_download
()
mission
=
page
.
ele
(
"#file-download-button"
)
.
click
.
to_download
()
...
@@ -104,14 +104,14 @@ def export_list(page):
...
@@ -104,14 +104,14 @@ def export_list(page):
return
pd
.
read_excel
(
'Return_Summary.xls'
,
engine
=
'xlrd'
)
return
pd
.
read_excel
(
'Return_Summary.xls'
,
engine
=
'xlrd'
)
def
export_item
(
page
,
return_id
):
def
export_item
(
return_id
):
items_dir
=
"item
s"
returns_dir
=
"return
s"
make_dir
(
item
s_dir
)
helper
.
make_dir
(
return
s_dir
)
file_name
=
f
"{
item
s_dir}
\\
{return_id}.xls"
file_name
=
f
"{
return
s_dir}
\\
{return_id}.xls"
if
not
os
.
path
.
isfile
(
file_name
):
if
not
os
.
path
.
isfile
(
file_name
):
# 打开退回详情下载明细
# 打开退回详情下载明细
open_url
(
page
,
f
"https://vendorcentral.amazon.com/katalmonsapp/vendor/members/returns/{return_id}"
)
open_url
(
f
"https://vendorcentral.amazon.com/katalmonsapp/vendor/members/returns/{return_id}"
)
mission
=
page
.
ele
(
"#file-download-button"
)
.
click
.
to_download
(
rename
=
file_name
)
mission
=
page
.
ele
(
"#file-download-button"
)
.
click
.
to_download
(
rename
=
file_name
)
mission
.
wait
()
mission
.
wait
()
...
@@ -119,60 +119,12 @@ def export_item(page, return_id):
...
@@ -119,60 +119,12 @@ def export_item(page, return_id):
return
pd
.
read_excel
(
file_name
,
engine
=
'xlrd'
)
return
pd
.
read_excel
(
file_name
,
engine
=
'xlrd'
)
def
open_xls
(
file_path
):
# 开始处理excel数据
workbook
=
xlrd
.
open_workbook
(
filename
=
file_path
)
# 选择工作表
return
workbook
.
sheet_by_index
(
0
)
# 选择第一个工作表
def
save_xls
(
data
,
output_file
):
df
=
pd
.
DataFrame
(
data
)
# 将 DataFrame 写入 Excel 文件
df
.
to_excel
(
output_file
,
index
=
False
)
# index=False 表示不写入行索引
# 使用 openpyxl 重新加载工作簿
wb
=
load_workbook
(
output_file
)
ws
=
wb
.
active
# 获取活动工作表
# 自动调整列宽
for
column
in
ws
.
columns
:
max_length
=
0
# 获取列字母
column_letter
=
column
[
0
]
.
column_letter
for
cell
in
column
:
try
:
if
len
(
str
(
cell
.
value
))
>
max_length
:
max_length
=
len
(
str
(
cell
.
value
))
except
:
pass
# 增加一些宽度以美观
adjusted_width
=
(
max_length
+
2
)
ws
.
column_dimensions
[
column_letter
]
.
width
=
adjusted_width
# 保存调整后的工作簿
wb
.
save
(
output_file
)
def
make_dir
(
path
):
# 检查下载目录是否存在,如果不存在则创建
if
not
os
.
path
.
exists
(
path
):
os
.
makedirs
(
path
)
return
False
return
True
def
get_input_with_default
(
prompt
,
default
):
user_input
=
input
(
f
"{prompt}(默认为 '{default}'):"
)
return
user_input
or
default
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
try
:
try
:
email
=
get_input_with_default
(
"请输入账户"
,
"us-cs001@khdtek.com"
)
email
=
helper
.
get_input_with_default
(
"请输入账户"
,
"us-cs001@khdtek.com"
)
print
(
f
"您输入的账户是
:
{email}"
)
print
(
f
"您输入的账户是
:
{email}"
)
password
=
get_input_with_default
(
"请输入密码"
,
"khd=20221208"
)
password
=
helper
.
get_input_with_default
(
"请输入密码"
,
"khd=20221208"
)
print
(
f
"您输入的
账户是:
{password}"
)
print
(
f
"您输入的
密码是:
{password}"
)
main
()
main
()
except
KeyboardInterrupt
:
except
KeyboardInterrupt
:
...
...
tasks.py
View file @
8d5adcae
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
from
huey
import
RedisHuey
from
huey
import
RedisHuey
import
bootstrap
import
bootstrap
from
models
import
Store
,
Goods
from
models
.models
import
Store
,
Goods
bootstrap
.
init
()
bootstrap
.
init
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment