Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
amazon_reports
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
common
amazon_reports
Commits
8d5adcae
Commit
8d5adcae
authored
Oct 16, 2024
by
邱阿朋
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
爬虫处理
parent
34e8dcb0
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
188 additions
and
78 deletions
+188
-78
.gitignore
.gitignore
+4
-1
coop.py
coop.py
+55
-0
helper.py
helper/helper.py
+54
-0
invoices.py
invoices.py
+46
-0
models.py
models/models.py
+0
-0
returns.py
returns.py
+28
-76
tasks.py
tasks.py
+1
-1
No files found.
.gitignore
View file @
8d5adcae
...
...
@@ -3,4 +3,7 @@
.venv
*.pyc
Return_Summary.xls
items
\ No newline at end of file
ContraCogsInvoices.xls
returns
invoices
coop
\ No newline at end of file
coop.py
0 → 100644
View file @
8d5adcae
# coding: utf-8
# 回款明细
import
os
import
pandas
as
pd
from
DrissionPage
import
ChromiumPage
from
DrissionPage.errors
import
PageDisconnectedError
from
helper
import
helper
# Module-level browser page object; main() below drives it.
page = ChromiumPage()
page.set.load_mode.normal()
# Passes 'overwrite' as the policy for downloads whose target file already exists.
page.set.when_download_file_exists('overwrite')
# Download directory: the current working directory of the process.
download_path = os.getcwd()
# Check whether the download directory exists and create it if it does not.
helper.make_dir(download_path)
# Set the download path (original author's note: make sure this is set
# before the browser is opened).
page.set.download_path(download_path)
def main():
    """Download the co-op invoice summary, then one CSV report per invoice.

    Opens the Vendor Central co-op page, selects all rows, and downloads the
    summary workbook (read back as "ContraCogsInvoices.xls"). For every row
    that carries an "Invoice ID", the invoice is searched for and its report
    is downloaded into the "coop" directory.
    """
    page.get("https://vendorcentral.amazon.com/hz/vendor/members/coop?ref_=vc_xx_subNav")
    # Select all rows.
    page.ele("#select-all").click()
    # Open the actions dropdown.
    page.ele("#cc-invoice-actions-dropdown").click()
    # Click the "download report" entry and wait for the file to be saved.
    mission = page.ele("#cc-invoice-actions-dropdown_2").click.to_download()
    mission.wait()

    summary_file = "ContraCogsInvoices.xls"
    coop_data = pd.read_excel(summary_file, engine='xlrd')

    # Make sure the per-invoice target directory exists before downloading
    # into it (mirrors what returns.py does for its own directory).
    helper.make_dir("coop")

    for _, data in coop_data.iterrows():
        # Search by invoice id and download that invoice's report.
        invoice_id = data.get("Invoice ID")
        if pd.isna(invoice_id):
            # A row without an id cannot be searched for; skip it instead of
            # requesting "searchText=nan".
            continue

        page.get(f"https://vendorcentral.amazon.com/hz/vendor/members/coop?searchText={invoice_id}")
        # Open the dropdown for the matching row.
        page.ele("#a-autoid-2-announce").click()
        # Download the report.
        file_name = f"coop\\{invoice_id}.csv"
        mission = page.ele(f"#invoiceDownloads-{invoice_id}_1").click.to_download(rename=file_name)
        # Wait for this download to finish before navigating away to the
        # next invoice; otherwise the navigation can race the download.
        mission.wait()
# Script entry point: run main(), exit quietly on Ctrl+C, and report a lost
# browser connection with a short message.
if __name__ == '__main__':
    try:
        main()
    except PageDisconnectedError:
        print("与页面的连接已断开")
    except KeyboardInterrupt:
        pass
helper/helper.py
0 → 100644
View file @
8d5adcae
# coding: utf-8
import
os
import
pandas
as
pd
import
xlrd
from
openpyxl.reader.excel
import
load_workbook
def open_xls(file_path):
    """Open the workbook at *file_path* with xlrd and return its first sheet."""
    book = xlrd.open_workbook(filename=file_path)
    # Index 0 selects the first worksheet.
    first_sheet = book.sheet_by_index(0)
    return first_sheet
def save_xls(data, output_file):
    """Write *data* to an Excel file and size each column to its content.

    Args:
        data: Anything ``pd.DataFrame`` accepts (e.g. a list of dicts).
        output_file: Path of the workbook to create or overwrite.
    """
    df = pd.DataFrame(data)
    # index=False keeps the row index out of the sheet.
    df.to_excel(output_file, index=False)

    # Reload the file with openpyxl to adjust the column widths.
    wb = load_workbook(output_file)
    ws = wb.active

    for column in ws.columns:
        column_letter = column[0].column_letter
        # Longest rendered value in the column. Empty cells count as zero
        # width instead of being measured as the text "None".
        max_length = max(
            (len(str(cell.value)) for cell in column if cell.value is not None),
            default=0,
        )
        # A little padding so the text does not touch the cell border.
        ws.column_dimensions[column_letter].width = max_length + 2

    # Persist the adjusted workbook.
    wb.save(output_file)
def make_dir(path):
    """Ensure *path* exists, creating it (and any parents) when missing.

    Returns:
        True if *path* already existed, False if it was created by this call.
    """
    if os.path.exists(path):
        return True
    # exist_ok=True tolerates another process creating the directory between
    # the check above and this call.
    os.makedirs(path, exist_ok=True)
    return False
def get_input_with_default(prompt, default):
    """Prompt the user and return their answer, or *default* if it is empty."""
    answer = input(f"{prompt}(默认为 '{default}'):")
    if answer:
        return answer
    return default
invoices.py
0 → 100644
View file @
8d5adcae
# coding: utf-8
# 回款明细
import
os
import
pandas
as
pd
from
DrissionPage
import
ChromiumPage
from
DrissionPage.errors
import
PageDisconnectedError
from
helper
import
helper
# Module-level browser page object; main() below drives it.
page = ChromiumPage()
page.set.load_mode.normal()
# Passes 'overwrite' as the policy for downloads whose target file already exists.
page.set.when_download_file_exists('overwrite')
# Download directory: the current working directory of the process.
download_path = os.getcwd()
# Check whether the download directory exists and create it if it does not.
helper.make_dir(download_path)
# Set the download path (original author's note: make sure this is set
# before the browser is opened).
page.set.download_path(download_path)
def main(po="74HDTI2S"):
    """Download the line-item export for one invoice and print every row.

    Args:
        po: Value sent as the ``invoiceNumber`` query parameter; it is also
            used as the base name of the downloaded CSV. Defaults to the
            previously hard-coded number.
    """
    page.get(f"https://vendorcentral.amazon.com/hz/vendor/members/inv-mgmt/invoice-details?invoiceNumber={po}&payeeCode=VECET&activeTab=lineItems")
    try:
        invoice_dir = "invoices"
        # Make sure the target directory exists before downloading into it.
        helper.make_dir(invoice_dir)
        file_name = f"{invoice_dir}\\{po}.csv"
        mission = page.ele("#line-items-export-to-spreadsheet-announce").click.to_download(rename=file_name)
        mission.wait()

        # The first two lines of the export are skipped before the header row.
        detail_data = pd.read_csv(file_name, skiprows=2)
        for _, data in detail_data.iterrows():
            print(f"{data.to_dict()}")
    except PageDisconnectedError:
        # Let the entry point report a lost browser connection instead of
        # mislabelling it as a CSV problem below.
        raise
    except Exception as err:
        print(f"读取 CSV 文件时出错: {err}")
# Script entry point: run main(), exit quietly on Ctrl+C, and report a lost
# browser connection with a short message.
if __name__ == '__main__':
    try:
        main()
    except PageDisconnectedError:
        print("与页面的连接已断开")
    except KeyboardInterrupt:
        pass
models.py
→
models
/models
.py
View file @
8d5adcae
File moved
return
_report
s.py
→
returns.py
View file @
8d5adcae
...
...
@@ -4,16 +4,27 @@ import os
import
time
import
pandas
as
pd
import
xlrd
from
DrissionPage
import
ChromiumPage
from
DrissionPage.errors
import
PageDisconnectedError
from
openpyxl.reader.excel
import
load_workbook
from
helper
import
helper
email
=
None
password
=
None
page
=
ChromiumPage
()
page
.
set
.
load_mode
.
normal
()
# 设置为normal模式
page
.
set
.
when_download_file_exists
(
'overwrite'
)
# 下载目录
download_path
=
os
.
getcwd
()
# 检查下载目录是否存在,如果不存在则创建
helper
.
make_dir
(
download_path
)
# 设置下载路径,确保在打开浏览器前设置
page
.
set
.
download_path
(
download_path
)
def
open_url
(
page
,
url
):
def
open_url
(
url
):
# 访问网页
page
.
get
(
url
)
...
...
@@ -36,28 +47,17 @@ def open_url(page, url):
def
main
():
page
=
ChromiumPage
()
page
.
set
.
load_mode
.
normal
()
# 设置为normal模式
page
.
set
.
when_download_file_exists
(
'overwrite'
)
# 下载目录
download_path
=
os
.
getcwd
()
# 检查下载目录是否存在,如果不存在则创建
make_dir
(
download_path
)
# 设置下载路径,确保在打开浏览器前设置
page
.
set
.
download_path
(
download_path
)
# 读取asin和sku映射关系
relations_dict
=
asin_sku_relations
()
# 下载并读取list数据
list_data
=
export_list
(
page
)
list_data
=
export_list
()
new_list_data
=
[]
for
_
,
data
in
list_data
.
iterrows
():
return_id
=
data
.
get
(
'Return ID'
)
# 下载退货详情表格读取数据
item_data
=
export_item
(
page
,
return_id
)
item_data
=
export_item
(
return_id
)
# 按 'Purchase order' 和 'ASIN' 分组,并对 'Quantity' 和 Total amount 进行求和
item_data_result
=
item_data
.
groupby
([
'Purchase order'
,
'ASIN'
],
as_index
=
False
)
.
agg
({
'Quantity'
:
'sum'
,
...
...
@@ -80,7 +80,7 @@ def main():
# 追加数据
new_list_data
.
append
(
data_dict
)
save_xls
(
new_list_data
,
'退货明细.xlsx'
)
helper
.
save_xls
(
new_list_data
,
'退货明细.xlsx'
)
def
asin_sku_relations
():
...
...
@@ -94,9 +94,9 @@ def asin_sku_relations():
return
relations_dict
def
export_list
(
page
):
def
export_list
():
# 访问网页
open_url
(
page
,
"https://vendorcentral.amazon.com/hz/vendor/members/returns?ref_=vc_xx_subNav"
)
open_url
(
"https://vendorcentral.amazon.com/hz/vendor/members/returns?ref_=vc_xx_subNav"
)
# 导出退货单
mission
=
page
.
ele
(
"#file-download-button"
)
.
click
.
to_download
()
...
...
@@ -104,14 +104,14 @@ def export_list(page):
return
pd
.
read_excel
(
'Return_Summary.xls'
,
engine
=
'xlrd'
)
def
export_item
(
page
,
return_id
):
items_dir
=
"item
s"
make_dir
(
item
s_dir
)
def
export_item
(
return_id
):
returns_dir
=
"return
s"
helper
.
make_dir
(
return
s_dir
)
file_name
=
f
"{
item
s_dir}
\\
{return_id}.xls"
file_name
=
f
"{
return
s_dir}
\\
{return_id}.xls"
if
not
os
.
path
.
isfile
(
file_name
):
# 打开退回详情下载明细
open_url
(
page
,
f
"https://vendorcentral.amazon.com/katalmonsapp/vendor/members/returns/{return_id}"
)
open_url
(
f
"https://vendorcentral.amazon.com/katalmonsapp/vendor/members/returns/{return_id}"
)
mission
=
page
.
ele
(
"#file-download-button"
)
.
click
.
to_download
(
rename
=
file_name
)
mission
.
wait
()
...
...
@@ -119,60 +119,12 @@ def export_item(page, return_id):
return
pd
.
read_excel
(
file_name
,
engine
=
'xlrd'
)
def
open_xls
(
file_path
):
# 开始处理excel数据
workbook
=
xlrd
.
open_workbook
(
filename
=
file_path
)
# 选择工作表
return
workbook
.
sheet_by_index
(
0
)
# 选择第一个工作表
def
save_xls
(
data
,
output_file
):
df
=
pd
.
DataFrame
(
data
)
# 将 DataFrame 写入 Excel 文件
df
.
to_excel
(
output_file
,
index
=
False
)
# index=False 表示不写入行索引
# 使用 openpyxl 重新加载工作簿
wb
=
load_workbook
(
output_file
)
ws
=
wb
.
active
# 获取活动工作表
# 自动调整列宽
for
column
in
ws
.
columns
:
max_length
=
0
# 获取列字母
column_letter
=
column
[
0
]
.
column_letter
for
cell
in
column
:
try
:
if
len
(
str
(
cell
.
value
))
>
max_length
:
max_length
=
len
(
str
(
cell
.
value
))
except
:
pass
# 增加一些宽度以美观
adjusted_width
=
(
max_length
+
2
)
ws
.
column_dimensions
[
column_letter
]
.
width
=
adjusted_width
# 保存调整后的工作簿
wb
.
save
(
output_file
)
def
make_dir
(
path
):
# 检查下载目录是否存在,如果不存在则创建
if
not
os
.
path
.
exists
(
path
):
os
.
makedirs
(
path
)
return
False
return
True
def
get_input_with_default
(
prompt
,
default
):
user_input
=
input
(
f
"{prompt}(默认为 '{default}'):"
)
return
user_input
or
default
if
__name__
==
'__main__'
:
try
:
email
=
get_input_with_default
(
"请输入账户"
,
"us-cs001@khdtek.com"
)
print
(
f
"您输入的账户是
:
{email}"
)
password
=
get_input_with_default
(
"请输入密码"
,
"khd=20221208"
)
print
(
f
"您输入的
账户是:
{password}"
)
email
=
helper
.
get_input_with_default
(
"请输入账户"
,
"us-cs001@khdtek.com"
)
print
(
f
"您输入的账户是
:
{email}"
)
password
=
helper
.
get_input_with_default
(
"请输入密码"
,
"khd=20221208"
)
print
(
f
"您输入的
密码是:
{password}"
)
main
()
except
KeyboardInterrupt
:
...
...
tasks.py
View file @
8d5adcae
...
...
@@ -2,7 +2,7 @@
from
huey
import
RedisHuey
import
bootstrap
from
models
import
Store
,
Goods
from
models
.models
import
Store
,
Goods
bootstrap
.
init
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment