Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
amazon_reports
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
common
amazon_reports
Commits
c46503f3
Commit
c46503f3
authored
Dec 21, 2024
by
邱阿朋
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
spa查询
parent
5d07be2a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
27 additions
and
16 deletions
+27
-16
spa_search.py
src/spa_search.py
+27
-16
No files found.
src/spa_search.py
View file @
c46503f3
...
...
@@ -64,16 +64,27 @@ def get_report_table_html(invoice_id):
def
get_report_agreement_text
(
invoice_id
):
# 点击选项卡
page
.
ele
(
"#a-autoid-2-announce"
)
.
click
()
page
.
wait
(
1
)
# 下载报表
page
.
ele
(
f
"#invoiceDownloads-{invoice_id}_3"
)
.
click
()
page
.
wait
(
5
)
page
.
ele
(
"#return-to-invoice-overview-announce"
)
.
click
()
while
True
:
try
:
# 点击选项卡
page
.
ele
(
"#a-autoid-2-announce"
)
.
click
()
# 下载报表
page
.
ele
(
f
"#invoiceDownloads-{invoice_id}_3"
)
.
click
()
page
.
wait
(
3
)
break
except
ElementNotFoundError
:
log
.
warning
(
"元素未找到,刷新网页"
)
page
.
refresh
()
# 获取报表表单内容
tree
=
etree
.
HTML
(
page
.
html
)
# 找到包含表格的部分
table
=
tree
.
xpath
(
'//table[@width="90
%
"]'
)[
0
]
# 获取第一个(也是唯一一个)匹配的表格元素
# 找到包含表格的部分, # 获取第一个(也是唯一一个)匹配的表格元素
table
=
tree
.
xpath
(
'//table[@width="90
%
"]'
)
if
len
(
table
)
==
0
:
return
[]
table
=
table
[
0
]
# 获取所有表格行(tr),跳过表头
rows
=
table
.
xpath
(
'.//tr[position()>1]'
)
...
...
@@ -118,7 +129,7 @@ def export_item_read_data(invoice_id):
if
len
(
links
)
==
0
:
page
.
refresh
()
data_list
=
get_report_agreement_text
(
invoice_id
)
return
{
"
Sheet1
"
:
pd
.
DataFrame
(
data_list
)}
return
{
"
Accrual For Current Period
"
:
pd
.
DataFrame
(
data_list
)}
for
link
in
links
:
# 解析链接中的查询参数
...
...
@@ -128,12 +139,12 @@ def export_item_read_data(invoice_id):
filename
=
query_params
.
get
(
'fileName'
,
[
'未找到文件名'
])[
0
]
report_file_tmp_dir
=
f
"spa
\\
{invoice_id}
\\
{filename}
\\
"
host
=
domain
.
switch_domain
(
country
)
page
.
download
(
host
+
link
,
report_file_tmp_dir
,
show_msg
=
True
)
report_file
=
report_file_tmp_dir
+
"BackupReport.xls"
is_down
=
file
.
wait_for_downloads
(
report_file
,
60
)
if
is_down
is
False
:
raise
FileNotFoundError
(
f
"{report_file},文件不存在"
)
while
True
:
page
.
download
(
host
+
link
,
report_file_tmp_dir
,
show_msg
=
False
)
is_down
=
file
.
wait_for_downloads
(
report_file
,
60
)
if
is_down
:
break
log
.
warning
(
f
"下载 {invoice_id} 失败,重新下载"
)
try
:
df
=
pd
.
read_excel
(
report_file
)
...
...
@@ -196,7 +207,7 @@ def main():
if
processed_items
:
large_sheet_data
[
invoice_id_num
]
=
processed_items
else
:
item_list
=
item_dict
[
'Sheet1'
]
item_list
=
item_dict
.
get
(
'Accrual For Current Period'
)
# 如果是列表且长度 >= 10 则新增sheet
if
len
(
item_list
)
>=
10
:
processed_items
=
process_large_items
(
item_list
,
relation_data
)
...
...
@@ -270,7 +281,7 @@ def write_sheet(writer, data, sheet_name):
def
save_excel
(
sheet_data
,
large_sheet_data
,
new_file_name
):
"""保存数据到 Excel 文件"""
# 初始化 Excel 写入器
with
pd
.
ExcelWriter
(
new_file_name
,
engine
=
"openpyxl"
)
as
writer
:
with
pd
.
ExcelWriter
(
new_file_name
)
as
writer
:
# 写入小数据
if
sheet_data
:
log
.
info
(
f
"保存小数据,共计 {len(sheet_data)} 条"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment