Playwright 截图
Playwright 元素截图
from playwright.sync_api import sync_playwright

# Remote-debugging port of the Chrome instance to attach to.
port = 9222
# File the element screenshot is written to.
screenshot_path = 'screenshot.png'

with sync_playwright() as playwright:
    # Connect over CDP to the browser listening on the local debugging port
    # and reuse its first context and first page.
    browser = playwright.chromium.connect_over_cdp(f"http://127.0.0.1:{port}")
    chrome_context = browser.contexts[0]
    page = chrome_context.pages[0]
    # Block until the '#wrap-base' element is available on the page.
    search_box = page.wait_for_selector('#wrap-base')
    # screenshot() returns the image bytes, not a file name, so the message
    # reports the path that was passed in instead of the return value.
    search_box.screenshot(path=screenshot_path)
    print(f'截图已保存到 {screenshot_path}')
遍历子元素,并对每个子元素进行截图:
from playwright.sync_api import Error as PlaywrightError, sync_playwright
import docx

doc = docx.Document()
# Remote-debugging port of the Chrome instance to attach to.
port = 9222
# Paths of the screenshots that were actually written during this run.
captured_files = []

with sync_playwright() as playwright:
    chrome_context = playwright.chromium.connect_over_cdp("http://127.0.0.1:" + str(port)).contexts[0]
    page = chrome_context.pages[0]
    # Wait for the parent container before looking for its children.
    parent_selector = '#content1'
    parent_element = page.wait_for_selector(parent_selector)
    print(parent_element, type(parent_element))
    # NOTE: the 'div' selector matches every <div> below the parent,
    # not only its direct children.
    child_elements = parent_element.query_selector_all('div')
    # Screenshot each matched element into '<index>.png'.
    for i, child_element in enumerate(child_elements):
        print(i)
        image_path = f'{i}.png'
        try:
            child_element.screenshot(path=image_path)
        except PlaywrightError as exc:
            # Best effort: report the element that could not be captured
            # and carry on, instead of silently swallowing every exception.
            print(f'第 {i} 个子元素截图失败,已跳过: {exc}')
            continue
        captured_files.append(image_path)

# Only insert '1.png' when this run really produced it; otherwise
# add_picture would raise and the document would never be saved.
if '1.png' in captured_files:
    doc.add_picture('1.png')
else:
    print('未生成 1.png,文档中不插入图片')
doc.save('截图.docx')
使用 Playwright 截图的方法,以及将图片保存到文档中的方法;图片插入文档后会按页面可用区域自动缩放:
def img_screenshot(page, path_selector, save_img_path='screenshot.png'):
    """Screenshot the element matching ``path_selector`` and return the file path.

    :param page: page object used to locate the element; it must provide
        ``wait_for_selector``.
    :param path_selector: selector of the element to capture.
    :param save_img_path: file the screenshot is written to. Defaults to
        ``'screenshot.png'``, the name that used to be hard-coded.
    :return: the path the screenshot was saved to (``save_img_path``).
    """
    print('---------进入截图模块---------')
    # Wait for the element to be available before capturing it.
    target = page.wait_for_selector(path_selector)
    # screenshot() returns the image bytes, so the message below reports the
    # destination path rather than the call's return value.
    target.screenshot(path=save_img_path)
    print(f'截图已保存到 {save_img_path}')
    return save_img_path
def imgToDocx(doc,text_content,imgName):
    """Append a paragraph and a picture to ``doc``, then fit the picture to the page.

    The picture is scaled, keeping its aspect ratio, so that it fills the
    usable area (page size minus margins) of the document's last section
    along whichever dimension is the limiting one.

    :param doc: document object the content is appended to
    :param text_content: text written as a new paragraph before the picture
    :param imgName: path of the image file to append
    :return: None
    """
    doc.add_paragraph(text_content)
    doc.add_picture(imgName)

    # The picture appended above is taken to be the last inline shape.
    picture = doc.inline_shapes[-1]

    # Usable area of the last section: page size minus its margins.
    last_section = doc.sections[-1]
    usable_width = last_section.page_width - last_section.left_margin - last_section.right_margin
    usable_height = last_section.page_height - last_section.top_margin - last_section.bottom_margin

    picture_ratio = picture.width / picture.height

    if picture_ratio > usable_width / usable_height:
        # Relatively wider than the usable area: the width is the limit.
        picture.width = int(usable_width)
        picture.height = int(usable_width / picture_ratio)
        return

    # Otherwise the height is the limit.
    picture.height = int(usable_height)
    picture.width = int(usable_height * picture_ratio)
调用截图和保存文档的方法
from playwright.sync_api import sync_playwright
import docx
from public import img_screenshot, imgToDocx

# Empty document that receives the caption text and the screenshot.
doc = docx.Document()
# Remote-debugging port of the Chrome instance to attach to.
port = 9222

with sync_playwright() as playwright:
    # Attach over CDP and work on the first page of the first context.
    cdp_endpoint = f"http://127.0.0.1:{port}"
    connected_browser = playwright.chromium.connect_over_cdp(cdp_endpoint)
    chrome_context = connected_browser.contexts[0]
    page = chrome_context.pages[0]
    print(page)

    # Capture the element addressed by the selector below.
    print('开始截图')
    path_selector = (
        'body > div.container > div.container1 > div.result '
        '> div.page > div.nameBox.clearfix'
    )
    imgName = img_screenshot(page, path_selector)
    print('截图完成')

    # Append the caption and the captured image, then write the document.
    text_content = '保存图片。。。。。。。'
    imgToDocx(doc, text_content, imgName)
    doc.save('截图保存.docx')
屏幕截图
from pathlib import Path

from playwright.sync_api import Playwright, sync_playwright

# Build the output path from its components so the directory separator is
# right on every OS; a literal backslash is only a separator on Windows.
screenshot_file = Path('截图') / 'baidu.png'

with sync_playwright() as p:
    browser = p.chromium.launch()
    try:
        page = browser.new_page()
        page.goto('https://www.baidu.com')
        # Capture the page into the file above.
        page.screenshot(path=screenshot_file)
    finally:
        # Close the browser even when navigation or the screenshot fails.
        browser.close()
页面刷新
在 Playwright 中刷新页面可以使用 page.reload() 方法。这个方法会重新加载当前页,类似于手动点击浏览器的刷新按钮。
以下是使用 Playwright 实现页面刷新的示例代码:
from playwright.sync_api import Playwright, sync_playwright

with sync_playwright() as p:
    # Launch Chromium and open the page that will be refreshed.
    browser = p.chromium.launch()
    page = browser.new_page()
    target_url = 'https://www.example.com'
    page.goto(target_url)
    # Wait for the initial load to finish.
    page.wait_for_load_state()

    # Reload the current page, then wait for it to finish loading again.
    page.reload()
    page.wait_for_load_state()

    browser.close()
在上面的代码中,我们首先使用 p.chromium.launch() 方法启动 Chromium 浏览器,然后使用 browser.new_page() 方法创建一个新页面,并使用 page.goto() 方法打开 https://www.example.com 网址。接着使用 page.wait_for_load_state() 等待页面加载完成,再调用 page.reload() 方法刷新页面。随后再次使用 page.wait_for_load_state() 等待刷新后的页面加载完成,最后使用 browser.close() 方法关闭浏览器。
滚动页面
import time
from playwright.sync_api import Playwright, sync_playwright
def main(playwright: Playwright) -> None:
    """Open https://www.example.com and keep scrolling until the bottom is reached.

    The page is scrolled to ``document.body.scrollHeight`` repeatedly, with a
    one-second pause after each scroll, until the vertical scroll offset
    stops changing between two consecutive rounds.

    :param playwright: Playwright instance used to launch Chromium.
    :return: None
    """
    browser = playwright.chromium.launch()
    try:
        page = browser.new_page()
        page.goto('https://www.example.com')
        # Scroll to the bottom until the offset no longer moves.
        last_position = None
        while True:
            page.evaluate('window.scrollTo(0, document.body.scrollHeight)')
            # Pause through Playwright rather than time.sleep(), which is
            # discouraged with the sync API.
            page.wait_for_timeout(1000)
            # evaluate() returns the value directly; no JSHandle is created.
            current_position = page.evaluate('window.pageYOffset')
            if current_position == last_position:
                break
            last_position = current_position
    finally:
        # Close the browser even if navigation or scrolling raised.
        browser.close()
# Run main() inside a sync_playwright() context.
with sync_playwright() as pw:
    main(pw)