Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions components/ecoindex/scraper/scrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from ecoindex.models.compute import PageMetrics, Result, ScreenShot, WindowSize
from ecoindex.models.scraper import MimetypeAggregation, RequestItem, Requests
from ecoindex.utils.screenshots import convert_screenshot_to_webp, set_screenshot_rights
from playwright._impl._api_structures import SetCookieParam
from playwright.async_api import async_playwright
from playwright_stealth import stealth_async
from typing_extensions import deprecated


Expand All @@ -26,6 +26,8 @@ def __init__(
screenshot_gid: int | None = None,
page_load_timeout: int = 20,
headless: bool = True,
basic_auth: str | None = None,
cookies: list[SetCookieParam] = [],
):
self.url = url
self.window_size = window_size
Expand All @@ -41,6 +43,8 @@ def __init__(
f"/tmp/ecoindex-{self.now.strftime('%Y-%m-%d-%H-%M-%S-%f')}-{uuid4()}.har"
)
self.headless = headless
self.basic_auth = basic_auth
self.cookies = cookies

@deprecated("This method is useless with new version of EcoindexScraper")
def init_chromedriver(self):
Expand All @@ -67,12 +71,19 @@ async def get_requests_by_category(self) -> MimetypeAggregation:
async def scrap_page(self) -> PageMetrics:
async with async_playwright() as p:
browser = await p.chromium.launch(headless=self.headless)
self.page = await browser.new_page(
self.context = await browser.new_context(
record_har_path=self.har_temp_file_path,
screen=self.window_size.model_dump(),
ignore_https_errors=True,
http_credentials={
"username": self.basic_auth.split(":")[0],
"password": self.basic_auth.split(":")[1],
}
if self.basic_auth
else None,
)
await stealth_async(self.page)
await self.context.add_cookies(self.cookies)
self.page = await self.context.new_page()
response = await self.page.goto(self.url)
await self.check_page_response(response)

Expand Down