From 9bde7e73871da081b2426f8e039379af1f160fee Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Sat, 22 Mar 2025 15:07:59 -0400 Subject: [PATCH 1/2] Update CDP Mode --- examples/cdp_mode/ReadMe.md | 1 + seleniumbase/core/browser_launcher.py | 12 ++++++- seleniumbase/core/sb_cdp.py | 6 ++++ seleniumbase/undetected/__init__.py | 8 +++-- seleniumbase/undetected/cdp_driver/tab.py | 40 ++++++++++++++++++++--- 5 files changed, 60 insertions(+), 7 deletions(-) diff --git a/examples/cdp_mode/ReadMe.md b/examples/cdp_mode/ReadMe.md index fb339ee2a49..603513bae16 100644 --- a/examples/cdp_mode/ReadMe.md +++ b/examples/cdp_mode/ReadMe.md @@ -512,6 +512,7 @@ sb.cdp.scroll_to_bottom() sb.cdp.scroll_up(amount=25) sb.cdp.scroll_down(amount=25) sb.cdp.save_screenshot(name, folder=None, selector=None) +sb.cdp.print_to_pdf(name, folder=None) ``` -------- diff --git a/seleniumbase/core/browser_launcher.py b/seleniumbase/core/browser_launcher.py index 617ae229dcf..a066b5ad75e 100644 --- a/seleniumbase/core/browser_launcher.py +++ b/seleniumbase/core/browser_launcher.py @@ -1,4 +1,5 @@ import fasteners +import json import logging import os import platform @@ -769,6 +770,7 @@ def uc_open_with_cdp_mode(driver, url=None): cdp.scroll_up = CDPM.scroll_up cdp.scroll_down = CDPM.scroll_down cdp.save_screenshot = CDPM.save_screenshot + cdp.print_to_pdf = CDPM.print_to_pdf cdp.page = page # async world cdp.driver = driver.cdp_base # async world cdp.tab = cdp.page # shortcut (original) @@ -2125,6 +2127,15 @@ def _set_chrome_options( prefs["enable_do_not_track"] = True if external_pdf: prefs["plugins.always_open_pdf_externally"] = True + pdf_settings = { + "recentDestinations": [ + {"id": "Save as PDF", "origin": "local", "account": ""} + ], + "selectedDestinationId": "Save as PDF", + "version": 2, + } + app_state = "printing.print_preview_sticky_settings.appState" + prefs[app_state] = json.dumps(pdf_settings) if proxy_string or proxy_pac_url: # Implementation of https://stackoverflow.com/q/65705775/7058266 prefs["webrtc.ip_handling_policy"] = "disable_non_proxied_udp" @@ -3299,7 +3310,6 @@ def get_remote_driver( from seleniumbase.core import capabilities_parser desired_caps = capabilities_parser.get_desired_capabilities(cap_file) if cap_string: - import json try: extra_caps = json.loads(str(cap_string)) except Exception as e: diff --git a/seleniumbase/core/sb_cdp.py b/seleniumbase/core/sb_cdp.py index ed017b17a01..a19f29e719c 100644 --- a/seleniumbase/core/sb_cdp.py +++ b/seleniumbase/core/sb_cdp.py @@ -2181,6 +2181,12 @@ def save_screenshot(self, name, folder=None, selector=None): else: self.select(selector).save_screenshot(filename) + def print_to_pdf(self, name, folder=None): + filename = name + if folder: + filename = os.path.join(folder, name) + self.loop.run_until_complete(self.page.print_to_pdf(filename)) + class Chrome(CDPMethods): def __init__(self, url=None, **kwargs): diff --git a/seleniumbase/undetected/__init__.py b/seleniumbase/undetected/__init__.py index 5635d63562f..b2c5962252f 100644 --- a/seleniumbase/undetected/__init__.py +++ b/seleniumbase/undetected/__init__.py @@ -459,7 +459,9 @@ def reconnect(self, timeout=0.1): if self.current_url.startswith( "chrome-extension://" ): - self.close() + # https://issues.chromium.org/issues/396611138 + # (Uncomment below when resolved) + # self.close() if self.service.is_connectable(): self.stop_client() self.service.stop() @@ -496,7 +498,9 @@ def connect(self): if self.current_url.startswith( "chrome-extension://" ): - self.close() + # https://issues.chromium.org/issues/396611138 + # (Uncomment below when resolved) + # self.close() if self.service.is_connectable(): self.stop_client() self.service.stop() diff --git a/seleniumbase/undetected/cdp_driver/tab.py b/seleniumbase/undetected/cdp_driver/tab.py index 3154ff209fb..f46d2583470 100644 --- a/seleniumbase/undetected/cdp_driver/tab.py +++ b/seleniumbase/undetected/cdp_driver/tab.py @@ -1,7 +1,10 @@ from __future__ import annotations import asyncio +import base64 +import datetime import logging import pathlib +import urllib.parse import warnings from typing import Dict, List, Union, Optional, Tuple from . import browser as cdp_browser @@ -1133,9 +1136,6 @@ async def save_screenshot( :return: The path/filename of the saved screenshot. :rtype: str """ - import urllib.parse - import datetime - await self.sleep() # Update the target's URL path = None if format.lower() in ["jpg", "jpeg"]: @@ -1166,8 +1166,40 @@ async def save_screenshot( "Most possible cause is the page " "has not finished loading yet." ) - import base64 + data_bytes = base64.b64decode(data) + if not path: + raise RuntimeError("Invalid filename or path: '%s'" % filename) + path.write_bytes(data_bytes) + return str(path) + async def print_to_pdf( + self, + filename: Optional[PathLike] = "auto", + ) -> str: + """ + Saves a webpage as a PDF. + :param filename: uses this as the save path + :type filename: PathLike + :return: The path/filename of the saved screenshot. + :rtype: str + """ + await self.sleep() # Update the target's URL + path = None + ext = ".pdf" + if not filename or filename == "auto": + parsed = urllib.parse.urlparse(self.target.url) + parts = parsed.path.split("/") + last_part = parts[-1] + last_part = last_part.rsplit("?", 1)[0] + dt_str = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + candidate = f"{parsed.hostname}__{last_part}_{dt_str}" + path = pathlib.Path(candidate + ext) # noqa + else: + path = pathlib.Path(filename) + path.parent.mkdir(parents=True, exist_ok=True) + data, _ = await self.send(cdp.page.print_to_pdf()) + if not data: + raise ProtocolException("Could not save PDF.") data_bytes = base64.b64decode(data) if not path: raise RuntimeError("Invalid filename or path: '%s'" % filename) From 3d12e023c18e5724697b60f498470ef23a63ac91 Mon Sep 17 00:00:00 2001 From: Michael Mintz Date: Sat, 22 Mar 2025 15:08:09 -0400 Subject: [PATCH 2/2] Version 4.36.1 --- seleniumbase/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seleniumbase/__version__.py b/seleniumbase/__version__.py index 788b6138789..5b06a61e690 100755 --- a/seleniumbase/__version__.py +++ b/seleniumbase/__version__.py @@ -1,2 +1,2 @@ # seleniumbase package -__version__ = "4.36.0" +__version__ = "4.36.1"