# v7 from __future__ import annotations import asyncio import json import logging import threading import time import uuid from datetime import UTC, datetime from typing import Any from urllib.parse import urlparse from PySide6.QtCore import QObject, Signal log = logging.getLogger(__name__) try: import websockets except Exception: websockets = None class CDPEvents(QObject): connected = Signal() disconnected = Signal() requestCaptured = Signal(dict) responseCaptured = Signal(dict) errorOccurred = Signal(str) class CDPClient: def __init__( self, db, websocket_url: str, capture_bodies: bool = False, backend=None, ) -> None: self.db = db self.websocket_url = websocket_url self.capture_bodies = capture_bodies self.backend = backend self.events = CDPEvents() self._loop = None self._thread = None self._socket = None self._running = False self._message_id = 0 self._requests: dict[ str, dict[str, Any] ] = {} self._pending: dict[ int, asyncio.Future, ] = {} self._reconnect_delay = 5 def start( self, ) -> None: if websockets is None: log.warning( "websockets package not installed" ) return if self._running: return self._running = True self._thread = threading.Thread( target=self._thread_main, daemon=True, name="CDPThread", ) self._thread.start() def stop( self, ) -> None: self._running = False if ( self._loop and self._socket ): try: asyncio.run_coroutine_threadsafe( self._socket.close(), self._loop, ) except Exception: pass if ( self._thread and self._thread.is_alive() ): self._thread.join( timeout=5 ) self._socket = None self._thread = None self._pending.clear() self._requests.clear() def send( self, method: str, params: dict | None = None, ) -> None: if ( not self._loop or not self._running ): return asyncio.run_coroutine_threadsafe( self._send( method, params or {}, ), self._loop, ) def _thread_main( self, ) -> None: self._loop = asyncio.new_event_loop() asyncio.set_event_loop( self._loop ) try: self._loop.run_until_complete( self._run() ) except Exception: log.exception( "cdp loop failed" ) finally: self._socket = None self._loop = None async def _run( self, ) -> None: while self._running: try: await self._connect() except Exception as exc: log.exception( "cdp connect failed" ) self.events.errorOccurred.emit( str(exc) ) if not self._running: break await asyncio.sleep( self._reconnect_delay ) async def _connect( self, ) -> None: async with websockets.connect( self.websocket_url, max_size=32 * 1024 * 1024, ping_interval=30, ping_timeout=30, close_timeout=10, ) as socket: self._socket = socket self._requests.clear() self.events.connected.emit() await self._send( "Network.enable", {}, ) await self._send( "Page.enable", {}, ) await self._send( "Runtime.enable", {}, ) await self._send( "Fetch.enable", { "patterns": [ { "urlPattern": "*", "requestStage": "Request", } ] }, ) while self._running: raw = await socket.recv() try: message = json.loads( raw ) except Exception: continue if "id" in message: future = self._pending.pop( message["id"], None, ) if ( future and not future.done() ): future.set_result( message ) continue await self._handle_message( message ) self._socket = None for future in ( self._pending.values() ): try: if not future.done(): future.cancel() except Exception: pass self._pending.clear() self.events.disconnected.emit() async def _send( self, method: str, params: dict, ) -> None: if not self._socket: return self._message_id += 1 payload = { "id": self._message_id, "method": method, "params": params, } try: await self._socket.send( json.dumps(payload) ) except Exception: log.exception( "cdp send failed" ) async def _send_wait( self, method: str, params: dict | None = None, timeout: float = 5.0, ) -> dict | None: if ( not self._socket or not self._loop ): return None self._message_id += 1 message_id = ( self._message_id ) payload = { "id": message_id, "method": method, "params": params or {}, } future = ( self._loop.create_future() ) self._pending[ message_id ] = future await self._socket.send( json.dumps(payload) ) try: return await asyncio.wait_for( future, timeout, ) except Exception: return None finally: self._pending.pop( message_id, None, ) async def _handle_message( self, message: dict, ) -> None: method = message.get( "method" ) if not method: return params = message.get( "params", {}, ) if ( method == "Network.requestWillBeSent" ): self._handle_request( params ) elif ( method == "Network.responseReceived" ): await self._handle_response( params ) elif ( method == "Network.loadingFailed" ): request_id = params.get( "requestId" ) if request_id: self._requests.pop( request_id, None, ) elif ( method == "Fetch.requestPaused" ): await self._continue_fetch( params ) async def _continue_fetch( self, params: dict, ) -> None: request_id = params.get( "requestId" ) if not request_id: return if self.backend: try: request_data = params.get( "request", {}, ) url = request_data.get( "url", "", ) try: host = ( urlparse(url) .hostname or "" ).lower() except Exception: host = "" request = { "url": url, "host": host, "headers": request_data.get( "headers", {}, ), } _, blocked = ( self.backend.process_request( request ) ) if blocked: await self._send( "Fetch.failRequest", { "requestId": request_id, "errorReason": "BlockedByClient", }, ) return except Exception: log.exception( "fetch policy evaluation failed" ) await self._send( "Fetch.continueRequest", { "requestId": request_id, }, ) def _handle_request( self, params: dict, ) -> None: request = params.get( "request", {}, ) request_id = ( params.get( "requestId" ) or str( uuid.uuid4() ) ) data = { "request_id": request_id, "timestamp": datetime.now( UTC ).isoformat(), "method": request.get( "method" ), "url": request.get( "url" ), "headers": request.get( "headers", {}, ), "start": time.time(), } if self.backend: try: data, blocked = ( self.backend.process_request( data ) ) if blocked: return except Exception: log.exception( "network pipeline failed" ) self._requests[ request_id ] = data self.events.requestCaptured.emit( data ) # v7 async def _fetch_body( self, request_id: str, ) -> str | None: result = await self._send_wait( "Network.getResponseBody", { "requestId": request_id, }, ) if not result: return None return ( result.get( "result", {}, ).get( "body" ) ) async def _handle_response( self, params: dict, ) -> None: request_id = params.get( "requestId" ) response = params.get( "response", {}, ) existing = ( self._requests.pop( request_id, {}, ) ) duration = None if "start" in existing: duration = ( time.time() - existing["start"] ) * 1000 url = existing.get( "url" ) try: host = ( urlparse( url ).hostname if url else None ) except Exception: host = None response_body = None if ( self.capture_bodies and request_id ): try: response_body = ( await self._fetch_body( request_id ) ) except Exception: pass record = { "request_id": request_id, "timestamp": existing.get( "timestamp" ), "method": existing.get( "method" ), "url": url, "host": host, "status": response.get( "status" ), "request_headers": existing.get( "headers", {}, ), "response_headers": response.get( "headers", {}, ), "duration_ms": duration, } try: self.db.queue_request( request_id=record[ "request_id" ], timestamp=record[ "timestamp" ], method=record[ "method" ], url=record[ "url" ], host=record[ "host" ], status_code=record[ "status" ], resource_type=response.get( "mimeType" ), request_headers=record[ "request_headers" ], response_headers=record[ "response_headers" ], request_body=None, response_body=response_body, duration_ms=record[ "duration_ms" ], ) except Exception: log.exception( "failed to persist request" ) self.events.responseCaptured.emit( record )