This commit is contained in:
2026-06-08 06:16:54 +00:00
parent d65d8bb223
commit db1c6969a3
5 changed files with 3789 additions and 0 deletions

714
cdp.py Normal file
View File

@@ -0,0 +1,714 @@
# v7
from __future__ import annotations
import asyncio
import json
import logging
import threading
import time
import uuid
from datetime import UTC, datetime
from typing import Any
from urllib.parse import urlparse
from PySide6.QtCore import QObject, Signal
log = logging.getLogger(__name__)
try:
import websockets
except Exception:
websockets = None
class CDPEvents(QObject):
connected = Signal()
disconnected = Signal()
requestCaptured = Signal(dict)
responseCaptured = Signal(dict)
errorOccurred = Signal(str)
class CDPClient:
def __init__(
self,
db,
websocket_url: str,
capture_bodies: bool = False,
backend=None,
) -> None:
self.db = db
self.websocket_url = websocket_url
self.capture_bodies = capture_bodies
self.backend = backend
self.events = CDPEvents()
self._loop = None
self._thread = None
self._socket = None
self._running = False
self._message_id = 0
self._requests: dict[
str,
dict[str, Any]
] = {}
self._pending: dict[
int,
asyncio.Future,
] = {}
self._reconnect_delay = 5
def start(
self,
) -> None:
if websockets is None:
log.warning(
"websockets package not installed"
)
return
if self._running:
return
self._running = True
self._thread = threading.Thread(
target=self._thread_main,
daemon=True,
name="CDPThread",
)
self._thread.start()
def stop(
self,
) -> None:
self._running = False
if (
self._loop
and self._socket
):
try:
asyncio.run_coroutine_threadsafe(
self._socket.close(),
self._loop,
)
except Exception:
pass
if (
self._thread
and self._thread.is_alive()
):
self._thread.join(
timeout=5
)
self._socket = None
self._thread = None
self._pending.clear()
self._requests.clear()
def send(
self,
method: str,
params: dict | None = None,
) -> None:
if (
not self._loop
or not self._running
):
return
asyncio.run_coroutine_threadsafe(
self._send(
method,
params or {},
),
self._loop,
)
def _thread_main(
self,
) -> None:
self._loop = asyncio.new_event_loop()
asyncio.set_event_loop(
self._loop
)
try:
self._loop.run_until_complete(
self._run()
)
except Exception:
log.exception(
"cdp loop failed"
)
finally:
self._socket = None
self._loop = None
async def _run(
self,
) -> None:
while self._running:
try:
await self._connect()
except Exception as exc:
log.exception(
"cdp connect failed"
)
self.events.errorOccurred.emit(
str(exc)
)
if not self._running:
break
await asyncio.sleep(
self._reconnect_delay
)
async def _connect(
self,
) -> None:
async with websockets.connect(
self.websocket_url,
max_size=32 * 1024 * 1024,
ping_interval=30,
ping_timeout=30,
close_timeout=10,
) as socket:
self._socket = socket
self._requests.clear()
self.events.connected.emit()
await self._send(
"Network.enable",
{},
)
await self._send(
"Page.enable",
{},
)
await self._send(
"Runtime.enable",
{},
)
await self._send(
"Fetch.enable",
{
"patterns": [
{
"urlPattern": "*",
"requestStage": "Request",
}
]
},
)
while self._running:
raw = await socket.recv()
try:
message = json.loads(
raw
)
except Exception:
continue
if "id" in message:
future = self._pending.pop(
message["id"],
None,
)
if (
future
and not future.done()
):
future.set_result(
message
)
continue
await self._handle_message(
message
)
self._socket = None
for future in (
self._pending.values()
):
try:
if not future.done():
future.cancel()
except Exception:
pass
self._pending.clear()
self.events.disconnected.emit()
async def _send(
self,
method: str,
params: dict,
) -> None:
if not self._socket:
return
self._message_id += 1
payload = {
"id": self._message_id,
"method": method,
"params": params,
}
try:
await self._socket.send(
json.dumps(payload)
)
except Exception:
log.exception(
"cdp send failed"
)
async def _send_wait(
self,
method: str,
params: dict | None = None,
timeout: float = 5.0,
) -> dict | None:
if (
not self._socket
or not self._loop
):
return None
self._message_id += 1
message_id = (
self._message_id
)
payload = {
"id": message_id,
"method": method,
"params": params or {},
}
future = (
self._loop.create_future()
)
self._pending[
message_id
] = future
await self._socket.send(
json.dumps(payload)
)
try:
return await asyncio.wait_for(
future,
timeout,
)
except Exception:
return None
finally:
self._pending.pop(
message_id,
None,
)
async def _handle_message(
self,
message: dict,
) -> None:
method = message.get(
"method"
)
if not method:
return
params = message.get(
"params",
{},
)
if (
method
== "Network.requestWillBeSent"
):
self._handle_request(
params
)
elif (
method
== "Network.responseReceived"
):
await self._handle_response(
params
)
elif (
method
== "Network.loadingFailed"
):
request_id = params.get(
"requestId"
)
if request_id:
self._requests.pop(
request_id,
None,
)
elif (
method
== "Fetch.requestPaused"
):
await self._continue_fetch(
params
)
async def _continue_fetch(
self,
params: dict,
) -> None:
request_id = params.get(
"requestId"
)
if not request_id:
return
if self.backend:
try:
request_data = params.get(
"request",
{},
)
url = request_data.get(
"url",
"",
)
try:
host = (
urlparse(url)
.hostname
or ""
).lower()
except Exception:
host = ""
request = {
"url": url,
"host": host,
"headers": request_data.get(
"headers",
{},
),
}
_, blocked = (
self.backend.process_request(
request
)
)
if blocked:
await self._send(
"Fetch.failRequest",
{
"requestId":
request_id,
"errorReason":
"BlockedByClient",
},
)
return
except Exception:
log.exception(
"fetch policy evaluation failed"
)
await self._send(
"Fetch.continueRequest",
{
"requestId":
request_id,
},
)
def _handle_request(
self,
params: dict,
) -> None:
request = params.get(
"request",
{},
)
request_id = (
params.get(
"requestId"
)
or str(
uuid.uuid4()
)
)
data = {
"request_id":
request_id,
"timestamp":
datetime.now(
UTC
).isoformat(),
"method":
request.get(
"method"
),
"url":
request.get(
"url"
),
"headers":
request.get(
"headers",
{},
),
"start":
time.time(),
}
if self.backend:
try:
data, blocked = (
self.backend.process_request(
data
)
)
if blocked:
return
except Exception:
log.exception(
"network pipeline failed"
)
self._requests[
request_id
] = data
self.events.requestCaptured.emit(
data
)
# v7
async def _fetch_body(
self,
request_id: str,
) -> str | None:
result = await self._send_wait(
"Network.getResponseBody",
{
"requestId":
request_id,
},
)
if not result:
return None
return (
result.get(
"result",
{},
).get(
"body"
)
)
async def _handle_response(
self,
params: dict,
) -> None:
request_id = params.get(
"requestId"
)
response = params.get(
"response",
{},
)
existing = (
self._requests.pop(
request_id,
{},
)
)
duration = None
if "start" in existing:
duration = (
time.time()
- existing["start"]
) * 1000
url = existing.get(
"url"
)
try:
host = (
urlparse(
url
).hostname
if url
else None
)
except Exception:
host = None
response_body = None
if (
self.capture_bodies
and request_id
):
try:
response_body = (
await self._fetch_body(
request_id
)
)
except Exception:
pass
record = {
"request_id":
request_id,
"timestamp":
existing.get(
"timestamp"
),
"method":
existing.get(
"method"
),
"url":
url,
"host":
host,
"status":
response.get(
"status"
),
"request_headers":
existing.get(
"headers",
{},
),
"response_headers":
response.get(
"headers",
{},
),
"duration_ms":
duration,
}
try:
self.db.queue_request(
request_id=record[
"request_id"
],
timestamp=record[
"timestamp"
],
method=record[
"method"
],
url=record[
"url"
],
host=record[
"host"
],
status_code=record[
"status"
],
resource_type=response.get(
"mimeType"
),
request_headers=record[
"request_headers"
],
response_headers=record[
"response_headers"
],
request_body=None,
response_body=response_body,
duration_ms=record[
"duration_ms"
],
)
except Exception:
log.exception(
"failed to persist request"
)
self.events.responseCaptured.emit(
record
)