- Implement GhostCursorEngine with Bezier curves/Fitts's Law in src/browser/ghost_cursor.py
- Implement EntropyScheduler (Gaussian jitter/phase drift) in src/core/scheduler.py
- Implement MobileProxyRotator (sticky sessions) in src/core/proxy.py
- Update CamoufoxManager to target Chrome 124 for TLS consistency
- Add manual TLS verification script (tests/manual/verify_tls.py)
- Update implementation plan and walkthrough documentation
121 lines
4.4 KiB
Python
121 lines
4.4 KiB
Python
import asyncio
|
|
import logging
|
|
from typing import Optional, Dict, Any
|
|
from playwright.async_api import async_playwright, BrowserContext, Page, Browser
|
|
from src.core.session import SessionState
|
|
|
|
# Configure logging.
# NOTE(review): basicConfig() runs as an import-time side effect and targets the
# root logger (level INFO). Per the logging docs it does nothing if the root
# logger already has handlers. Confirm that this module, rather than the
# application entry point, is meant to own logging configuration.
logging.basicConfig(level=logging.INFO)

# Module-level logger named after this module; used by CamoufoxManager below.
logger = logging.getLogger(__name__)
|
|
|
|
class CamoufoxManager:
    """
    Manages the lifecycle of a Camoufox (Playwright) browser instance.

    Handles initialization, navigation, and session state extraction.

    The manager launches Playwright's Chromium build with a fixed Chrome 124
    User-Agent, a 1920x1080 viewport, the en-US locale and the
    America/New_York timezone. It can be used as an async context manager:

        async with CamoufoxManager() as manager:
            await manager.navigate(url)
            state = await manager.extract_session_state()
    """

    def __init__(self, headless: bool = True, proxy: Optional[Dict[str, str]] = None):
        """
        Store the launch options; no browser is started here.

        Args:
            headless: Passed to the browser launch call as ``headless``.
            proxy: Passed unchanged to the browser launch call as ``proxy``
                (``None`` means no proxy).
        """
        self.headless = headless
        self.proxy = proxy

        # All handles stay None until initialize() succeeds and are reset to
        # None again by close().
        self.playwright = None
        self.browser: Optional[Browser] = None
        self.context: Optional[BrowserContext] = None
        self.page: Optional[Page] = None

        # Updated to Chrome 124 to align with newer Playwright builds and curl_cffi support
        self._dummy_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"

    async def __aenter__(self):
        """
        Initialize the browser and return this manager.
        """
        await self.initialize()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """
        Release all browser resources; exceptions from the body are not suppressed.
        """
        await self.close()

    async def initialize(self) -> None:
        """
        Launch the browser and create a context and a page.

        If any step fails, everything started so far is torn down before the
        original exception is re-raised.
        """
        logger.info("Initializing CamoufoxManager...")
        try:
            self.playwright = await async_playwright().start()

            # Launch options
            launch_args = ["--disable-blink-features=AutomationControlled"]
            self.browser = await self.playwright.chromium.launch(
                headless=self.headless,
                args=launch_args,
                proxy=self.proxy,
            )

            # Context options usually come from a profile (e.g. BrowserForge)
            # For MVP, we set a fixed User-Agent and Viewport
            self.context = await self.browser.new_context(
                user_agent=self._dummy_user_agent,
                viewport={"width": 1920, "height": 1080},
                locale="en-US",
                timezone_id="America/New_York",
            )

            # Anti-detection scripts would be injected here
            await self.context.add_init_script(
                "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
            )

            self.page = await self.context.new_page()
        except BaseException:
            # __aexit__ is never invoked when __aenter__ raises, so a partial
            # start-up must be released here or the driver/browser would leak.
            try:
                await self.close()
            except Exception:
                # Keep the original start-up error as the one callers see.
                logger.exception("Cleanup after failed initialization also failed")
            raise
        logger.info("Browser initialized.")

    async def close(self) -> None:
        """
        Clean up resources aggressively.

        Every held resource gets a teardown attempt (context, then browser,
        then the Playwright driver) even if an earlier step raises; the error
        still propagates afterwards. The instance attributes are reset to
        ``None`` first, so calling close() again is a no-op.
        """
        logger.info("Closing CamoufoxManager resources...")

        # Detach the handles up front so the instance never keeps references
        # to closed (or half-closed) objects, whatever happens below.
        context, browser, playwright = self.context, self.browser, self.playwright
        self.page = None
        self.context = None
        self.browser = None
        self.playwright = None

        try:
            if context is not None:
                await context.close()
        finally:
            try:
                if browser is not None:
                    await browser.close()
            finally:
                if playwright is not None:
                    await playwright.stop()

        logger.info("Resources cleaned up.")

    async def navigate(self, url: str) -> None:
        """
        Navigate the managed page to ``url``, waiting for DOMContentLoaded.

        Raises:
            RuntimeError: If the browser has not been initialized.
        """
        if not self.page:
            raise RuntimeError("Browser not initialized")
        logger.info("Navigating to %s", url)
        await self.page.goto(url, wait_until='domcontentloaded')

    async def _read_storage(self, storage_name: str) -> Dict[str, Any]:
        """
        Return the page's ``window.<storage_name>`` contents as a dict.
        """
        import json  # function-local import, as in the original code

        # The storage object is serialized inside the page and parsed here.
        serialized = await self.page.evaluate(f"() => JSON.stringify(window.{storage_name})")
        return json.loads(serialized)

    async def extract_session_state(self) -> "SessionState":
        """
        Extract cookies, storage, and fingerprint details into SessionState.

        Raises:
            RuntimeError: If the browser has not been initialized.
        """
        if not self.context or not self.page:
            raise RuntimeError("Browser not initialized")

        logger.info("Extracting session state...")

        # 1. Cookies
        cookies = await self.context.cookies()

        # 2. Local Storage
        local_storage_dict = await self._read_storage("localStorage")

        # 3. Session Storage
        session_storage_dict = await self._read_storage("sessionStorage")

        # 4. CF Clearance (Search in cookies)
        # NOTE(review): this passes the whole cookie entry (or None), not just
        # its value - confirm that is what SessionState expects.
        cf_clearance = next((c for c in cookies if c['name'] == 'cf_clearance'), None)

        # 5. TLS Fingerprint (In a real scenario, this matches the browser build)
        # Updated to match the UA
        tls_fingerprint = "chrome124"

        return SessionState(
            cookies=cookies,
            local_storage=local_storage_dict,
            session_storage=session_storage_dict,
            cf_clearance=cf_clearance,
            user_agent=self._dummy_user_agent,
            tls_fingerprint=tls_fingerprint,
        )
|