import asyncio
import json
import logging
from typing import Optional, Dict, Any

from playwright.async_api import async_playwright, BrowserContext, Page, Browser

from src.core.session import SessionState

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class CamoufoxManager:
    """
    Manages the lifecycle of a Playwright-driven browser instance.

    Handles initialization, navigation, and session state extraction.
    Usable as an async context manager::

        async with CamoufoxManager() as manager:
            await manager.navigate("https://example.com")
            state = await manager.extract_session_state()

    NOTE(review): despite the class name, ``initialize`` launches Playwright's
    Chromium build (``playwright.chromium.launch``).
    """

    def __init__(self, headless: bool = True, proxy: Optional[Dict[str, str]] = None):
        """
        Store launch settings; no browser is started until ``initialize``.

        Args:
            headless: Passed through to the browser launch call.
            proxy: Optional proxy settings dict passed through to the browser
                launch call.
        """
        self.headless = headless
        self.proxy = proxy
        # Handle returned by ``async_playwright().start()``; None until initialized.
        self.playwright = None
        self.browser: Optional[Browser] = None
        self.context: Optional[BrowserContext] = None
        self.page: Optional[Page] = None

        # Updated to Chrome 124 to align with newer Playwright builds and curl_cffi support
        self._dummy_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"

    async def __aenter__(self) -> "CamoufoxManager":
        """Initialize the browser and return this manager."""
        await self.initialize()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        """Release all browser resources; exceptions from the body propagate."""
        await self.close()

    async def initialize(self) -> None:
        """
        Launch the browser and create a context and a page.

        If any step after starting Playwright fails, everything acquired so
        far is released before the original exception is re-raised.
        """
        logger.info("Initializing CamoufoxManager...")
        self.playwright = await async_playwright().start()

        try:
            # Launch options
            launch_args = ["--disable-blink-features=AutomationControlled"]

            self.browser = await self.playwright.chromium.launch(
                headless=self.headless,
                args=launch_args,
                proxy=self.proxy
            )

            # Context options usually come from a profile (e.g. BrowserForge)
            # For MVP, we set a fixed User-Agent and Viewport
            self.context = await self.browser.new_context(
                user_agent=self._dummy_user_agent,
                viewport={"width": 1920, "height": 1080},
                locale="en-US",
                timezone_id="America/New_York"
            )

            # Anti-detection scripts would be injected here
            await self.context.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")

            self.page = await self.context.new_page()
        except BaseException:
            # ``__aexit__`` is not invoked when ``__aenter__`` raises, so a
            # partially built browser must be torn down here. BaseException
            # is caught so task cancellation mid-launch also cleans up.
            try:
                await self.close()
            except Exception:
                # Keep the original failure as the one the caller sees.
                logger.exception("Cleanup after failed initialization also failed")
            raise

        logger.info("Browser initialized.")

    async def close(self) -> None:
        """
        Clean up resources aggressively.

        Closes the context, the browser and the Playwright driver in that
        order. Each step runs even if an earlier one raised, and the handles
        on ``self`` are cleared first so that calling ``close`` again is a
        no-op and the other methods report "not initialized" afterwards.
        """
        logger.info("Closing CamoufoxManager resources...")

        # Detach the handles before closing so a failure part-way through
        # cannot leave stale references behind.
        context, browser, playwright = self.context, self.browser, self.playwright
        self.page = None
        self.context = None
        self.browser = None
        self.playwright = None

        try:
            if context:
                await context.close()
        finally:
            try:
                if browser:
                    await browser.close()
            finally:
                if playwright:
                    await playwright.stop()

        logger.info("Resources cleaned up.")

    async def navigate(self, url: str) -> None:
        """
        Navigate the managed page to ``url``.

        Returns once the ``domcontentloaded`` event has fired.

        Raises:
            RuntimeError: If the browser has not been initialized.
        """
        if not self.page:
            raise RuntimeError("Browser not initialized")

        logger.info("Navigating to %s", url)
        await self.page.goto(url, wait_until='domcontentloaded')

    async def extract_session_state(self) -> SessionState:
        """
        Extract cookies, storage, and fingerprint details into SessionState.

        Returns:
            A ``SessionState`` built from the context's cookies, the current
            page's localStorage and sessionStorage, the ``cf_clearance``
            cookie entry (or None when absent), the configured User-Agent and
            a fixed TLS fingerprint label.

        Raises:
            RuntimeError: If the browser has not been initialized.
        """
        if not self.context or not self.page:
            raise RuntimeError("Browser not initialized")

        logger.info("Extracting session state...")

        # 1. Cookies
        cookies = await self.context.cookies()

        # 2. Local Storage (serialized in the page, decoded here)
        local_storage = await self.page.evaluate("() => JSON.stringify(window.localStorage)")
        local_storage_dict = json.loads(local_storage)

        # 3. Session Storage
        session_storage = await self.page.evaluate("() => JSON.stringify(window.sessionStorage)")
        session_storage_dict = json.loads(session_storage)

        # 4. CF Clearance (Search in cookies); this is the whole cookie entry,
        # not just its value.
        cf_clearance = next((c for c in cookies if c['name'] == 'cf_clearance'), None)

        # 5. TLS Fingerprint (In a real scenario, this matches the browser build)
        # Updated to match the UA
        tls_fingerprint = "chrome124"

        return SessionState(
            cookies=cookies,
            local_storage=local_storage_dict,
            session_storage=session_storage_dict,
            cf_clearance=cf_clearance,
            user_agent=self._dummy_user_agent,
            tls_fingerprint=tls_fingerprint
        )