From a15ca58ef8cea34434f78ffe8f1be11307670b71 Mon Sep 17 00:00:00 2001
From: Luciabrightcode
Date: Mon, 22 Dec 2025 17:37:53 +0800
Subject: [PATCH] to implement e2e tests

---
 src/browser/manager.py     | 137 +++++++++++++++++++++++++++++++++++++
 src/extractor/client.py    |  80 ++++++++++++++++++++++
 tests/e2e/test_handover.py |  66 ++++++++++++++++++
 tests/manual/verify_tls.py |  80 ++++++++++++++++++++++
 4 files changed, 363 insertions(+)
 create mode 100644 src/browser/manager.py
 create mode 100644 src/extractor/client.py
 create mode 100644 tests/e2e/test_handover.py
 create mode 100644 tests/manual/verify_tls.py

diff --git a/src/browser/manager.py b/src/browser/manager.py
new file mode 100644
index 0000000..ffac2b6
--- /dev/null
+++ b/src/browser/manager.py
@@ -0,0 +1,137 @@
+import asyncio
+import json
+import logging
+from typing import Optional, Dict, Any
+from playwright.async_api import async_playwright, BrowserContext, Page, Browser
+from src.core.session import SessionState
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class CamoufoxManager:
+    """
+    Manages the lifecycle of a Camoufox (Playwright) browser instance.
+    Handles initialization, navigation, and session state extraction.
+    """
+    def __init__(self, headless: bool = True, proxy: Optional[Dict[str, str]] = None):
+        self.headless = headless
+        self.proxy = proxy
+        self.playwright = None
+        self.browser: Optional[Browser] = None
+        self.context: Optional[BrowserContext] = None
+        self.page: Optional[Page] = None
+        self._dummy_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+
+    async def __aenter__(self):
+        await self.initialize()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        await self.close()
+
+    async def initialize(self) -> None:
+        """
+        Launch the browser and create a context.
+
+        If any step fails, everything started so far is closed before the
+        error propagates, because __aexit__ does not run when __aenter__ raises.
+        """
+        logger.info("Initializing CamoufoxManager...")
+        self.playwright = await async_playwright().start()
+
+        try:
+            # Launch options
+            launch_args = ["--disable-blink-features=AutomationControlled"]
+
+            self.browser = await self.playwright.chromium.launch(
+                headless=self.headless,
+                args=launch_args,
+                proxy=self.proxy
+            )
+
+            # Context options usually come from a profile (e.g. BrowserForge)
+            # For MVP, we set a fixed User-Agent and Viewport
+            self.context = await self.browser.new_context(
+                user_agent=self._dummy_user_agent,
+                viewport={"width": 1920, "height": 1080},
+                locale="en-US",
+                timezone_id="America/New_York"
+            )
+
+            # Anti-detection scripts would be injected here
+            await self.context.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
+
+            self.page = await self.context.new_page()
+        except BaseException:
+            # Includes cancellation: never leave a half-started browser behind.
+            await self.close()
+            raise
+        logger.info("Browser initialized.")
+
+    async def close(self) -> None:
+        """
+        Clean up resources aggressively.
+
+        Every resource is closed even if closing an earlier one fails, and the
+        handles are reset first so a repeated close() is a no-op.
+        """
+        logger.info("Closing CamoufoxManager resources...")
+        context, browser, playwright = self.context, self.browser, self.playwright
+        self.page = self.context = self.browser = self.playwright = None
+        try:
+            if context:
+                await context.close()
+        finally:
+            try:
+                if browser:
+                    await browser.close()
+            finally:
+                if playwright:
+                    await playwright.stop()
+        logger.info("Resources cleaned up.")
+
+    async def navigate(self, url: str) -> None:
+        """
+        Navigate to a URL properly.
+        """
+        if not self.page:
+            raise RuntimeError("Browser not initialized")
+        logger.info("Navigating to %s", url)
+        await self.page.goto(url, wait_until='domcontentloaded')
+
+    async def extract_session_state(self) -> SessionState:
+        """
+        Extract cookies, storage, and fingerprint details into SessionState.
+        """
+        if not self.context or not self.page:
+            raise RuntimeError("Browser not initialized")
+
+        logger.info("Extracting session state...")
+
+        # 1. Cookies
+        cookies = await self.context.cookies()
+
+        # 2. Local Storage
+        local_storage = await self.page.evaluate("() => JSON.stringify(window.localStorage)")
+        local_storage_dict = json.loads(local_storage)
+
+        # 3. Session Storage
+        session_storage = await self.page.evaluate("() => JSON.stringify(window.sessionStorage)")
+        session_storage_dict = json.loads(session_storage)
+
+        # 4. CF Clearance (Search in cookies)
+        cf_clearance = next((c for c in cookies if c['name'] == 'cf_clearance'), None)
+
+        # 5. TLS Fingerprint (In a real scenario, this matches the browser build)
+        # For now, we hardcode what we expect to match the Extractor
+        tls_fingerprint = "chrome120"
+
+        return SessionState(
+            cookies=cookies,
+            local_storage=local_storage_dict,
+            session_storage=session_storage_dict,
+            cf_clearance=cf_clearance,
+            user_agent=self._dummy_user_agent,
+            tls_fingerprint=tls_fingerprint
+        )
diff --git a/src/extractor/client.py b/src/extractor/client.py
new file mode 100644
index 0000000..84ca88e
--- /dev/null
+++ b/src/extractor/client.py
@@ -0,0 +1,80 @@
+from curl_cffi.requests import AsyncSession
+from src.core.session import SessionState
+import logging
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class CurlClient:
+    """
+    High-performance extraction client using curl_cffi.
+    Mimics the TLS fingerprint and header profile of the browser session.
+    """
+    def __init__(self, session_state: SessionState):
+        self.session_state = session_state
+        self.session = None
+
+    async def __aenter__(self):
+        await self.initialize()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        await self.close()
+
+    async def initialize(self) -> None:
+        """
+        Configure the curl_cffi session with matching fingerprint.
+        """
+        logger.info("Initializing CurlClient...")
+
+        # impersonate argument controls TLS Client Hello
+        # 'chrome120' matches our hardcoded Camoufox build in this MVP
+        self.session = AsyncSession(impersonate=self.session_state.tls_fingerprint)
+
+        # 1. Inject Cookies
+        for cookie in self.session_state.cookies:
+            # curl_cffi expects specific arguments for setting cookies if done manually,
+            # or we can use the cookies parameter in requests.
+            # But AsyncSession has a cookie jar.
+            self.session.cookies.set(
+                name=cookie['name'],
+                value=cookie['value'],
+                domain=cookie['domain'],
+                path=cookie.get('path', '/')
+                # secure is handled by protocol
+            )
+
+        # 2. Set Headers
+        # We need to explicitly set headers that match the browser
+        self.session.headers = {
+            "User-Agent": self.session_state.user_agent,
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+            "Accept-Language": "en-US,en;q=0.9",
+            # Add sec-ch-ua derivation here if strict mode
+        }
+
+        logger.info("CurlClient initialized with impersonation: %s", self.session_state.tls_fingerprint)
+
+    async def close(self) -> None:
+        """
+        Close the underlying curl_cffi session.
+
+        AsyncSession.close() is a coroutine, so it must be awaited; otherwise
+        the session is never actually closed.
+        """
+        if self.session:
+            await self.session.close()
+            self.session = None
+
+    async def fetch(self, url: str) -> str:
+        """
+        Execute a GET request using the impersonated session.
+        """
+        if not self.session:
+            raise RuntimeError("Client not initialized")
+
+        logger.info("Fetching %s...", url)
+        response = await self.session.get(url)
+        logger.info("Response status: %s", response.status_code)
+        return response.text
diff --git a/tests/e2e/test_handover.py b/tests/e2e/test_handover.py
new file mode 100644
index 0000000..d6adfde
--- /dev/null
+++ b/tests/e2e/test_handover.py
@@ -0,0 +1,66 @@
+import asyncio
+import pytest
+from aiohttp import web
+from src.browser.manager import CamoufoxManager
+from src.extractor.client import CurlClient
+
+# Global to store received headers for verification
+received_headers = []
+
+async def handle_request(request):
+    """
+    Mock endpoint that logs headers.
+    """
+    headers = dict(request.headers)
+    received_headers.append(headers)
+    return web.json_response({"status": "ok", "headers": headers})
+
+async def start_mock_server(port=8080):
+    app = web.Application()
+    app.router.add_get('/', handle_request)
+    runner = web.AppRunner(app)
+    await runner.setup()
+    site = web.TCPSite(runner, 'localhost', port)
+    await site.start()
+    return runner
+
+@pytest.mark.asyncio
+async def test_handover_e2e():
+    """
+    Test the full loop:
+    1. Browser authenticates (hits mock) and extracts state.
+    2. Extractor initializes with state and hits mock.
+    3. Verify User-Agent consistency.
+    """
+    port = 8888
+    base_url = f"http://localhost:{port}/"
+    runner = await start_mock_server(port)
+
+    received_headers.clear()
+
+    try:
+        # 1. Browser Phase
+        async with CamoufoxManager() as browser:
+            await browser.navigate(base_url)
+            session_state = await browser.extract_session_state()
+
+        assert len(received_headers) == 1
+        browser_headers = received_headers[0]
+
+        # 2. Extractor Phase
+        async with CurlClient(session_state) as extractor:
+            await extractor.fetch(base_url)
+
+        assert len(received_headers) == 2
+        extractor_headers = received_headers[1]
+
+        # 3. Verification
+        print(f"Browser UA: {browser_headers.get('User-Agent')}")
+        print(f"Extractor UA: {extractor_headers.get('User-Agent')}")
+
+        assert browser_headers.get('User-Agent') == extractor_headers.get('User-Agent')
+        # Note: Other headers might vary slightly due to browser vs curl defaults,
+        # but UA must be exact.
+
+    finally:
+        await runner.cleanup()
diff --git a/tests/manual/verify_tls.py b/tests/manual/verify_tls.py
new file mode 100644
index 0000000..7a0ee22
--- /dev/null
+++ b/tests/manual/verify_tls.py
@@ -0,0 +1,80 @@
+import asyncio
+import json
+from src.browser.manager import CamoufoxManager
+from src.extractor.client import CurlClient
+
+TARGET_URL = "https://tls.peet.ws/api/all"
+
+async def main():
+    print(f"Verifying TLS Fingerprints against {TARGET_URL}...\n")
+
+    # 1. Browser
+    print(">>> 1. CAMOUFOX BROWSER REQUEST")
+    browser_fp = None
+    session_state = None
+
+    try:
+        async with CamoufoxManager(headless=True) as browser:
+            await browser.navigate(TARGET_URL)
+            # tls.peet.ws/api/all returns JSON, which the browser renders as text.
+            # Read the rendered body text to get the raw JSON back.
+            json_text = await browser.page.evaluate("() => document.body.innerText")
+            try:
+                browser_fp = json.loads(json_text)
+            except json.JSONDecodeError:
+                print("Could not parse JSON from browser page.")
+                print(json_text[:200])
+            else:
+                print("Captured Browser Fingerprint:")
+                print(json.dumps(browser_fp.get('tls', {}), indent=2))
+
+            session_state = await browser.extract_session_state()
+    except Exception as e:
+        print(f"Browser failed: {e}")
+        return
+
+    if not session_state:
+        print("Failed to get session state.")
+        return
+
+    print("\n------------------------------------------------\n")
+
+    # 2. Extractor
+    print(">>> 2. CURL EXTRACTOR REQUEST")
+    try:
+        async with CurlClient(session_state) as extractor:
+            json_text = await extractor.fetch(TARGET_URL)
+    except Exception as e:
+        print(f"Extractor failed: {e}")
+        return
+
+    try:
+        extractor_fp = json.loads(json_text)
+    except json.JSONDecodeError:
+        print("Could not parse JSON from extractor response.")
+        print(json_text[:200])
+        return
+
+    print("Captured Extractor Fingerprint:")
+    print(json.dumps(extractor_fp.get('tls', {}), indent=2))
+
+    if browser_fp is None:
+        # The browser response was not valid JSON, so there is nothing to compare.
+        print("\nSkipping comparison: no browser fingerprint was captured.")
+        return
+
+    # Comparison
+    b_ja3 = browser_fp.get('tls', {}).get('ja3_hash')
+    e_ja3 = extractor_fp.get('tls', {}).get('ja3_hash')
+
+    print("\nMatch Result:")
+    print(f"Browser JA3: {b_ja3}")
+    print(f"Extractor JA3: {e_ja3}")
+
+    if b_ja3 == e_ja3:
+        print("✅ SUCCESS: JA3 Hashes Match!")
+    else:
+        print("❌ FAILURE: JA3 Mismatch.")
+
+if __name__ == "__main__":
+    asyncio.run(main())