Implement e2e tests

This commit is contained in:
Luciabrightcode 2025-12-22 17:37:53 +08:00
parent ef370dacff
commit a15ca58ef8
4 changed files with 334 additions and 0 deletions

120
src/browser/manager.py Normal file
View file

@ -0,0 +1,120 @@
import asyncio
import logging
from typing import Optional, Dict, Any
from playwright.async_api import async_playwright, BrowserContext, Page, Browser
from src.core.session import SessionState
# Configure logging
# NOTE(review): logging.basicConfig() runs at import time, so merely importing
# this module attempts to configure the root logger at INFO level. Presumably
# acceptable for this MVP; consider moving it to the application entry point.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class CamoufoxManager:
    """
    Manages the lifecycle of a Camoufox (Playwright) browser instance.

    Handles initialization, navigation, and session state extraction.
    Usable as an async context manager: entering calls ``initialize()`` and
    leaving calls ``close()``.

    Args:
        headless: Whether the browser is launched without a visible window.
        proxy: Optional proxy settings passed straight through to the
            Playwright ``launch()`` call.
    """

    def __init__(self, headless: bool = True, proxy: Optional[Dict[str, str]] = None):
        self.headless = headless
        self.proxy = proxy
        # All handles stay None until initialize() succeeds.
        self.playwright = None
        self.browser: Optional[Browser] = None
        self.context: Optional[BrowserContext] = None
        self.page: Optional[Page] = None
        # Fixed User-Agent applied to the browser context and reported in the
        # extracted SessionState.
        self._dummy_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

    async def __aenter__(self):
        await self.initialize()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    async def initialize(self) -> None:
        """
        Launch the browser and create a context and a page.

        If any step fails, everything that was already started is released
        before the exception propagates.
        """
        logger.info("Initializing CamoufoxManager...")
        try:
            self.playwright = await async_playwright().start()
            # Launch options
            launch_args = ["--disable-blink-features=AutomationControlled"]
            # NOTE: the MVP launches Playwright's Chromium build.
            self.browser = await self.playwright.chromium.launch(
                headless=self.headless,
                args=launch_args,
                proxy=self.proxy,
            )
            # Context options usually come from a profile (e.g. BrowserForge)
            # For MVP, we set a fixed User-Agent and Viewport
            self.context = await self.browser.new_context(
                user_agent=self._dummy_user_agent,
                viewport={"width": 1920, "height": 1080},
                locale="en-US",
                timezone_id="America/New_York",
            )
            # Anti-detection scripts would be injected here
            await self.context.add_init_script(
                "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
            )
            self.page = await self.context.new_page()
        except BaseException:
            # __aexit__ is never invoked when __aenter__ raises, so a partial
            # start-up must be torn down here or the driver/browser would leak.
            await self.close()
            raise
        logger.info("Browser initialized.")

    async def close(self) -> None:
        """
        Clean up resources aggressively.

        Safe to call more than once and after a partial ``initialize()``.
        Each resource is released even if closing an earlier one raises.
        """
        logger.info("Closing CamoufoxManager resources...")
        # Detach the handles first so the manager reads as "not initialized"
        # from here on, regardless of whether a close call below fails.
        context, browser, playwright = self.context, self.browser, self.playwright
        self.page = None
        self.context = None
        self.browser = None
        self.playwright = None
        try:
            if context:
                await context.close()
        finally:
            try:
                if browser:
                    await browser.close()
            finally:
                if playwright:
                    await playwright.stop()
        logger.info("Resources cleaned up.")

    async def navigate(self, url: str) -> None:
        """
        Navigate the page to ``url`` and wait for DOMContentLoaded.

        Raises:
            RuntimeError: If the browser has not been initialized.
        """
        if not self.page:
            raise RuntimeError("Browser not initialized")
        logger.info("Navigating to %s", url)
        await self.page.goto(url, wait_until='domcontentloaded')

    async def extract_session_state(self) -> SessionState:
        """
        Extract cookies, storage, and fingerprint details into SessionState.

        Returns:
            A SessionState built from the context cookies, the current page's
            localStorage and sessionStorage, the ``cf_clearance`` cookie (if
            present), the fixed User-Agent and a hardcoded TLS fingerprint.

        Raises:
            RuntimeError: If the browser has not been initialized.
        """
        import json

        if not self.context or not self.page:
            raise RuntimeError("Browser not initialized")
        logger.info("Extracting session state...")
        # 1. Cookies
        cookies = await self.context.cookies()
        # 2. Local Storage (serialized in the page, decoded here)
        local_storage = await self.page.evaluate("() => JSON.stringify(window.localStorage)")
        local_storage_dict = json.loads(local_storage)
        # 3. Session Storage
        session_storage = await self.page.evaluate("() => JSON.stringify(window.sessionStorage)")
        session_storage_dict = json.loads(session_storage)
        # 4. CF Clearance (Search in cookies); this is the whole cookie dict,
        # or None when no such cookie exists.
        cf_clearance = next((c for c in cookies if c['name'] == 'cf_clearance'), None)
        # 5. TLS Fingerprint (In a real scenario, this matches the browser build)
        # For now, we hardcode what we expect to match the Extractor
        tls_fingerprint = "chrome120"
        return SessionState(
            cookies=cookies,
            local_storage=local_storage_dict,
            session_storage=session_storage_dict,
            cf_clearance=cf_clearance,
            user_agent=self._dummy_user_agent,
            tls_fingerprint=tls_fingerprint,
        )

73
src/extractor/client.py Normal file
View file

@ -0,0 +1,73 @@
from curl_cffi.requests import AsyncSession
from src.core.session import SessionState
import logging
# Configure logging
# NOTE(review): logging.basicConfig() runs at import time, so merely importing
# this module attempts to configure the root logger at INFO level. Presumably
# acceptable for this MVP; consider moving it to the application entry point.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class CurlClient:
    """
    High-performance extraction client using curl_cffi.

    Mimics the TLS fingerprint and header profile of the browser session.
    Usable as an async context manager: entering calls ``initialize()`` and
    leaving calls ``close()``.

    Args:
        session_state: State captured from the browser; its cookies,
            ``user_agent`` and ``tls_fingerprint`` are applied to the session.
    """

    def __init__(self, session_state: SessionState):
        self.session_state = session_state
        # Stays None until initialize() succeeds and again after close().
        self.session = None

    async def __aenter__(self):
        await self.initialize()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    async def initialize(self) -> None:
        """
        Configure the curl_cffi session with matching fingerprint.

        The session is only published on ``self.session`` once cookies and
        headers are applied; if configuration fails it is closed again.
        """
        logger.info("Initializing CurlClient...")
        # impersonate argument controls TLS Client Hello
        # 'chrome120' matches our hardcoded Camoufox build in this MVP
        session = AsyncSession(impersonate=self.session_state.tls_fingerprint)
        try:
            # 1. Inject Cookies into the session's cookie jar
            for cookie in self.session_state.cookies:
                session.cookies.set(
                    name=cookie['name'],
                    value=cookie['value'],
                    domain=cookie['domain'],
                    path=cookie.get('path', '/'),
                    # secure is handled by protocol
                )
            # 2. Set Headers
            # We need to explicitly set headers that match the browser
            session.headers = {
                "User-Agent": self.session_state.user_agent,
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
                "Accept-Language": "en-US,en;q=0.9",
                # Add sec-ch-ua derivation here if strict mode
            }
        except BaseException:
            # __aexit__ is never invoked when __aenter__ raises, so release
            # the half-configured session here.
            await session.close()
            raise
        self.session = session
        logger.info(
            "CurlClient initialized with impersonation: %s",
            self.session_state.tls_fingerprint,
        )

    async def close(self) -> None:
        """
        Close the underlying session, if any. Safe to call more than once.
        """
        session, self.session = self.session, None
        if session:
            # AsyncSession.close() is a coroutine; without the await it would
            # never actually run and the session would stay open.
            await session.close()

    async def fetch(self, url: str) -> str:
        """
        Execute a GET request using the impersonated session.

        Returns:
            The response body as text. The status code is logged only, not
            checked.

        Raises:
            RuntimeError: If the client has not been initialized.
        """
        if not self.session:
            raise RuntimeError("Client not initialized")
        logger.info("Fetching %s...", url)
        response = await self.session.get(url)
        logger.info("Response status: %s", response.status_code)
        return response.text

View file

@ -0,0 +1,66 @@
import asyncio
import pytest
from aiohttp import web
from src.browser.manager import CamoufoxManager
from src.extractor.client import CurlClient
# Headers of every request seen by the mock endpoint, in arrival order;
# the test inspects this list to verify what each client sent.
received_headers = []


async def handle_request(request):
    """
    Mock endpoint: record the request headers and echo them back as JSON.
    """
    snapshot = dict(request.headers)
    received_headers.append(snapshot)
    payload = {"status": "ok", "headers": snapshot}
    return web.json_response(payload)
async def start_mock_server(port=8080):
    """
    Start the mock HTTP server on localhost and return its runner.

    Args:
        port: TCP port to listen on.

    Returns:
        The started ``web.AppRunner``; the caller must ``await
        runner.cleanup()`` to shut the server down.
    """
    app = web.Application()
    app.router.add_get('/', handle_request)
    runner = web.AppRunner(app)
    await runner.setup()
    try:
        site = web.TCPSite(runner, 'localhost', port)
        await site.start()
    except BaseException:
        # The caller never receives the runner on failure (e.g. port already
        # in use), so it has to be cleaned up here.
        await runner.cleanup()
        raise
    return runner
@pytest.mark.asyncio
async def test_handover_e2e():
    """
    End-to-end check of the browser -> extractor handover.

    Flow:
    1. The browser visits the mock endpoint and its session state is captured.
    2. An extractor built from that state requests the same endpoint.
    3. The User-Agent seen by the server must be identical for both requests.
    """
    port = 8888
    base_url = f"http://localhost:{port}/"
    server = await start_mock_server(port)
    received_headers.clear()
    try:
        # Phase 1: browser request + state capture
        async with CamoufoxManager() as browser:
            await browser.navigate(base_url)
            state = await browser.extract_session_state()
        assert len(received_headers) == 1
        from_browser = received_headers[0]

        # Phase 2: extractor request using the captured state
        async with CurlClient(state) as extractor:
            await extractor.fetch(base_url)
        assert len(received_headers) == 2
        from_extractor = received_headers[1]

        # Phase 3: compare what the server saw
        browser_ua = from_browser.get('User-Agent')
        extractor_ua = from_extractor.get('User-Agent')
        print(f"Browser UA: {browser_ua}")
        print(f"Extractor UA: {extractor_ua}")
        # Other headers may differ between browser and curl defaults,
        # but the User-Agent has to match exactly.
        assert browser_ua == extractor_ua
    finally:
        await server.cleanup()

View file

@ -0,0 +1,75 @@
import asyncio
import json
from src.browser.manager import CamoufoxManager
from src.extractor.client import CurlClient
# Endpoint that echoes the TLS fingerprint of the calling client as JSON.
TARGET_URL = "https://tls.peet.ws/api/all"


def _parse_fingerprint(raw_text, source_label):
    """Parse ``raw_text`` as a JSON object; print a diagnostic and return None on failure."""
    try:
        parsed = json.loads(raw_text)
    except json.JSONDecodeError:
        parsed = None
    if not isinstance(parsed, dict):
        print(f"Could not parse JSON from {source_label}.")
        print(raw_text[:200])
        return None
    return parsed


def _ja3_hash(fingerprint):
    """Return the ``tls.ja3_hash`` value of a fingerprint dict, or None if absent."""
    tls = fingerprint.get('tls') if isinstance(fingerprint, dict) else None
    return tls.get('ja3_hash') if isinstance(tls, dict) else None


async def main():
    """
    Compare the TLS fingerprint of the browser with that of the curl extractor.

    Requests TARGET_URL first through CamoufoxManager, then through a
    CurlClient built from the browser's session state, and prints both
    fingerprints together with the JA3 comparison result.
    """
    print(f"Verifying TLS Fingerprints against {TARGET_URL}...\n")

    # 1. Browser
    print(">>> 1. CAMOUFOX BROWSER REQUEST")
    browser_fp = None
    session_state = None
    try:
        async with CamoufoxManager(headless=True) as browser:
            await browser.navigate(TARGET_URL)
            # The endpoint returns JSON, which the browser renders as text;
            # read the body text rather than the surrounding HTML.
            json_text = await browser.page.evaluate("() => document.body.innerText")
            browser_fp = _parse_fingerprint(json_text, "browser page")
            if browser_fp is not None:
                print("Captured Browser Fingerprint:")
                print(json.dumps(browser_fp.get('tls', {}), indent=2))
            # State is still extracted when the fingerprint could not be
            # parsed, so the extractor request can run regardless.
            session_state = await browser.extract_session_state()
    except Exception as e:
        print(f"Browser failed: {e}")
        return
    if not session_state:
        print("Failed to get session state.")
        return

    print("\n------------------------------------------------\n")

    # 2. Extractor
    print(">>> 2. CURL EXTRACTOR REQUEST")
    try:
        async with CurlClient(session_state) as extractor:
            json_text = await extractor.fetch(TARGET_URL)
    except Exception as e:
        print(f"Extractor failed: {e}")
        return

    extractor_fp = _parse_fingerprint(json_text, "extractor response")
    if extractor_fp is None:
        return
    print("Captured Extractor Fingerprint:")
    print(json.dumps(extractor_fp.get('tls', {}), indent=2))

    # Comparison
    b_ja3 = _ja3_hash(browser_fp)
    e_ja3 = _ja3_hash(extractor_fp)
    print("\nMatch Result:")
    print(f"Browser JA3: {b_ja3}")
    print(f"Extractor JA3: {e_ja3}")
    if b_ja3 is None or e_ja3 is None:
        # Two missing hashes compare equal; that must not count as a match.
        print("❌ FAILURE: JA3 hash missing from one or both fingerprints.")
    elif b_ja3 == e_ja3:
        print("✅ SUCCESS: JA3 Hashes Match!")
    else:
        print("❌ FAILURE: JA3 Mismatch.")


if __name__ == "__main__":
    asyncio.run(main())