to implement e2e tests
parent ef370dacff
commit a15ca58ef8
4 changed files with 334 additions and 0 deletions
120  src/browser/manager.py  Normal file
@@ -0,0 +1,120 @@
import asyncio
import json
import logging
from typing import Optional, Dict, Any

from playwright.async_api import async_playwright, BrowserContext, Page, Browser

from src.core.session import SessionState

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class CamoufoxManager:
    """
    Manages the lifecycle of a Camoufox (Playwright) browser instance.
    Handles initialization, navigation, and session state extraction.
    """

    def __init__(self, headless: bool = True, proxy: Optional[Dict[str, str]] = None):
        self.headless = headless
        self.proxy = proxy
        self.playwright = None
        self.browser: Optional[Browser] = None
        self.context: Optional[BrowserContext] = None
        self.page: Optional[Page] = None
        self._dummy_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

    async def __aenter__(self):
        await self.initialize()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    async def initialize(self) -> None:
        """Launch the browser and create a context."""
        logger.info("Initializing CamoufoxManager...")
        self.playwright = await async_playwright().start()

        # Launch options
        launch_args = ["--disable-blink-features=AutomationControlled"]

        self.browser = await self.playwright.chromium.launch(
            headless=self.headless,
            args=launch_args,
            proxy=self.proxy
        )

        # Context options normally come from a generated profile (e.g. BrowserForge);
        # for the MVP we use a fixed User-Agent and viewport.
        self.context = await self.browser.new_context(
            user_agent=self._dummy_user_agent,
            viewport={"width": 1920, "height": 1080},
            locale="en-US",
            timezone_id="America/New_York"
        )

        # Anti-detection scripts would be injected here
        await self.context.add_init_script(
            "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
        )

        self.page = await self.context.new_page()
        logger.info("Browser initialized.")

    async def close(self) -> None:
        """Clean up the context, browser, and Playwright driver."""
        logger.info("Closing CamoufoxManager resources...")
        if self.context:
            await self.context.close()
        if self.browser:
            await self.browser.close()
        if self.playwright:
            await self.playwright.stop()
        logger.info("Resources cleaned up.")

    async def navigate(self, url: str) -> None:
        """Navigate to a URL and wait for the DOM content to load."""
        if not self.page:
            raise RuntimeError("Browser not initialized")
        logger.info(f"Navigating to {url}")
        await self.page.goto(url, wait_until='domcontentloaded')

    async def extract_session_state(self) -> SessionState:
        """Extract cookies, storage, and fingerprint details into a SessionState."""
        if not self.context or not self.page:
            raise RuntimeError("Browser not initialized")

        logger.info("Extracting session state...")

        # 1. Cookies
        cookies = await self.context.cookies()

        # 2. Local storage
        local_storage = await self.page.evaluate("() => JSON.stringify(window.localStorage)")
        local_storage_dict = json.loads(local_storage)

        # 3. Session storage
        session_storage = await self.page.evaluate("() => JSON.stringify(window.sessionStorage)")
        session_storage_dict = json.loads(session_storage)

        # 4. CF clearance (search the cookies)
        cf_clearance = next((c for c in cookies if c['name'] == 'cf_clearance'), None)

        # 5. TLS fingerprint (in a real scenario this matches the browser build);
        #    for now we hardcode the value the extractor is expected to impersonate.
        tls_fingerprint = "chrome120"

        return SessionState(
            cookies=cookies,
            local_storage=local_storage_dict,
            session_storage=session_storage_dict,
            cf_clearance=cf_clearance,
            user_agent=self._dummy_user_agent,
            tls_fingerprint=tls_fingerprint
        )
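Note: src/core/session.py (providing SessionState) is referenced above but is not part of this commit. For orientation only, a hypothetical sketch of that model, with field names inferred from the SessionState(...) call in extract_session_state() and its use in the extractor below, might look like:

# Hypothetical src/core/session.py; NOT included in this commit.
# Field names and types are inferred from how CamoufoxManager constructs
# SessionState and how CurlClient consumes it.
from dataclasses import dataclass
from typing import Any, Dict, List, Optional


@dataclass
class SessionState:
    cookies: List[Dict[str, Any]]           # Playwright cookie dicts from context.cookies()
    local_storage: Dict[str, str]           # parsed window.localStorage snapshot
    session_storage: Dict[str, str]         # parsed window.sessionStorage snapshot
    cf_clearance: Optional[Dict[str, Any]] = None   # the cf_clearance cookie, if present
    user_agent: str = ""
    tls_fingerprint: str = "chrome120"      # curl_cffi impersonation target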
73  src/extractor/client.py  Normal file
@@ -0,0 +1,73 @@
import logging

from curl_cffi.requests import AsyncSession

from src.core.session import SessionState

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class CurlClient:
    """
    High-performance extraction client using curl_cffi.
    Mimics the TLS fingerprint and header profile of the browser session.
    """

    def __init__(self, session_state: SessionState):
        self.session_state = session_state
        self.session = None

    async def __aenter__(self):
        await self.initialize()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    async def initialize(self) -> None:
        """Configure the curl_cffi session with a matching fingerprint."""
        logger.info("Initializing CurlClient...")

        # The impersonate argument controls the TLS Client Hello;
        # 'chrome120' matches the hardcoded Camoufox build in this MVP.
        self.session = AsyncSession(impersonate=self.session_state.tls_fingerprint)

        # 1. Inject the browser cookies into the session's cookie jar.
        for cookie in self.session_state.cookies:
            self.session.cookies.set(
                name=cookie['name'],
                value=cookie['value'],
                domain=cookie['domain'],
                path=cookie.get('path', '/')
                # secure is handled by the protocol
            )

        # 2. Set headers that must match the browser profile.
        self.session.headers = {
            "User-Agent": self.session_state.user_agent,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "Accept-Language": "en-US,en;q=0.9",
            # Add sec-ch-ua derivation here in strict mode
        }

        logger.info(f"CurlClient initialized with impersonation: {self.session_state.tls_fingerprint}")

    async def close(self) -> None:
        if self.session:
            self.session.close()

    async def fetch(self, url: str) -> str:
        """Execute a GET request using the impersonated session."""
        if not self.session:
            raise RuntimeError("Client not initialized")

        logger.info(f"Fetching {url}...")
        response = await self.session.get(url)
        logger.info(f"Response status: {response.status_code}")
        return response.text
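Taken together, the two classes are meant to be chained: the browser performs the expensive navigation, and the curl_cffi client replays the captured state. A minimal usage sketch (not part of this commit, using a placeholder URL) under the assumption that both modules import as above:

import asyncio

from src.browser.manager import CamoufoxManager
from src.extractor.client import CurlClient


async def handover_demo() -> None:
    url = "https://example.com/"  # placeholder target, for illustration only

    # Phase 1: drive the real browser and capture cookies, storage, and fingerprint.
    async with CamoufoxManager(headless=True) as browser:
        await browser.navigate(url)
        session_state = await browser.extract_session_state()

    # Phase 2: replay the captured state through the lightweight curl_cffi client.
    async with CurlClient(session_state) as extractor:
        html = await extractor.fetch(url)
        print(html[:200])


if __name__ == "__main__":
    asyncio.run(handover_demo())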
66  tests/e2e/test_handover.py  Normal file
@@ -0,0 +1,66 @@
import asyncio
import pytest
from aiohttp import web

from src.browser.manager import CamoufoxManager
from src.extractor.client import CurlClient

# Global to store received headers for verification
received_headers = []


async def handle_request(request):
    """Mock endpoint that logs request headers."""
    headers = dict(request.headers)
    received_headers.append(headers)
    return web.json_response({"status": "ok", "headers": headers})


async def start_mock_server(port=8080):
    app = web.Application()
    app.router.add_get('/', handle_request)
    runner = web.AppRunner(app)
    await runner.setup()
    site = web.TCPSite(runner, 'localhost', port)
    await site.start()
    return runner


@pytest.mark.asyncio
async def test_handover_e2e():
    """
    Test the full loop:
    1. Browser authenticates (hits the mock) and extracts state.
    2. Extractor initializes with that state and hits the mock.
    3. Verify User-Agent consistency between the two requests.
    """
    port = 8888
    base_url = f"http://localhost:{port}/"
    runner = await start_mock_server(port)

    received_headers.clear()

    try:
        # 1. Browser phase
        async with CamoufoxManager() as browser:
            await browser.navigate(base_url)
            session_state = await browser.extract_session_state()

        assert len(received_headers) == 1
        browser_headers = received_headers[0]

        # 2. Extractor phase
        async with CurlClient(session_state) as extractor:
            await extractor.fetch(base_url)

        assert len(received_headers) == 2
        extractor_headers = received_headers[1]

        # 3. Verification
        print(f"Browser UA: {browser_headers.get('User-Agent')}")
        print(f"Extractor UA: {extractor_headers.get('User-Agent')}")

        assert browser_headers.get('User-Agent') == extractor_headers.get('User-Agent')
        # Note: other headers may vary slightly between browser and curl defaults,
        # but the User-Agent must match exactly.

    finally:
        await runner.cleanup()
75  tests/manual/verify_tls.py  Normal file
@@ -0,0 +1,75 @@
import asyncio
import json

from src.browser.manager import CamoufoxManager
from src.extractor.client import CurlClient

TARGET_URL = "https://tls.peet.ws/api/all"


async def main():
    print(f"Verifying TLS fingerprints against {TARGET_URL}...\n")

    # 1. Browser
    print(">>> 1. CAMOUFOX BROWSER REQUEST")
    browser_fp = None
    session_state = None

    try:
        async with CamoufoxManager(headless=True) as browser:
            await browser.navigate(TARGET_URL)
            # The endpoint returns JSON, but the browser renders it as a page, so
            # page.content() would give wrapping HTML. Reading document.body.innerText
            # via evaluate() yields the raw JSON string.
            json_text = await browser.page.evaluate("() => document.body.innerText")
            try:
                browser_fp = json.loads(json_text)
                print("Captured Browser Fingerprint:")
                print(json.dumps(browser_fp.get('tls', {}), indent=2))
            except json.JSONDecodeError:
                print("Could not parse JSON from browser page.")
                print(json_text[:200])

            session_state = await browser.extract_session_state()
    except Exception as e:
        print(f"Browser failed: {e}")
        return

    if not session_state:
        print("Failed to get session state.")
        return

    print("\n------------------------------------------------\n")

    # 2. Extractor
    print(">>> 2. CURL EXTRACTOR REQUEST")
    try:
        async with CurlClient(session_state) as extractor:
            json_text = await extractor.fetch(TARGET_URL)
            try:
                extractor_fp = json.loads(json_text)
                print("Captured Extractor Fingerprint:")
                print(json.dumps(extractor_fp.get('tls', {}), indent=2))

                # Comparison (guard against a missing browser fingerprint)
                b_ja3 = (browser_fp or {}).get('tls', {}).get('ja3_hash')
                e_ja3 = extractor_fp.get('tls', {}).get('ja3_hash')

                print("\nMatch Result:")
                print(f"Browser JA3:   {b_ja3}")
                print(f"Extractor JA3: {e_ja3}")

                if b_ja3 == e_ja3:
                    print("✅ SUCCESS: JA3 Hashes Match!")
                else:
                    print("❌ FAILURE: JA3 Mismatch.")

            except json.JSONDecodeError:
                print("Could not parse JSON from extractor response.")
                print(json_text[:200])

    except Exception as e:
        print(f"Extractor failed: {e}")


if __name__ == "__main__":
    asyncio.run(main())