to implement e2e tests
This commit is contained in:
parent
ef370dacff
commit
a15ca58ef8
4 changed files with 334 additions and 0 deletions
120
src/browser/manager.py
Normal file
120
src/browser/manager.py
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
import asyncio
|
||||
import logging
|
||||
from typing import Optional, Dict, Any
|
||||
from playwright.async_api import async_playwright, BrowserContext, Page, Browser
|
||||
from src.core.session import SessionState
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class CamoufoxManager:
    """
    Manages the lifecycle of a Camoufox (Playwright) browser instance.
    Handles initialization, navigation, and session state extraction.

    In this MVP the browser is Playwright's Chromium, launched with a fixed
    User-Agent, viewport, locale and timezone.

    Typical usage::

        async with CamoufoxManager() as browser:
            await browser.navigate(url)
            state = await browser.extract_session_state()

    Args:
        headless: Forwarded to ``chromium.launch(headless=...)``.
        proxy: Optional proxy settings forwarded to ``chromium.launch(proxy=...)``.
    """

    def __init__(self, headless: bool = True, proxy: Optional[Dict[str, str]] = None):
        self.headless = headless
        self.proxy = proxy
        # All handles stay None until initialize() succeeds.
        self.playwright = None
        self.browser: Optional[Browser] = None
        self.context: Optional[BrowserContext] = None
        self.page: Optional[Page] = None
        # Fixed User-Agent applied to the browser context and reported in SessionState.
        self._dummy_user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

    async def __aenter__(self):
        await self.initialize()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    async def initialize(self) -> None:
        """
        Launch the browser and create a context and a page.

        If any step fails, everything started so far is closed before the
        error is re-raised: ``async with`` does not call ``__aexit__`` when
        ``__aenter__`` raises, so nothing else would release the resources.
        """
        logger.info("Initializing CamoufoxManager...")
        try:
            self.playwright = await async_playwright().start()

            # Launch options
            launch_args = ["--disable-blink-features=AutomationControlled"]

            self.browser = await self.playwright.chromium.launch(
                headless=self.headless,
                args=launch_args,
                proxy=self.proxy,
            )

            # Context options usually come from a profile (e.g. BrowserForge)
            # For MVP, we set a fixed User-Agent and Viewport
            self.context = await self.browser.new_context(
                user_agent=self._dummy_user_agent,
                viewport={"width": 1920, "height": 1080},
                locale="en-US",
                timezone_id="America/New_York",
            )

            # Anti-detection scripts would be injected here
            await self.context.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")

            self.page = await self.context.new_page()
        except BaseException:
            try:
                await self.close()
            except Exception:
                # Keep the original initialization error as the one that propagates.
                logger.exception("Cleanup after failed initialization also failed")
            raise
        logger.info("Browser initialized.")

    async def close(self) -> None:
        """
        Clean up resources aggressively.

        Every held handle (context, browser, Playwright driver) gets its
        close/stop call even if an earlier one raises; the error still
        propagates afterwards. Handles are cleared up front, so calling
        close() again is a no-op.
        """
        logger.info("Closing CamoufoxManager resources...")
        context, browser, playwright = self.context, self.browser, self.playwright
        self.page = None
        self.context = None
        self.browser = None
        self.playwright = None
        try:
            if context:
                await context.close()
        finally:
            try:
                if browser:
                    await browser.close()
            finally:
                if playwright:
                    await playwright.stop()
        logger.info("Resources cleaned up.")

    async def navigate(self, url: str) -> None:
        """
        Navigate the page to ``url`` and wait for ``domcontentloaded``.

        Raises:
            RuntimeError: If the browser has not been initialized.
        """
        if not self.page:
            raise RuntimeError("Browser not initialized")
        logger.info("Navigating to %s", url)
        await self.page.goto(url, wait_until='domcontentloaded')

    async def extract_session_state(self) -> "SessionState":
        """
        Extract cookies, storage, and fingerprint details into SessionState.

        Returns:
            A SessionState holding the context cookies, the current page's
            localStorage and sessionStorage as dicts, the ``cf_clearance``
            cookie dict (or None when absent), the fixed User-Agent and the
            hardcoded TLS fingerprint label.

        Raises:
            RuntimeError: If the browser has not been initialized.
        """
        # Local import: this module does not import json at file level.
        import json

        if not self.context or not self.page:
            raise RuntimeError("Browser not initialized")

        logger.info("Extracting session state...")

        # 1. Cookies
        cookies = await self.context.cookies()

        # 2. Local Storage (serialized in the page, decoded here)
        local_storage = await self.page.evaluate("() => JSON.stringify(window.localStorage)")
        local_storage_dict = json.loads(local_storage)

        # 3. Session Storage
        session_storage = await self.page.evaluate("() => JSON.stringify(window.sessionStorage)")
        session_storage_dict = json.loads(session_storage)

        # 4. CF Clearance (Search in cookies)
        cf_clearance = next((c for c in cookies if c['name'] == 'cf_clearance'), None)

        # 5. TLS Fingerprint (In a real scenario, this matches the browser build)
        # For now, we hardcode what we expect to match the Extractor
        tls_fingerprint = "chrome120"

        return SessionState(
            cookies=cookies,
            local_storage=local_storage_dict,
            session_storage=session_storage_dict,
            cf_clearance=cf_clearance,
            user_agent=self._dummy_user_agent,
            tls_fingerprint=tls_fingerprint,
        )
|
||||
73
src/extractor/client.py
Normal file
73
src/extractor/client.py
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
from curl_cffi.requests import AsyncSession
|
||||
from src.core.session import SessionState
|
||||
import logging
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class CurlClient:
    """
    High-performance extraction client using curl_cffi.
    Mimics the TLS fingerprint and header profile of the browser session.

    Typical usage::

        async with CurlClient(session_state) as extractor:
            body = await extractor.fetch(url)

    Args:
        session_state: State captured from the browser; its cookies,
            ``user_agent`` and ``tls_fingerprint`` are read by initialize().
    """

    def __init__(self, session_state: "SessionState"):
        self.session_state = session_state
        # Created by initialize(); None while the client is not usable.
        self.session = None

    async def __aenter__(self):
        await self.initialize()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    async def initialize(self) -> None:
        """
        Configure the curl_cffi session with matching fingerprint.

        If configuring cookies or headers fails, the freshly created session
        is closed before the error is re-raised, because ``async with`` does
        not call ``__aexit__`` when ``__aenter__`` raises.
        """
        logger.info("Initializing CurlClient...")

        # impersonate argument controls TLS Client Hello
        # 'chrome120' matches our hardcoded Camoufox build in this MVP
        self.session = AsyncSession(impersonate=self.session_state.tls_fingerprint)

        try:
            # 1. Inject Cookies
            for cookie in self.session_state.cookies:
                # curl_cffi expects specific arguments for setting cookies if done manually,
                # or we can use the cookies parameter in requests.
                # But AsyncSession has a cookie jar.
                self.session.cookies.set(
                    name=cookie['name'],
                    value=cookie['value'],
                    domain=cookie['domain'],
                    path=cookie.get('path', '/')
                    # secure is handled by protocol
                )

            # 2. Set Headers
            # We need to explicitly set headers that match the browser
            self.session.headers = {
                "User-Agent": self.session_state.user_agent,
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
                "Accept-Language": "en-US,en;q=0.9",
                # Add sec-ch-ua derivation here if strict mode
            }
        except BaseException:
            try:
                await self.close()
            except Exception:
                # Keep the original initialization error as the one that propagates.
                logger.exception("Cleanup after failed initialization also failed")
            raise

        logger.info("CurlClient initialized with impersonation: %s", self.session_state.tls_fingerprint)

    async def close(self) -> None:
        """
        Close the underlying session, if any, and forget it.

        AsyncSession.close() is a coroutine and must be awaited; calling it
        without ``await`` never actually closes the session. The handle is
        cleared first so a repeated close() is a no-op and fetch() after
        close() reports "Client not initialized".
        """
        session, self.session = self.session, None
        if session is not None:
            await session.close()

    async def fetch(self, url: str) -> str:
        """
        Execute a GET request using the impersonated session.

        Returns:
            The response body text. The status code is only logged, not checked.

        Raises:
            RuntimeError: If the client has not been initialized.
        """
        if not self.session:
            raise RuntimeError("Client not initialized")

        logger.info("Fetching %s...", url)
        response = await self.session.get(url)
        logger.info("Response status: %s", response.status_code)
        return response.text
|
||||
66
tests/e2e/test_handover.py
Normal file
66
tests/e2e/test_handover.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
import asyncio
|
||||
import pytest
|
||||
from aiohttp import web
|
||||
from src.browser.manager import CamoufoxManager
|
||||
from src.extractor.client import CurlClient
|
||||
|
||||
# Global to store received headers for verification
|
||||
received_headers = []
|
||||
|
||||
async def handle_request(request):
    """
    Mock endpoint: record the incoming request headers in the module-level
    ``received_headers`` list and echo them back in a JSON response.
    """
    seen = dict(request.headers)
    received_headers.append(seen)
    payload = {"status": "ok", "headers": seen}
    return web.json_response(payload)
|
||||
|
||||
async def start_mock_server(port=8080):
    """
    Start the mock aiohttp application on ``localhost:port``.

    Only ``GET /`` is routed (to ``handle_request``). Returns the AppRunner
    so the caller can shut the server down with ``await runner.cleanup()``.
    """
    application = web.Application()
    application.router.add_get('/', handle_request)

    app_runner = web.AppRunner(application)
    await app_runner.setup()

    await web.TCPSite(app_runner, 'localhost', port).start()
    return app_runner
|
||||
|
||||
@pytest.mark.asyncio
async def test_handover_e2e():
    """
    End-to-end handover check against the local mock server:
    1. The browser visits the mock and its session state is extracted.
    2. The extractor is built from that state and hits the same mock.
    3. Both requests must carry exactly the same User-Agent header.
    """
    port = 8888
    base_url = f"http://localhost:{port}/"
    runner = await start_mock_server(port)

    # Start from a clean slate; the list is shared module state.
    received_headers.clear()

    try:
        # 1. Browser Phase
        async with CamoufoxManager() as browser:
            await browser.navigate(base_url)
            session_state = await browser.extract_session_state()

        assert len(received_headers) == 1
        browser_headers = received_headers[0]

        # 2. Extractor Phase
        async with CurlClient(session_state) as extractor:
            await extractor.fetch(base_url)

        assert len(received_headers) == 2
        extractor_headers = received_headers[1]

        # 3. Verification
        browser_ua = browser_headers.get('User-Agent')
        extractor_ua = extractor_headers.get('User-Agent')
        print(f"Browser UA: {browser_ua}")
        print(f"Extractor UA: {extractor_ua}")

        # Other headers may differ between browser and curl defaults,
        # but the User-Agent has to match exactly.
        assert browser_ua == extractor_ua
    finally:
        await runner.cleanup()
|
||||
75
tests/manual/verify_tls.py
Normal file
75
tests/manual/verify_tls.py
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
import asyncio
|
||||
import json
|
||||
from src.browser.manager import CamoufoxManager
|
||||
from src.extractor.client import CurlClient
|
||||
|
||||
TARGET_URL = "https://tls.peet.ws/api/all"
|
||||
|
||||
def _parse_fingerprint(text):
    """Decode ``text`` as a JSON object; return None if it is not one."""
    try:
        data = json.loads(text)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; TypeError covers non-string input.
        return None
    return data if isinstance(data, dict) else None


async def main():
    """
    Manually compare the TLS fingerprint of the browser with the extractor's.

    Loads TARGET_URL once through CamoufoxManager and once through CurlClient
    (seeded with the browser's session state), prints the ``tls`` section of
    each JSON reply and reports whether the ``ja3_hash`` values match.
    Failures are printed rather than raised.
    """
    print(f"Verifying TLS Fingerprints against {TARGET_URL}...\n")

    # 1. Browser
    print(">>> 1. CAMOUFOX BROWSER REQUEST")
    browser_fp = None
    session_state = None

    try:
        async with CamoufoxManager(headless=True) as browser:
            await browser.navigate(TARGET_URL)
            # The endpoint returns JSON which the browser renders as text,
            # so read the body text instead of the HTML from page.content().
            json_text = await browser.page.evaluate("() => document.body.innerText")
            browser_fp = _parse_fingerprint(json_text)
            if browser_fp is None:
                print("Could not parse JSON from browser page.")
                print(json_text[:200])
            else:
                print("Captured Browser Fingerprint:")
                print(json.dumps(browser_fp.get('tls', {}), indent=2))

            session_state = await browser.extract_session_state()
    except Exception as e:
        print(f"Browser failed: {e}")
        return

    if not session_state:
        print("Failed to get session state.")
        return

    print("\n------------------------------------------------\n")

    # 2. Extractor
    print(">>> 2. CURL EXTRACTOR REQUEST")
    try:
        async with CurlClient(session_state) as extractor:
            json_text = await extractor.fetch(TARGET_URL)
            extractor_fp = _parse_fingerprint(json_text)
            if extractor_fp is None:
                print("Could not parse JSON from extractor response.")
                print(json_text[:200])
                return

            print("Captured Extractor Fingerprint:")
            print(json.dumps(extractor_fp.get('tls', {}), indent=2))

            # Comparison
            if browser_fp is None:
                # Without a browser fingerprint there is nothing to compare against.
                print("\nBrowser fingerprint unavailable; skipping JA3 comparison.")
                return

            b_ja3 = browser_fp.get('tls', {}).get('ja3_hash')
            e_ja3 = extractor_fp.get('tls', {}).get('ja3_hash')

            print("\nMatch Result:")
            print(f"Browser JA3: {b_ja3}")
            print(f"Extractor JA3: {e_ja3}")

            if b_ja3 == e_ja3:
                print("✅ SUCCESS: JA3 Hashes Match!")
            else:
                print("❌ FAILURE: JA3 Mismatch.")
    except Exception as e:
        print(f"Extractor failed: {e}")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
Loading…
Reference in a new issue