feat(daemon): 新增浏览器守护进程服务,支持启动、状态查询与自动销毁

This commit is contained in:
WJZ_P
2026-03-18 23:37:24 +08:00
parent 0231b7378c
commit 74b4909d13
6 changed files with 569 additions and 74 deletions

View File

@@ -5,6 +5,7 @@
"main": "src/index.js",
"scripts": {
"demo": "node src/demo.js",
"daemon": "node src/daemon/server.js",
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [

267
src/daemon/engine.js Normal file
View File

@@ -0,0 +1,267 @@
/**
* engine.js — 浏览器引擎
*
* 职责:
* 维护 _browser 单例,封装 launch / connect / terminate。
* 复用项目已有的 stealth 插件、反检测参数、路径检测逻辑。
*
* 与 browser.js 的关系:
* browser.js 面向 Skill 直接调用（ensureBrowser → 拿到 page）；
* engine.js 面向 Daemon 服务(只管浏览器进程生命周期,不关心具体页面)。
*/
import puppeteerCore from 'puppeteer-core';
import { addExtra } from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import { createConnection } from 'node:net';
import { existsSync, mkdirSync, cpSync } from 'node:fs';
import { platform, homedir } from 'node:os';
import { join, basename } from 'node:path';
import config from '../config.js';
// ── Stealth wrapper ──
// Wrap puppeteer-core with puppeteer-extra so plugins can be registered,
// then register the stealth plugin on the wrapped instance.
const puppeteer = addExtra(puppeteerCore);
puppeteer.use(StealthPlugin());
// ── Singleton ──
// The one browser instance owned by this module. It starts as null, is
// assigned by ensureBrowserForDaemon() and reset to null by terminateBrowser().
let _browser = null;
// ── Browser candidate paths ──
// Well-known executable locations, keyed by the value returned by os.platform().
// detectBrowser() probes each list in order and returns the first path that exists.
const BROWSER_CANDIDATES = {
win32: [
'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
'C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe',
'C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe',
'C:\\Program Files\\Chromium\\Application\\chrome.exe',
],
darwin: [
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
'/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge',
'/Applications/Chromium.app/Contents/MacOS/Chromium',
],
linux: [
'/usr/bin/google-chrome',
'/usr/bin/google-chrome-stable',
'/usr/bin/microsoft-edge',
'/usr/bin/microsoft-edge-stable',
'/usr/bin/chromium',
'/usr/bin/chromium-browser',
'/snap/bin/chromium',
],
};
/**
 * Auto-detect an installed browser executable.
 *
 * Probes the static candidate list for the current platform first; on Windows
 * it then probes Chrome/Edge under the PROGRAMFILES, PROGRAMFILES(X86) and
 * LOCALAPPDATA environment roots (unset roots are skipped).
 *
 * @returns {string|undefined} The first candidate path that exists on disk,
 *   or undefined when none does.
 */
function detectBrowser() {
  const os = platform();
  const staticPaths = BROWSER_CANDIDATES[os] || [];

  const windowsRoots = [
    process.env.PROGRAMFILES,
    process.env['PROGRAMFILES(X86)'],
    process.env.LOCALAPPDATA,
  ];
  const envPaths = os === 'win32'
    ? windowsRoots
      .filter(Boolean)
      .flatMap((root) => [
        `${root}\\Google\\Chrome\\Application\\chrome.exe`,
        `${root}\\Microsoft\\Edge\\Application\\msedge.exe`,
      ])
    : [];

  // Array#find yields undefined when nothing matches, same as the explicit fallback.
  return [...staticPaths, ...envPaths].find((candidate) => existsSync(candidate));
}
// ── userDataDir handling ──
// Dedicated profile directory under the user's home directory; resolveUserDataDir()
// returns it whenever config.browserUserDataDir is not set.
const GLOBAL_WJZ_DATA_DIR = join(homedir(), '.wjz_browser_data');
/**
 * Locate the default profile ("user data") directory of an installed browser.
 *
 * Candidates are checked in the order Chrome, Edge, Chromium, using the
 * per-platform conventional locations. Any platform other than win32/darwin
 * is treated like Linux (~/.config/...).
 *
 * @returns {string|undefined} The first candidate directory that exists,
 *   or undefined when none does.
 */
function getDefaultBrowserDataDir() {
  const home = homedir();
  let profileDirs;

  switch (platform()) {
    case 'win32': {
      const localAppData = process.env.LOCALAPPDATA || join(home, 'AppData', 'Local');
      profileDirs = [
        join(localAppData, 'Google', 'Chrome', 'User Data'),
        join(localAppData, 'Microsoft', 'Edge', 'User Data'),
        join(localAppData, 'Chromium', 'User Data'),
      ];
      break;
    }
    case 'darwin': {
      const appSupport = join(home, 'Library', 'Application Support');
      profileDirs = [
        join(appSupport, 'Google', 'Chrome'),
        join(appSupport, 'Microsoft Edge'),
        join(appSupport, 'Chromium'),
      ];
      break;
    }
    default:
      profileDirs = [
        join(home, '.config', 'google-chrome'),
        join(home, '.config', 'microsoft-edge'),
        join(home, '.config', 'chromium'),
      ];
  }

  return profileDirs.find((dir) => existsSync(dir));
}
/**
 * Seed a fresh profile directory by copying an existing browser profile.
 *
 * Entries whose base name (case-insensitive) is a lock file, cache directory
 * or metrics file are skipped at every depth. The copy is best-effort: any
 * failure is logged as a warning and swallowed, so the caller can still
 * launch with whatever was copied.
 *
 * @param {string} sourceDir - Existing browser profile directory to copy from.
 * @param {string} targetDir - Destination profile directory.
 * @returns {void}
 */
function cloneProfileFromDefault(sourceDir, targetDir) {
  // Fix: the two paths were previously concatenated with no separator,
  // producing an unreadable "src/pathdst/path" log line.
  console.log(`[engine] 首次运行,从浏览器默认数据克隆资产: ${sourceDir} → ${targetDir}`);
  // Compared against lower-cased base names, so every entry must be lower case.
  const SKIP_NAMES = new Set([
    'singletonlock', 'singletonsocket', 'singletoncookie', 'lockfile',
    'cache', 'code cache', 'gpucache', 'dawncache', 'grshadercache',
    'crashpadmetrics-active.pma', 'browsermetrics', 'browsermetrics-spare.pma',
  ]);
  const filterFunc = (src) => !SKIP_NAMES.has(basename(src).toLowerCase());
  try {
    cpSync(sourceDir, targetDir, { recursive: true, filter: filterFunc });
    console.log('[engine] 克隆完成');
  } catch (err) {
    // Deliberately non-fatal: a partial clone only means the user must log in again.
    console.warn(`[engine] ⚠ 克隆出错(浏览器仍可启动,但需重新登录): ${err.message}`);
  }
}
/**
 * Decide which userDataDir the browser is launched with.
 *
 * Priority: an explicit config.browserUserDataDir, otherwise the dedicated
 * GLOBAL_WJZ_DATA_DIR. On first use that directory is created and, if a
 * default browser profile is found, seeded from it.
 *
 * @returns {string} Path of the profile directory to use.
 */
function resolveUserDataDir() {
  const configuredDir = config.browserUserDataDir;
  if (configuredDir) return configuredDir;

  const firstRun = !existsSync(GLOBAL_WJZ_DATA_DIR);
  if (firstRun) {
    mkdirSync(GLOBAL_WJZ_DATA_DIR, { recursive: true });
    const seedDir = getDefaultBrowserDataDir();
    if (seedDir) {
      cloneProfileFromDefault(seedDir, GLOBAL_WJZ_DATA_DIR);
    }
  }
  return GLOBAL_WJZ_DATA_DIR;
}
// ── Launch arguments ──
// Command-line switches passed to every browser launched by this module.
// ensureBrowserForDaemon() appends --remote-debugging-port=<port> to this list.
const BROWSER_ARGS = [
'--no-first-run',
'--disable-default-apps',
'--disable-popup-blocking',
'--disable-gpu',
'--disable-software-rasterizer',
'--disable-dev-shm-usage',
// The two sandbox switches are included only when running on Linux.
...(platform() === 'linux'
? ['--no-sandbox', '--disable-setuid-sandbox']
: []),
'--disable-background-networking',
'--disable-background-timer-throttling',
'--disable-backgrounding-occluded-windows',
'--disable-renderer-backgrounding',
'--disable-features=Translate',
'--no-default-browser-check',
'--disable-crash-reporter',
'--hide-crash-restore-bubble',
'--test-type',
];
/**
 * Probe whether something accepts TCP connections on host:port.
 *
 * Never rejects: a connection error or an elapsed timeout both resolve false.
 *
 * @param {number} port - TCP port to probe.
 * @param {string} [host='127.0.0.1'] - Host to connect to.
 * @param {number} [timeout=1500] - Milliseconds to wait before giving up.
 * @returns {Promise<boolean>} true if the connection was established.
 */
function isPortAlive(port, host = '127.0.0.1', timeout = 1500) {
  return new Promise((resolve) => {
    const probe = createConnection({ host, port });

    const onTimeout = () => {
      probe.destroy();
      resolve(false);
    };
    const deadline = setTimeout(onTimeout, timeout);

    const onConnect = () => {
      clearTimeout(deadline);
      probe.destroy();
      resolve(true);
    };
    const onError = () => {
      clearTimeout(deadline);
      resolve(false);
    };

    probe.on('connect', onConnect);
    probe.on('error', onError);
  });
}
// ── Public API ──
/**
 * Return the current browser singleton without connecting or launching.
 *
 * @returns {object|null} The value of the module-level `_browser`; null when
 *   no browser has been acquired yet or after terminateBrowser() ran.
 */
export function getBrowser() {
return _browser;
}
// In-flight connect/launch shared by concurrent callers; null when idle.
let _pendingEnsure = null;

/**
 * Ensure a browser is available (reuse, attach, or cold start) and return it.
 *
 * Daemon scenario: SIGINT/SIGTERM/SIGHUP are NOT handled here; server.js owns
 * signal handling, so Puppeteer's own signal hooks are disabled on launch.
 *
 * Concurrent calls made while no browser is connected share one connect/launch
 * attempt. Without this, two parallel /browser/acquire requests would each
 * launch a browser on the same debug port and userDataDir, and the later
 * result would overwrite `_browser`, orphaning the earlier instance.
 *
 * @returns {Promise<object>} The connected browser instance.
 * @throws {Error} When no executable can be found, or when launching fails.
 */
export async function ensureBrowserForDaemon() {
  // 1. Reuse the live connection.
  if (_browser && _browser.isConnected()) {
    return _browser;
  }
  if (!_pendingEnsure) {
    _pendingEnsure = connectOrLaunchBrowser().finally(() => {
      // Clear on success and on failure so a later call can retry.
      _pendingEnsure = null;
    });
  }
  return _pendingEnsure;
}

/** Attach to a browser already listening on the debug port, else launch one. */
async function connectOrLaunchBrowser() {
  const port = config.browserDebugPort;
  // 2. Try to attach to an already-running browser.
  const alive = await isPortAlive(port);
  if (alive) {
    try {
      _browser = await puppeteer.connect({
        browserURL: `http://127.0.0.1:${port}`,
        defaultViewport: null,
        protocolTimeout: config.browserProtocolTimeout,
      });
      console.log(`[engine] 已连接到端口 ${port} 的浏览器`);
      return _browser;
    } catch (err) {
      // The port answered but is not a usable debug endpoint: fall through to launch.
      console.warn(`[engine] 连接失败: ${err.message},将尝试启动`);
    }
  }
  // 3. Launch a new browser.
  const executablePath = config.browserPath || detectBrowser();
  if (!executablePath) {
    throw new Error(
      `[engine] 未找到可用浏览器。请设置 BROWSER_PATH 或安装 Chrome/Edge。`
    );
  }
  const userDataDir = resolveUserDataDir();
  _browser = await puppeteer.launch({
    executablePath,
    headless: config.browserHeadless,
    userDataDir,
    defaultViewport: null,
    args: [...BROWSER_ARGS, `--remote-debugging-port=${port}`],
    ignoreDefaultArgs: ['--enable-automation'],
    protocolTimeout: config.browserProtocolTimeout,
    // The daemon manages signals itself; do not let Puppeteer install handlers.
    handleSIGINT: false,
    handleSIGTERM: false,
    handleSIGHUP: false,
  });
  const pid = _browser.process()?.pid;
  console.log(`[engine] 浏览器已启动 pid=${pid} port=${port} path=${executablePath}`);
  return _browser;
}
/**
 * Close the browser and clear the singleton.
 *
 * Never throws: a failing close() is logged and followed by a best-effort
 * SIGKILL of the child process (when there is one).
 *
 * The instance is captured in a local first. The previous code re-read the
 * mutable `_browser` global after `await` and unconditionally nulled it in
 * `finally`, so it could kill or discard a newer instance that
 * ensureBrowserForDaemon() had installed while close() was pending.
 *
 * @returns {Promise<void>}
 */
export async function terminateBrowser() {
  const browser = _browser;
  if (!browser) return;
  try {
    const pid = browser.process()?.pid;
    await browser.close();
    console.log(`[engine] 浏览器已终止 pid=${pid || 'N/A'}`);
  } catch (err) {
    console.warn(`[engine] 终止浏览器时出错: ${err.message}`);
    // Fallback: force-kill the process.
    try {
      browser.process()?.kill('SIGKILL');
    } catch { /* best-effort: the process may already be gone */ }
  } finally {
    // Clear the singleton only if it still refers to the instance we closed.
    if (_browser === browser) {
      _browser = null;
    }
  }
}

132
src/daemon/handlers.js Normal file
View File

@@ -0,0 +1,132 @@
/**
* handlers.js — API 路由处理器
*
* 端点:
* GET /browser/acquire — Skill 专用:确保浏览器可用,返回 wsEndpoint
* GET /browser/status — Agent 探测口:查询浏览器健康状态(不续命)
* POST /browser/release — 主动销毁浏览器(硬重置)
* GET /health — Daemon 自身健康检查
*/
import { ensureBrowserForDaemon, getBrowser, terminateBrowser } from './engine.js';
import { resetHeartbeat, getLifecycleInfo } from './lifecycle.js';
/**
 * GET /browser/acquire
 *
 * Ensures a browser is available (cold-starting it if necessary), resets the
 * idle timer, and replies with the wsEndpoint and pid so that the caller can
 * puppeteer.connect() to it. Replies 500 with `acquire_failed` on any error.
 *
 * @param {object} _req - Incoming request (unused).
 * @param {object} res - Response object passed to sendJSON.
 * @returns {Promise<void>}
 */
export async function handleAcquire(_req, res) {
  try {
    const instance = await ensureBrowserForDaemon();
    resetHeartbeat();
    const payload = {
      ok: true,
      wsEndpoint: instance.wsEndpoint(),
      pid: instance.process()?.pid || null,
      lifecycle: getLifecycleInfo(),
    };
    sendJSON(res, 200, payload);
  } catch (failure) {
    const { message } = failure;
    console.error(`[handler] /browser/acquire 失败: ${message}`);
    sendJSON(res, 500, { ok: false, error: 'acquire_failed', detail: message });
  }
}
/**
 * GET /browser/status
 *
 * Read-only query: does not reset the idle timer. Replies (always HTTP 200)
 * with `offline`, `online` plus the list of open page targets, or `error`
 * when inspecting the browser throws.
 *
 * @param {object} _req - Incoming request (unused).
 * @param {object} res - Response object passed to sendJSON.
 * @returns {Promise<void>}
 */
export async function handleStatus(_req, res) {
  const browser = getBrowser();
  const isOnline = Boolean(browser) && browser.isConnected();
  if (!isOnline) {
    sendJSON(res, 200, { status: 'offline', lifecycle: getLifecycleInfo() });
    return;
  }

  const isPageTarget = (target) => target.type() === 'page';
  // NOTE(review): _targetId is an underscore-prefixed (non-public) field —
  // confirm it exists in the Puppeteer version this project pins.
  const describeTarget = (target) => ({
    targetId: target._targetId,
    url: target.url(),
  });

  try {
    const pages = browser.targets().filter(isPageTarget).map(describeTarget);
    const snapshot = {
      status: 'online',
      pid: browser.process()?.pid || null,
      wsEndpoint: browser.wsEndpoint(),
      pages,
      pageCount: pages.length,
      lifecycle: getLifecycleInfo(),
    };
    sendJSON(res, 200, snapshot);
  } catch (failure) {
    sendJSON(res, 200, {
      status: 'error',
      error: failure.message,
      lifecycle: getLifecycleInfo(),
    });
  }
}
/**
 * POST /browser/release
 *
 * Actively destroys the browser (hard reset). Replies `browser_already_offline`
 * when there is no instance, `browser_terminated` with the pid on success, or
 * 500 `release_failed` if termination throws.
 *
 * @param {object} _req - Incoming request (unused).
 * @param {object} res - Response object passed to sendJSON.
 * @returns {Promise<void>}
 */
export async function handleRelease(_req, res) {
  const instance = getBrowser();
  if (instance) {
    try {
      // Read the pid before terminating; afterwards the singleton is cleared.
      const pid = instance.process()?.pid || null;
      await terminateBrowser();
      sendJSON(res, 200, { ok: true, message: 'browser_terminated', pid });
    } catch (failure) {
      const { message } = failure;
      console.error(`[handler] /browser/release 失败: ${message}`);
      sendJSON(res, 500, { ok: false, error: 'release_failed', detail: message });
    }
    return;
  }
  sendJSON(res, 200, { ok: true, message: 'browser_already_offline' });
}
/**
 * GET /health
 *
 * Health check for the daemon process itself: reports process uptime in whole
 * seconds and resident set size in whole megabytes.
 *
 * @param {object} _req - Incoming request (unused).
 * @param {object} res - Response object passed to sendJSON.
 * @returns {void}
 */
export function handleHealth(_req, res) {
  const BYTES_PER_MB = 1024 * 1024;
  const uptime = Math.round(process.uptime());
  const memoryMB = Math.round(process.memoryUsage().rss / 1024 / 1024);
  void BYTES_PER_MB; // documents the divisor above; the arithmetic is kept verbatim
  sendJSON(res, 200, {
    ok: true,
    service: 'browser-daemon',
    uptime,
    memoryMB,
  });
}
// ── Utilities ──
/**
 * Write a JSON response.
 *
 * The payload is serialized BEFORE the status line is written. Previously
 * writeHead() ran first, so a JSON.stringify failure (e.g. a circular
 * structure) left the response with headers committed and no body, and the
 * caller's own error reply then failed because headers were already sent.
 *
 * @param {object} res - Response object exposing writeHead() and end().
 * @param {number} statusCode - HTTP status code.
 * @param {*} data - JSON-serializable payload.
 * @returns {void}
 * @throws {TypeError} When `data` cannot be serialized; nothing has been
 *   written to `res` in that case.
 */
function sendJSON(res, statusCode, data) {
  const body = JSON.stringify(data);
  res.writeHead(statusCode, { 'Content-Type': 'application/json; charset=utf-8' });
  res.end(body);
}

71
src/daemon/lifecycle.js Normal file
View File

@@ -0,0 +1,71 @@
/**
* lifecycle.js — 生命周期控制器
*
* 职责:
* 管理"惰性销毁"定时器。每次收到请求就 resetHeartbeat()(续命);
* 超时未活动则触发浏览器优雅关闭,释放系统资源。
*
* 关键设计:
* - _idleTimer.unref():定时器不阻止 Node 进程退出,
* 否则 SIGINT 时进程会因为未执行完的定时器而挂住。
*/
import { terminateBrowser } from './engine.js';
const DEFAULT_TTL_MS = 30 * 60 * 1000; // 30 minutes
// Largest delay setTimeout() honours. Node coerces anything larger to 1 ms,
// which would terminate the browser immediately instead of "almost never".
const MAX_TIMER_DELAY_MS = 2 ** 31 - 1;
// Handle of the pending idle timer, or null when none is scheduled.
let _idleTimer = null;
// Idle time-to-live in milliseconds used when scheduling the timer.
let _ttlMs = DEFAULT_TTL_MS;
// Epoch milliseconds of the last resetHeartbeat() call; 0 means "never".
let _lastHeartbeat = 0;
/**
 * Set the idle TTL (the daemon entry point feeds it from an environment variable).
 *
 * Non-positive and NaN values fall back to the 30-minute default. Values above
 * the maximum timer delay are clamped to it.
 *
 * @param {number} ms - Desired TTL in milliseconds.
 * @returns {void}
 */
export function setTTL(ms) {
  _ttlMs = ms > 0 ? Math.min(ms, MAX_TIMER_DELAY_MS) : DEFAULT_TTL_MS;
}
/**
 * Reset the idle timer ("keep-alive"); call on every API request that should
 * extend the browser's life.
 *
 * Cancels any pending timer, records the heartbeat time and schedules a new
 * timer that terminates the browser after the current TTL.
 *
 * The fired callback drops its own handle BEFORE awaiting, and only if the
 * handle is still its own. Previously `_idleTimer = null` ran after
 * `await terminateBrowser()`, so a heartbeat arriving during the await had its
 * fresh handle wiped; that timer could then no longer be cleared and would
 * fire earlier than the latest heartbeat warranted.
 *
 * @returns {void}
 */
export function resetHeartbeat() {
  if (_idleTimer) clearTimeout(_idleTimer);
  _lastHeartbeat = Date.now();
  const timer = setTimeout(async () => {
    if (_idleTimer === timer) {
      _idleTimer = null;
    }
    console.log(`[lifecycle] 💤 ${(_ttlMs / 60000).toFixed(0)} 分钟未活动,终止浏览器进程`);
    await terminateBrowser();
  }, _ttlMs);
  // Critical: an unref'ed timer does not keep the Node process alive.
  timer.unref();
  _idleTimer = timer;
}
/**
 * Cancel the pending idle timer, if any (used for cleanup when the daemon
 * shuts down). Safe to call when no timer is scheduled.
 *
 * @returns {void}
 */
export function cancelHeartbeat() {
  if (!_idleTimer) return;
  clearTimeout(_idleTimer);
  _idleTimer = null;
}
/**
 * Snapshot of the idle-lifecycle state.
 *
 * @returns {{ttlMs: number, lastHeartbeat: (string|null), idleSeconds: number, remainingSeconds: number}}
 *   `lastHeartbeat` is an ISO-8601 timestamp or null when no heartbeat was
 *   recorded; `idleSeconds` and `remainingSeconds` are -1 in that case, and
 *   `remainingSeconds` never goes below 0 otherwise.
 */
export function getLifecycleInfo() {
  const now = Date.now();
  const hasHeartbeat = _lastHeartbeat > 0;

  let lastHeartbeat = null;
  let idleSeconds = -1;
  let remainingSeconds = -1;
  if (hasHeartbeat) {
    idleSeconds = Math.round((now - _lastHeartbeat) / 1000);
    const msLeft = _lastHeartbeat + _ttlMs - now;
    remainingSeconds = Math.max(0, Math.round(msLeft / 1000));
    lastHeartbeat = new Date(_lastHeartbeat).toISOString();
  }

  return {
    ttlMs: _ttlMs,
    lastHeartbeat,
    idleSeconds,
    remainingSeconds,
  };
}

88
src/daemon/server.js Normal file
View File

@@ -0,0 +1,88 @@
/**
* server.js — Browser Daemon 入口
*
* 一个极简的 HTTP 微服务,管理浏览器进程的生命周期。
*
* 启动方式:
* node src/daemon/server.js
* DAEMON_PORT=40225 node src/daemon/server.js
*
* API 端点:
* GET /browser/acquire — 确保浏览器可用,返回 wsEndpoint（续命）
* GET /browser/status — 查询浏览器状态(不续命)
* POST /browser/release — 主动销毁浏览器
* GET /health — Daemon 健康检查
*/
import { createServer } from 'node:http';
import { handleAcquire, handleStatus, handleRelease, handleHealth } from './handlers.js';
import { setTTL, cancelHeartbeat } from './lifecycle.js';
import { terminateBrowser } from './engine.js';
// ── Configuration ──
// Listening port, read from DAEMON_PORT (default 40225).
const PORT = parseInt(process.env.DAEMON_PORT || '40225', 10);
// Idle TTL in milliseconds, read from DAEMON_TTL_MS (default 30 minutes).
const TTL_MS = parseInt(process.env.DAEMON_TTL_MS || String(30 * 60 * 1000), 10);
// Hand the TTL to the lifecycle module before any request can arrive.
setTTL(TTL_MS);
// ── Route table ──
// Maps "<METHOD> <path>" (query string already removed) to its handler.
// Matching is exact: any other method/path combination gets a 404.
const routes = {
'GET /browser/acquire': handleAcquire,
'GET /browser/status': handleStatus,
'POST /browser/release': handleRelease,
'GET /health': handleHealth,
};
// ── HTTP server ──
// Dispatches each request through the route table. The handler is awaited
// inside try/catch: previously its return value was dropped, so a synchronous
// throw or an async rejection was only picked up by the process-level
// fallbacks and the client never received a response.
const server = createServer(async (req, res) => {
  const { method, url } = req;
  // Strip the query string.
  const path = (url || '/').split('?')[0];
  const routeKey = `${method} ${path}`;
  const handler = routes[routeKey];
  if (!handler) {
    res.writeHead(404, { 'Content-Type': 'application/json' });
    res.end(JSON.stringify({ ok: false, error: 'not_found', path }));
    return;
  }
  try {
    await handler(req, res);
  } catch (err) {
    console.error(`[daemon] ❌ ${routeKey} 处理失败:`, err);
    if (!res.headersSent) {
      res.writeHead(500, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ ok: false, error: 'internal_error' }));
    } else if (!res.writableEnded) {
      // Headers are already out; all we can do is terminate the response.
      res.end();
    }
  }
});
// Bind to the loopback interface by default. Without a host argument Node
// listens on all interfaces, which exposed the browser-control endpoints to
// the network although the banner advertised 127.0.0.1. Set DAEMON_HOST
// (e.g. 0.0.0.0) to opt back in to remote access.
const HOST = process.env.DAEMON_HOST || '127.0.0.1';
server.listen(PORT, HOST, () => {
  console.log(`[daemon] 🚀 Browser Daemon 已启动 — http://${HOST}:${PORT}`);
  console.log(`[daemon] ⏱ 闲置 TTL: ${(TTL_MS / 60000).toFixed(0)} 分钟`);
  console.log(`[daemon] GET /browser/acquire — 获取/启动浏览器`);
  console.log(`[daemon] GET /browser/status — 查询浏览器状态`);
  console.log(`[daemon] POST /browser/release — 销毁浏览器`);
  console.log(`[daemon] GET /health — 健康检查`);
});
// ── Graceful shutdown: intercept termination signals ──
const SIGNALS = ['SIGINT', 'SIGTERM', 'SIGHUP'];
for (const signalName of SIGNALS) {
  /** Stop accepting requests, cancel the idle timer, close the browser, exit 0. */
  const shutdown = async () => {
    console.log(`\n[daemon] 🛑 收到 ${signalName},开始优雅退出...`);
    // 1. Stop accepting new requests.
    server.close();
    // 2. Cancel the idle timer.
    cancelHeartbeat();
    // 3. Terminate the browser.
    await terminateBrowser();
    console.log('[daemon] ✅ 清理完毕,进程退出');
    process.exit(0);
  };
  process.on(signalName, shutdown);
}
// ── Last-resort handlers for uncaught errors ──
// Both handlers only log and do not exit, so the daemon keeps running after
// an uncaught exception or an unhandled promise rejection.
process.on('uncaughtException', (err) => {
// Only err.message is logged here (no stack trace).
console.error('[daemon] ❌ 未捕获异常:', err.message);
});
process.on('unhandledRejection', (reason) => {
// The rejection reason is logged as-is.
console.error('[daemon] ❌ 未处理的 Promise 拒绝:', reason);
});

View File

@@ -95,50 +95,6 @@ const SELECTORS = {
export function createOps(page) {
const op = createOperator(page);
// ── 图片请求缓存 mapURL → { requestId, ts } ──
// 监听 Network.responseReceived收集图片请求的 requestId
// 供 extractImageBase64 的 getResponseBody 缓存阶段使用。
// 每条缓存 TTL 5 分钟,过期后 getResponseBody 大概率也失效。
const IMAGE_CACHE_TTL = 5 * 60 * 1000; // 5 min
const imageRequestMap = new Map();
function setImageRequest(url, requestId) {
// 先处理url已存在的情况这个时候取消掉url的定时器
if(imageRequestMap.has(url)) {
const entry = imageRequestMap.get(url);
clearTimeout(entry.timer);
}
const timer = setTimeout(() => {
imageRequestMap.delete(url);
}, IMAGE_CACHE_TTL);
timer.unref();// 这个方法可以防止定时器影响进程退出
imageRequestMap.set(url, {
requestId,
timer: timer,
});
}
function getImageRequestId(url) {
const entry = imageRequestMap.get(url);
return entry ? entry.requestId : null;
}
(async () => {
try {
const client = page._client();
await client.send('Network.enable');
client.on('Network.responseReceived', (params) => {
const { requestId, response } = params;
const mime = response.mimeType || '';
if (mime.startsWith('image/')) {
setImageRequest(response.url, requestId);
}
});
} catch (e) {
console.warn('[ops] Network 监听初始化失败(不影响核心功能):', e.message);
}
})();
return {
/** 暴露底层 operator供高级用户直接使用 */
operator: op,
@@ -278,7 +234,7 @@ export function createOps(page) {
}
// 2. 等待菜单动画展开
await sleep(800);
await sleep(250);
// 3. 点击目标模型选项
const selectResult = await op.click(targetSels);
@@ -535,14 +491,13 @@ export function createOps(page) {
/**
* 提取指定图片的 Base64 数据
*
* 四级降级策略:
* 1. Canvas — 同步提取,最快(但跨域图片会被 taint
* 2. 页面 fetch — 异步读取 blob受 CORS 限制Google 图片通常不可用)
* 3. CDP getResponseBody — 从浏览器内存缓存读取,零网络开销(需要 requestId 命中)
* 4. CDP loadNetworkResource — 通过 CDP 协议用浏览器网络栈重新下载,绕过 CORS终极兜底
* 降级策略:
* 1. Canvas — 同步提取,最快(但跨域图片会被 taint【已注释,留作参考】
* 2. 页面 fetch — 异步读取 blob受 CORS 限制Google 图片通常不可用)【已注释,留作参考】
* 3. CDP loadNetworkResource — 通过 CDP 协议用浏览器网络栈下载,绕过 CORS
*
* @param {string} url - 目标图片的 src URL
* @returns {Promise<{ok: boolean, dataUrl?: string, width?: number, height?: number, method?: 'canvas'|'fetch'|'cdp-cache'|'cdp', error?: string}>}
* @returns {Promise<{ok: boolean, dataUrl?: string, method?: 'cdp', error?: string}>}
*/
async extractImageBase64(url) {
if (!url) {
@@ -612,26 +567,7 @@ export function createOps(page) {
// console.log(`[extractImageBase64] ⚠ 页面 fetch 失败 (${fetchResult.error}${fetchResult.detail ? ' — ' + fetchResult.detail : ''}),尝试 CDP 缓存读取...`);
// ── 阶段 3: CDP Network.getResponseBody从浏览器内存缓存读取零网络开销 ──
const requestId = getImageRequestId(canvasResult.src);
if (requestId) {
try {
const client = page._client();
const { body, base64Encoded } = await client.send('Network.getResponseBody', { requestId });
const base64Data = base64Encoded ? body : Buffer.from(body, 'utf8').toString('base64');
const mime = 'image/png'; // 缓存中无法直接拿 MIME用 png 兜底
const dataUrl = `data:${mime};base64,${base64Data}`;
console.log(`[extractImageBase64] ✅ CDP 缓存命中 (${canvasResult.width}x${canvasResult.height}, size=${(base64Data.length * 0.75 / 1024).toFixed(1)}KB)`);
return { ok: true, dataUrl, width: canvasResult.width, height: canvasResult.height, method: 'cdp-cache' };
} catch (e) {
console.log(`[extractImageBase64] ⚠ CDP 缓存读取失败 (${e.message}),降级为 CDP 网络请求...`);
}
} else {
console.log('[extractImageBase64] ⚠ 缓存中无该 URL 的 requestId降级为 CDP 网络请求...');
}
// ── 阶段 4: CDP Network.loadNetworkResource终极兜底重新发请求绕过 CORS ──
// ── CDP Network.loadNetworkResource通过 CDP 发请求,绕过 CORS ──
try {
const client = page._client();
const frameId = page.mainFrame()._id;
@@ -639,7 +575,7 @@ export function createOps(page) {
console.log(`[extractImageBase64] 📡 CDP 请求中... frameId=${frameId}`);
const { resource } = await client.send('Network.loadNetworkResource', {
frameId,
url: canvasResult.src,
url,
options: { disableCache: false, includeCredentials: true },
});
@@ -675,8 +611,8 @@ export function createOps(page) {
const mime = (resource.headers?.['content-type'] || resource.headers?.['Content-Type'] || 'image/png').split(';')[0].trim();
const dataUrl = `data:${mime};base64,${base64Full}`;
console.log(`[extractImageBase64] ✅ CDP 提取成功 (${canvasResult.width}x${canvasResult.height}, mime=${mime}, size=${(base64Full.length * 0.75 / 1024).toFixed(1)}KB)`);
return { ok: true, dataUrl, width: canvasResult.width, height: canvasResult.height, method: 'cdp' };
console.log(`[extractImageBase64] ✅ CDP 提取成功 (mime=${mime}, size=${(base64Full.length * 0.75 / 1024).toFixed(1)}KB)`);
return { ok: true, dataUrl, method: 'cdp' };
} catch (err) {
const errMsg = err.message || String(err);
console.warn(`[extractImageBase64] ❌ CDP 提取异常: ${errMsg}`);