327 lines
11 KiB
JavaScript
327 lines
11 KiB
JavaScript
/**
|
||
* operator.js — 纯 CDP 底层操作封装
|
||
*
|
||
* 职责:
|
||
* 封装最基础的浏览器交互原语(点击、输入、查询、等待等),
|
||
* 全部通过 CDP 协议实现,不往页面注入任何对象。
|
||
*
|
||
* 设计原则:
|
||
* - 所有 DOM 操作通过 page.evaluate() 一次性执行,执行完即走,不留痕迹
|
||
* - 鼠标 / 键盘事件通过 CDP Input 域发送,生成 isTrusted=true 的原生事件
|
||
* - 每个方法都是独立的原子操作,上层 gemini-ops.js 负责编排组合
|
||
*/
|
||
|
||
/**
|
||
* 创建 operator 实例
|
||
* @param {import('puppeteer-core').Page} page
|
||
*/
|
||
export function createOperator(page) {
|
||
|
||
// ─── 内部工具 ───
|
||
|
||
/**
|
||
* 通过 CSS 选择器列表查找第一个可见元素,返回其中心坐标和边界信息
|
||
* 在页面上下文中执行,执行完即走
|
||
* @param {string[]} selectors - 候选选择器,按优先级排列
|
||
* @returns {Promise<{found: boolean, x?: number, y?: number, width?: number, height?: number, selector?: string, tagName?: string}>}
|
||
*/
|
||
async function locate(selectors) {
|
||
return page.evaluate((sels) => {
|
||
for (const sel of sels) {
|
||
let el = null;
|
||
try {
|
||
// 支持 :has-text("xxx") 伪选择器
|
||
if (sel.includes(':has-text(')) {
|
||
const m = sel.match(/^(.*):has-text\("(.*)"\)$/);
|
||
if (m) {
|
||
const candidates = [...document.querySelectorAll(m[1] || '*')];
|
||
el = candidates.find(n => {
|
||
const r = n.getBoundingClientRect();
|
||
const st = getComputedStyle(n);
|
||
return r.width > 0 && r.height > 0
|
||
&& st.display !== 'none' && st.visibility !== 'hidden'
|
||
&& n.textContent?.includes(m[2]);
|
||
}) || null;
|
||
}
|
||
} else {
|
||
const all = [...document.querySelectorAll(sel)];
|
||
el = all.find(n => {
|
||
const r = n.getBoundingClientRect();
|
||
const st = getComputedStyle(n);
|
||
return r.width > 0 && r.height > 0
|
||
&& st.display !== 'none' && st.visibility !== 'hidden';
|
||
}) || null;
|
||
}
|
||
} catch { /* 选择器语法错误,跳过 */ }
|
||
|
||
if (el) {
|
||
const rect = el.getBoundingClientRect();
|
||
return {
|
||
found: true,
|
||
x: rect.x + rect.width / 2,
|
||
y: rect.y + rect.height / 2,
|
||
width: rect.width,
|
||
height: rect.height,
|
||
selector: sel,
|
||
tagName: el.tagName.toLowerCase(),
|
||
};
|
||
}
|
||
}
|
||
return { found: false };
|
||
}, selectors);
|
||
}
|
||
|
||
/**
|
||
* 给坐标加一点随机偏移,模拟人类鼠标不精确的特征
|
||
* @param {number} x
|
||
* @param {number} y
|
||
* @param {number} [jitter=3] - 最大偏移像素
|
||
* @returns {{x: number, y: number}}
|
||
*/
|
||
function humanize(x, y, jitter = 3) {
|
||
return {
|
||
x: x + (Math.random() * 2 - 1) * jitter,
|
||
y: y + (Math.random() * 2 - 1) * jitter,
|
||
};
|
||
}
|
||
|
||
/**
|
||
* 随机延迟(毫秒),模拟人类反应时间
|
||
* @param {number} min
|
||
* @param {number} max
|
||
*/
|
||
function randomDelay(min, max) {
|
||
const ms = min + Math.random() * (max - min);
|
||
return new Promise(r => setTimeout(r, ms));
|
||
}
|
||
|
||
// ─── 公开 API ───
|
||
|
||
return {
|
||
|
||
/**
|
||
* 定位元素 — 通过选择器列表查找第一个可见元素
|
||
* @param {string|string[]} selectors - 单个选择器或候选列表
|
||
* @returns {Promise<{found: boolean, x?: number, y?: number, width?: number, height?: number, selector?: string, tagName?: string}>}
|
||
*/
|
||
async locate(selectors) {
|
||
const sels = Array.isArray(selectors) ? selectors : [selectors];
|
||
return locate(sels);
|
||
},
|
||
|
||
/**
|
||
* 点击元素 — 通过 CDP Input.dispatchMouseEvent 发送真实鼠标事件
|
||
*
|
||
* 生成 isTrusted=true 的原生事件,比 element.click() 更真实
|
||
*
|
||
* @param {string|string[]} selectors - 候选选择器
|
||
* @param {object} [opts]
|
||
* @param {number} [opts.jitter=3] - 坐标随机偏移像素
|
||
* @param {number} [opts.delayBeforeClick=50] - 移动到元素后、点击前的等待(ms)
|
||
* @param {number} [opts.clickDuration=80] - mousedown 到 mouseup 的间隔(ms)
|
||
* @returns {Promise<{ok: boolean, selector?: string, x?: number, y?: number, error?: string}>}
|
||
*/
|
||
async click(selectors, opts = {}) {
|
||
const { jitter = 10, delayBeforeClick = 50, clickDuration = 80 } = opts;
|
||
|
||
const sels = Array.isArray(selectors) ? selectors : [selectors];
|
||
const loc = await locate(sels);
|
||
if (!loc.found) {
|
||
return { ok: false, error: 'element_not_found', triedSelectors: sels };
|
||
}
|
||
|
||
const { x, y } = humanize(loc.x, loc.y, jitter);
|
||
|
||
// 先移动鼠标到目标位置
|
||
await page.mouse.move(x, y);
|
||
await randomDelay(delayBeforeClick * 0.5, delayBeforeClick * 1.5);
|
||
|
||
// mousedown → 短暂停留 → mouseup(模拟真实点击节奏)
|
||
await page.mouse.down();
|
||
await randomDelay(clickDuration * 0.5, clickDuration * 1.5);
|
||
await page.mouse.up();
|
||
|
||
return { ok: true, selector: loc.selector, x, y };
|
||
},
|
||
|
||
/**
|
||
* 输入文本 — 支持两种模式
|
||
*
|
||
* - `'paste'`(默认):通过剪贴板粘贴,整段文本一次性输入,人类也经常这样操作
|
||
* - `'typeChar'`:逐字符键盘输入,每个字符间有随机延迟,模拟打字节奏
|
||
*
|
||
* @param {string} text - 要输入的文本
|
||
* @param {object} [opts]
|
||
* @param {'paste'|'typeChar'} [opts.mode='paste'] - 输入模式
|
||
* @param {number} [opts.minDelay=30] - typeChar 模式下字符间最小间隔(ms)
|
||
* @param {number} [opts.maxDelay=80] - typeChar 模式下字符间最大间隔(ms)
|
||
* @returns {Promise<{ok: boolean, length: number, mode: string}>}
|
||
*/
|
||
async type(text, opts = {}) {
|
||
const { mode = 'paste', minDelay = 30, maxDelay = 80 } = opts;
|
||
|
||
if (mode === 'typeChar') {
|
||
// 逐字符输入,模拟真实打字
|
||
for (const char of text) {
|
||
await page.keyboard.type(char);
|
||
await randomDelay(minDelay, maxDelay);
|
||
}
|
||
} else {
|
||
// 粘贴模式:通过 CDP Input.insertText 一次性输入整段文本
|
||
// 等价于用户从剪贴板粘贴,但不依赖 clipboard API 权限
|
||
const client = page._client();
|
||
await client.send('Input.insertText', { text });
|
||
}
|
||
|
||
return { ok: true, length: text.length, mode };
|
||
},
|
||
|
||
/**
|
||
* 快速设置文本 — 对 contenteditable 元素,用 Ctrl+A → 粘贴的方式填充
|
||
*
|
||
* 比逐字输入快得多,适合长文本(如 prompt)
|
||
* 同样不注入任何对象,通过 evaluate 执行一次性 DOM 操作
|
||
*
|
||
* @param {string|string[]} selectors - 目标输入框选择器
|
||
* @param {string} text - 要填入的文本
|
||
* @returns {Promise<{ok: boolean, selector?: string, error?: string}>}
|
||
*/
|
||
async fill(selectors, text) {
|
||
const sels = Array.isArray(selectors) ? selectors : [selectors];
|
||
const loc = await locate(sels);
|
||
if (!loc.found) {
|
||
return { ok: false, error: 'element_not_found', triedSelectors: sels };
|
||
}
|
||
|
||
// 先点击聚焦目标元素
|
||
const { x, y } = humanize(loc.x, loc.y, 2);
|
||
await page.mouse.click(x, y);
|
||
await randomDelay(100, 200);
|
||
|
||
// 在页面上下文中执行文本填充(一次性,不留痕迹)
|
||
const result = await page.evaluate((selsInner, textInner) => {
|
||
// 重新查找元素(因为 click 后 DOM 可能有变化)
|
||
let el = null;
|
||
for (const sel of selsInner) {
|
||
try {
|
||
const all = [...document.querySelectorAll(sel)];
|
||
el = all.find(n => {
|
||
const r = n.getBoundingClientRect();
|
||
return r.width > 0 && r.height > 0;
|
||
}) || null;
|
||
} catch { /* skip */ }
|
||
if (el) break;
|
||
}
|
||
|
||
if (!el) return { ok: false, error: 'element_lost_after_click' };
|
||
|
||
el.focus();
|
||
|
||
if (el.tagName === 'TEXTAREA' || el.tagName === 'INPUT') {
|
||
// 原生表单元素
|
||
el.value = textInner;
|
||
el.dispatchEvent(new Event('input', { bubbles: true }));
|
||
} else {
|
||
// contenteditable 元素(如 Gemini 的富文本输入框)
|
||
document.execCommand('selectAll', false, null);
|
||
document.execCommand('insertText', false, textInner);
|
||
}
|
||
return { ok: true };
|
||
}, sels, text);
|
||
|
||
return { ...result, selector: loc.selector };
|
||
},
|
||
|
||
/**
|
||
* 在页面上下文中执行一次性查询(不注入任何对象)
|
||
*
|
||
* @param {((...args: any[]) => any)} fn - 要在页面中执行的函数
|
||
* @param {...any} args - 传入函数的参数
|
||
* @returns {Promise<any>}
|
||
*/
|
||
async query(fn, ...args) {
|
||
return page.evaluate(fn, ...args);
|
||
},
|
||
|
||
/**
|
||
* 等待某个条件满足(轮询式)
|
||
*
|
||
* @param {((...args: any[]) => any)} conditionFn - 在页面中执行的判断函数,返回 truthy 值表示满足
|
||
* @param {object} [opts]
|
||
* @param {number} [opts.timeout=30000] - 最大等待时间(ms)
|
||
* @param {number} [opts.interval=500] - 轮询间隔(ms)
|
||
* @param {any[]} [opts.args=[]] - 传入 conditionFn 的参数
|
||
* @returns {Promise<{ok: boolean, result?: any, elapsed: number, error?: string}>}
|
||
*/
|
||
async waitFor(conditionFn, opts = {}) {
|
||
const { timeout = 30_000, interval = 500, args = [] } = opts;
|
||
const start = Date.now();
|
||
|
||
while (Date.now() - start < timeout) {
|
||
try {
|
||
const result = await page.evaluate(conditionFn, ...args);
|
||
if (result) {
|
||
return { ok: true, result, elapsed: Date.now() - start };
|
||
}
|
||
} catch { /* 页面可能还在加载 */ }
|
||
await new Promise(r => setTimeout(r, interval));
|
||
}
|
||
|
||
return { ok: false, error: 'timeout', elapsed: Date.now() - start };
|
||
},
|
||
|
||
/**
|
||
* 等待导航完成
|
||
*
|
||
* @param {object} [opts]
|
||
* @param {string} [opts.waitUntil='networkidle2']
|
||
* @param {number} [opts.timeout=30000]
|
||
* @returns {Promise<void>}
|
||
*/
|
||
async waitForNavigation(opts = {}) {
|
||
const { waitUntil = 'networkidle2', timeout = 30_000 } = opts;
|
||
await page.waitForNavigation({ waitUntil, timeout });
|
||
},
|
||
|
||
/**
|
||
* 按下键盘快捷键
|
||
*
|
||
* @param {string} key - 键名(如 'Enter'、'Tab'、'Escape')
|
||
* @param {object} [opts]
|
||
* @param {number} [opts.delay=50] - keydown 到 keyup 的间隔
|
||
* @returns {Promise<{ok: boolean, key: string}>}
|
||
*/
|
||
async press(key, opts = {}) {
|
||
const { delay = 50 } = opts;
|
||
await page.keyboard.press(key, { delay });
|
||
return { ok: true, key };
|
||
},
|
||
|
||
/**
|
||
* 页面截图(用于调试或状态验证)
|
||
*
|
||
* @param {object} [opts]
|
||
* @param {boolean} [opts.fullPage=false]
|
||
* @param {'png'|'jpeg'|'webp'} [opts.type='png']
|
||
* @param {string} [opts.path] - 保存路径(不传则返回 Buffer)
|
||
* @returns {Promise<Buffer>}
|
||
*/
|
||
async screenshot(opts = {}) {
|
||
return page.screenshot(opts);
|
||
},
|
||
|
||
/**
|
||
* 获取页面当前 URL
|
||
* @returns {string}
|
||
*/
|
||
url() {
|
||
return page.url();
|
||
},
|
||
|
||
/** 底层 page 对象引用 */
|
||
get page() {
|
||
return page;
|
||
},
|
||
};
|
||
}
|