feat(gemini): 新增下载完整尺寸图片功能

This commit is contained in:
knowen
2026-03-20 20:02:19 +08:00
parent 210bf32908
commit af4ae449c8
4 changed files with 211 additions and 37 deletions

View File

@@ -66,6 +66,7 @@ MCP 工具调用(尤其是生图、等待回复等)可能耗时较长。**
| `gemini_upload_images` | 上传图片到输入框(仅上传不发送,可配合 send_message) | `images`(路径数组) |
| `gemini_get_images` | 获取会话中所有已加载图片的元信息 | 无 |
| `gemini_extract_image` | 提取指定图片的 base64 并保存到本地 | `imageUrl`(从 get_images 获取) |
| `gemini_download_full_size_image` | 下载完整尺寸的高清图片,默认最新一张,可指定索引 | `index`(可选,从0开始,从旧到新) |
**文字回复提取:**

View File

@@ -94,28 +94,27 @@ async function main() {
}
console.log(`[6] 图片加载完成 (${Date.now() - imgLoadStart}ms)`);
// 7. 获取最新图片并保存到本地
console.log('\n[7] 查找最新生成的图片...');
// 7. 下载完整尺寸的图片(通过 CDP 拦截下载到 outputDir
console.log('\n[7] 下载完整尺寸图片...');
const dlResult = await ops.downloadFullSizeImage();
if (dlResult.ok) {
console.log(`[7] ✅ 完整尺寸图片已保存: ${dlResult.filePath} (原始文件名: ${dlResult.suggestedFilename})`);
} else {
console.warn(`[7] ⚠ 完整尺寸下载失败: ${dlResult.error},回退到 base64 提取...`);
// 回退:用 base64 提取
const imgInfo = await ops.getLatestImage();
console.log('imgInfo:', JSON.stringify(imgInfo, null, 2));
if (imgInfo.ok && imgInfo.src) {
console.log(`[7] 找到图片 (${imgInfo.width}x${imgInfo.height}, isNew=${imgInfo.isNew})`);
// 提取 base64 数据
console.log(`[7] 提取图片数据 (src=${imgInfo.src})...`);
const b64Result = await ops.extractImageBase64(imgInfo.src);
if (b64Result.ok && b64Result.dataUrl) {
// dataUrl 格式: data:image/png;base64,iVBOR...
const matches = b64Result.dataUrl.match(/^data:image\/(\w+);base64,(.+)$/);
if (matches) {
const ext = matches[1] === 'jpeg' ? 'jpg' : matches[1];
const base64Data = matches[2];
const buffer = Buffer.from(base64Data, 'base64');
// 保存到 ./gemini-image/
const outputDir = './gemini-image';
if (!existsSync(outputDir)) {
mkdirSync(outputDir, { recursive: true });
@@ -124,7 +123,7 @@ async function main() {
const filepath = join(outputDir, filename);
writeFileSync(filepath, buffer);
console.log(`[7] ✅ 图片已保存: ${filepath} (${(buffer.length / 1024).toFixed(1)} KB, method=${b64Result.method})`);
console.log(`[7] ✅ 图片已保存(base64回退): ${filepath} (${(buffer.length / 1024).toFixed(1)} KB, method=${b64Result.method})`);
} else {
console.warn('[7] ⚠ dataUrl 格式无法解析');
}
@@ -135,6 +134,7 @@ async function main() {
console.log('[7] 未找到图片(可能本次回答不含图片)');
}
}
}
} catch (err) {
console.error('Error:', err);

View File

@@ -7,6 +7,8 @@
*/
import { createOperator } from './operator.js';
import { sleep } from './util.js';
import config from './config.js';
import { mkdirSync } from 'node:fs';
// ── Gemini 页面元素选择器 ──
const SELECTORS = {
@@ -691,6 +693,131 @@ export function createOps(page) {
});
},
/**
* 下载完整尺寸的图片
*
* 流程:
* 1. 定位目标图片,获取坐标用于 hover
* 2. 通过 CDP Browser.setDownloadBehavior 将下载目录重定向到 config.outputDir
* 3. hover 触发工具栏 → 点击"下载完整尺寸"按钮
* 4. 监听 CDP Browser.downloadWillBegin / Browser.downloadProgress 等待下载完成
* 5. 返回实际保存的文件路径
*
* 按钮选择器button[data-test-id="download-enhanced-image-button"]
*
* @param {object} [options]
* @param {number} [options.index] - 图片索引从0开始从旧到新不传则取最新一张
* @param {number} [options.timeout=30000] - 下载超时时间ms
* @returns {Promise<{ok: boolean, filePath?: string, suggestedFilename?: string, src?: string, index?: number, total?: number, error?: string}>}
*/
async downloadFullSizeImage({ index, timeout = 30_000 } = {}) {
// 1. 定位目标图片,获取其坐标用于 hover
const imgInfo = await op.query((targetIndex) => {
const imgs = [...document.querySelectorAll('img.image.loaded')];
if (!imgs.length) return { ok: false, error: 'no_loaded_images', total: 0 };
const i = targetIndex == null ? imgs.length - 1 : targetIndex;
if (i < 0 || i >= imgs.length) {
return { ok: false, error: 'index_out_of_range', total: imgs.length, requestedIndex: i };
}
const img = imgs[i];
const rect = img.getBoundingClientRect();
return {
ok: true,
x: rect.left + rect.width / 2,
y: rect.top + rect.height / 2,
src: img.src || '',
index: i,
total: imgs.length,
};
}, index);
if (!imgInfo.ok) return imgInfo;
// 2. 通过 CDP 设置下载路径到 config.outputDir
const downloadDir = config.outputDir;
mkdirSync(downloadDir, { recursive: true });
const client = page._client();
await client.send('Browser.setDownloadBehavior', {
behavior: 'allowAndName',
downloadPath: downloadDir,
eventsEnabled: true,
});
// 3. 设置下载监听(在点击前注册,避免遗漏事件)
const downloadPromise = new Promise((resolve, reject) => {
const timer = setTimeout(() => {
client.off('Browser.downloadWillBegin', onBegin);
client.off('Browser.downloadProgress', onProgress);
reject(new Error('download_timeout'));
}, timeout);
let guid = null;
let suggestedFilename = null;
function onBegin(evt) {
guid = evt.guid;
suggestedFilename = evt.suggestedFilename || null;
}
function onProgress(evt) {
if (evt.guid !== guid) return;
if (evt.state === 'completed') {
clearTimeout(timer);
client.off('Browser.downloadWillBegin', onBegin);
client.off('Browser.downloadProgress', onProgress);
resolve({ suggestedFilename });
} else if (evt.state === 'canceled') {
clearTimeout(timer);
client.off('Browser.downloadWillBegin', onBegin);
client.off('Browser.downloadProgress', onProgress);
reject(new Error('download_canceled'));
}
}
client.on('Browser.downloadWillBegin', onBegin);
client.on('Browser.downloadProgress', onProgress);
});
// 4. hover 到图片上,触发工具栏显示
await page.mouse.move(imgInfo.x, imgInfo.y);
await sleep(250);
// 5. 点击"下载完整尺寸"按钮
const btnSelector = 'button[data-test-id="download-enhanced-image-button"]';
const clickResult = await op.click(btnSelector);
if (!clickResult.ok) {
return { ok: false, error: 'full_size_download_btn_not_found', src: imgInfo.src, index: imgInfo.index, total: imgInfo.total };
}
// 6. 等待下载完成
try {
const { suggestedFilename } = await downloadPromise;
const { join } = await import('node:path');
const filePath = join(downloadDir, suggestedFilename || `gemini_fullsize_${Date.now()}.png`);
return {
ok: true,
filePath,
suggestedFilename,
src: imgInfo.src,
index: imgInfo.index,
total: imgInfo.total,
};
} catch (err) {
return {
ok: false,
error: err.message,
src: imgInfo.src,
index: imgInfo.index,
total: imgInfo.total,
};
}
},
// ─── 高层组合操作 ───
/**
@@ -829,11 +956,11 @@ export function createOps(page) {
* @param {object} [opts]
* @param {number} [opts.timeout=120000]
* @param {boolean} [opts.newChat=true]
* @param {boolean} [opts.highRes=false]
* @param {boolean} [opts.fullSize=false] - true 时通过 CDP 拦截下载完整尺寸原图到 outputDir;false 时提取页面预览图 base64
* @param {(status: object) => void} [opts.onPoll]
*/
async generateImage(prompt, opts = {}) {
const { timeout = 120_000, newChat = true, highRes = false, onPoll } = opts;
const { timeout = 120_000, newChat = true, fullSize = false, onPoll } = opts;
// 1. 可选:新建会话
if (newChat) {
@@ -864,10 +991,12 @@ export function createOps(page) {
}
// 5. 提取 / 下载
if (highRes) {
const dlResult = await this.downloadLatestImage();
return { ok: dlResult.ok, method: 'download', elapsed: waitResult.elapsed, ...dlResult };
if (fullSize) {
// 完整尺寸下载:通过 CDP 拦截,文件保存到 config.outputDir
const dlResult = await this.downloadFullSizeImage();
return { ok: dlResult.ok, method: 'fullSize', elapsed: waitResult.elapsed, ...dlResult };
} else {
// 低分辨率:提取页面预览图的 base64
const b64Result = await this.extractImageBase64(imgInfo.src);
return { ok: b64Result.ok, method: b64Result.method, elapsed: waitResult.elapsed, ...b64Result };
}

View File

@@ -72,7 +72,7 @@ server.registerTool(
// 如果上传了参考图且已手动新建会话,则 generateImage 内部不再新建
const needNewChat = referenceImages.length > 0 ? false : newSession;
const result = await ops.generateImage(prompt, { newChat: needNewChat });
const result = await ops.generateImage(prompt, { newChat: needNewChat, fullSize });
// 执行完毕立刻断开,交还给 Daemon 倒计时
disconnect();
@@ -84,7 +84,17 @@ server.registerTool(
};
}
// 将 base64 写入本地文件
// 完整尺寸下载模式:文件已由 CDP 保存到 outputDir
if (result.method === 'fullSize') {
console.error(`[mcp] 完整尺寸图片已保存至 ${result.filePath}`);
return {
content: [
{ type: "text", text: `图片生成成功!完整尺寸原图已保存至: ${result.filePath}` },
],
};
}
// 低分辨率模式base64 提取,写入本地文件
const base64Data = result.dataUrl.split(',')[1];
const mimeMatch = result.dataUrl.match(/^data:(image\/\w+);/);
const ext = mimeMatch ? mimeMatch[1].split('/')[1] : 'png';
@@ -338,6 +348,40 @@ server.registerTool(
}
);
// ─── Full-size image download ───
// MCP tool wrapping ops.downloadFullSizeImage: downloads the full-size version
// of one image in the conversation and reports the saved file path as text.
server.registerTool(
  "gemini_download_full_size_image",
  {
    description: "下载完整尺寸的图片(高清大图)。默认下载最新一张,也可通过 index 指定第几张从0开始从旧到新排列",
    inputSchema: {
      index: z.number().int().min(0).optional().describe(
        "图片索引从0开始按从旧到新排列。不传则下载最新一张"
      ),
    },
  },
  async ({ index }) => {
    try {
      const { ops } = await createGeminiSession();

      let result;
      try {
        result = await ops.downloadFullSizeImage({ index });
      } finally {
        // Always release the session, including when the download throws;
        // previously a thrown error skipped disconnect() entirely.
        disconnect();
      }

      if (!result.ok) {
        // Build a failure message, adding whatever context the result carries.
        let msg = `下载完整尺寸图片失败: ${result.error}`;
        if (result.total != null) msg += `(共 ${result.total} 张图片)`;
        if (result.error === 'index_out_of_range') msg += `,请求的索引: ${result.requestedIndex}`;
        return { content: [{ type: "text", text: msg }], isError: true };
      }

      return {
        content: [{ type: "text", text: `完整尺寸图片已下载(第 ${result.index + 1} 张,共 ${result.total} 张)\n文件路径: ${result.filePath}\n原始文件名: ${result.suggestedFilename || '未知'}` }],
      };
    } catch (err) {
      // Unexpected failures (session creation, CDP errors, …) are reported as a tool error.
      return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
    }
  }
);
// ─── 文字回复获取 ───
server.registerTool(