import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; import { z } from "zod"; import { writeFileSync, mkdirSync } from "node:fs"; import { join } from "node:path"; // 复用已有的统一入口,不修改原有逻辑 import { createGeminiSession, disconnect } from './index.js'; import config from './config.js'; import { sleep } from './util.js'; const server = new McpServer({ name: "gemini-mcp-server", version: "1.0.0", }); // 注册工具 server.registerTool( "gemini_generate_image", { description: "调用后台的 Gemini 浏览器会话生成高质量图片", inputSchema: { prompt: z.string().describe("图片的详细描述词"), newSession: z.boolean().default(false).describe( "是否新建会话。true= 开启全新对话; false = 复用当前已有的 Gemini 会话页" ), referenceImages: z.array(z.string()).default([]).describe( "参考图片的本地文件路径数组,例如 [\"/path/to/ref1.png\", \"/path/to/ref2.jpg\"]。图片会在发送 prompt 前上传到 Gemini 输入框" ), }, }, async ({ prompt, newSession, referenceImages }) => { try { const { ops } = await createGeminiSession(); // 前置检查:确保已登录 const loginCheck = await ops.checkLogin(); if (!loginCheck.ok || !loginCheck.loggedIn) { disconnect(); return { content: [{ type: "text", text: `Gemini 未登录 Google 账号,请先在浏览器中完成登录后重试` }], isError: true, }; } // 需要先处理新建会话(如果需要),因为 generateImage 内部的 newChat 会在上传之后才执行 if (newSession) { await ops.click('newChatBtn'); await sleep(250); } // 如果有参考图,先上传 if (referenceImages.length > 0) { for (const imgPath of referenceImages) { console.error(`[mcp] 正在上传参考图: ${imgPath}`); const uploadResult = await ops.uploadImage(imgPath); if (!uploadResult.ok) { return { content: [{ type: "text", text: `参考图上传失败: ${imgPath}\n错误: ${uploadResult.error}` }], isError: true, }; } } console.error(`[mcp] ${referenceImages.length} 张参考图上传完成`); } // 如果上传了参考图且已手动新建会话,则 generateImage 内部不再新建 const needNewChat = referenceImages.length > 0 ? false : newSession; const result = await ops.generateImage(prompt, { newChat: needNewChat }); // 执行完毕立刻断开,交还给 Daemon 倒计时 disconnect(); if (!result.ok) { return { content: [{ type: "text", text: `生成失败: ${result.error}` }], isError: true, }; } // 将 base64 写入本地文件 const base64Data = result.dataUrl.split(',')[1]; const mimeMatch = result.dataUrl.match(/^data:(image\/\w+);/); const ext = mimeMatch ? mimeMatch[1].split('/')[1] : 'png'; mkdirSync(config.outputDir, { recursive: true }); const filename = `gemini_${Date.now()}.${ext}`; const filePath = join(config.outputDir, filename); writeFileSync(filePath, Buffer.from(base64Data, 'base64')); console.error(`[mcp] 图片已保存至 ${filePath}`); return { content: [ { type: "text", text: `图片生成成功!已保存至: ${filePath}` }, { type: "image", data: base64Data, mimeType: mimeMatch ? mimeMatch[1] : "image/png", }, ], }; } catch (err) { return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true, }; } } ); // ─── 会话管理 ─── // 新建会话 server.registerTool( "gemini_new_chat", { description: "在 Gemini 中新建一个空白对话", inputSchema: {}, }, async () => { try { const { ops } = await createGeminiSession(); const result = await ops.click('newChatBtn'); disconnect(); if (!result.ok) { return { content: [{ type: "text", text: `新建会话失败: ${result.error}` }], isError: true }; } return { content: [{ type: "text", text: "已新建 Gemini 会话" }] }; } catch (err) { return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true }; } } ); // 临时会话 server.registerTool( "gemini_temp_chat", { description: "进入 Gemini 临时对话模式(不保留历史记录,适合隐私场景)。注意:临时会话按钮仅在空白新会话页面可见,本工具会自动先新建会话再进入临时模式", inputSchema: {}, }, async () => { try { const { ops } = await createGeminiSession(); // 临时会话按钮仅在空白新会话页可见,当前会话有内容时会被隐藏 // 因此必须先新建会话,确保页面回到空白状态 const newChatResult = await ops.click('newChatBtn'); if (!newChatResult.ok) { disconnect(); return { content: [{ type: "text", text: `前置步骤失败:无法新建会话(临时会话按钮仅在空白页可见): ${newChatResult.error}` }], isError: true }; } // 等待新会话页面稳定 await sleep(250); const result = await ops.clickTempChat(); disconnect(); if (!result.ok) { return { content: [{ type: "text", text: `进入临时会话失败: ${result.error}` }], isError: true }; } return { content: [{ type: "text", text: "已进入临时对话模式(自动先新建了空白会话)" }] }; } catch (err) { return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true }; } } ); // ─── 模型切换 ─── server.registerTool( "gemini_switch_model", { description: "切换 Gemini 模型(pro / quick / think)", inputSchema: { model: z.enum(["pro", "quick", "think"]).describe("目标模型:pro=高质量, quick=快速, think=深度思考"), }, }, async ({ model }) => { try { const { ops } = await createGeminiSession(); const result = await ops.switchToModel(model); disconnect(); if (!result.ok) { return { content: [{ type: "text", text: `切换模型失败: ${result.error}` }], isError: true }; } return { content: [{ type: "text", text: `模型已切换到 ${model}${result.previousModel ? `(之前是 ${result.previousModel})` : ''}` }], }; } catch (err) { return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true }; } } ); // ─── 文本对话 ─── server.registerTool( "gemini_send_message", { description: "向 Gemini 发送文本消息并等待回答完成(不提取图片,纯文本交互)", inputSchema: { message: z.string().describe("要发送给 Gemini 的文本内容"), timeout: z.number().default(120000).describe("等待回答完成的超时时间(毫秒),默认 120000"), }, }, async ({ message, timeout }) => { try { const { ops } = await createGeminiSession(); const result = await ops.sendAndWait(message, { timeout }); disconnect(); if (!result.ok) { return { content: [{ type: "text", text: `发送失败: ${result.error},耗时 ${result.elapsed}ms` }], isError: true }; } return { content: [{ type: "text", text: `消息已发送并等待完成,耗时 ${result.elapsed}ms` }], }; } catch (err) { return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true }; } } ); // ─── 图片上传 ─── server.registerTool( "gemini_upload_images", { description: "向 Gemini 当前输入框上传图片(仅上传,不发送消息),可配合 gemini_send_message 组合使用", inputSchema: { images: z.array(z.string()).min(1).describe("本地图片文件路径数组"), }, }, async ({ images }) => { try { const { ops } = await createGeminiSession(); const results = []; for (const imgPath of images) { console.error(`[mcp] 正在上传: ${imgPath}`); const r = await ops.uploadImage(imgPath); results.push({ path: imgPath, ...r }); if (!r.ok) { disconnect(); return { content: [{ type: "text", text: `上传失败: ${imgPath}\n错误: ${r.error}\n\n已成功上传 ${results.filter(x => x.ok).length}/${images.length} 张` }], isError: true, }; } } disconnect(); return { content: [{ type: "text", text: `全部 ${images.length} 张图片上传成功` }], }; } catch (err) { return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true }; } } ); // ─── 图片获取 ─── server.registerTool( "gemini_get_images", { description: "获取当前 Gemini 会话中所有已加载的图片列表(不下载,仅返回元信息)", inputSchema: {}, }, async () => { try { const { ops } = await createGeminiSession(); const result = await ops.getAllImages(); disconnect(); if (!result.ok) { return { content: [{ type: "text", text: `未找到图片: ${result.error}` }], isError: true }; } return { content: [{ type: "text", text: JSON.stringify({ total: result.total, newCount: result.newCount, images: result.images }, null, 2) }], }; } catch (err) { return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true }; } } ); server.registerTool( "gemini_extract_image", { description: "提取指定图片的 base64 数据并保存到本地文件。可从 gemini_get_images 获取图片 src URL", inputSchema: { imageUrl: z.string().describe("图片的 src URL(从 gemini_get_images 结果中获取)"), }, }, async ({ imageUrl }) => { try { const { ops } = await createGeminiSession(); const result = await ops.extractImageBase64(imageUrl); disconnect(); if (!result.ok) { return { content: [{ type: "text", text: `图片提取失败: ${result.error}${result.detail ? ' — ' + result.detail : ''}` }], isError: true }; } // 保存到本地 const base64Data = result.dataUrl.split(',')[1]; const mimeMatch = result.dataUrl.match(/^data:(image\/\w+);/); const ext = mimeMatch ? mimeMatch[1].split('/')[1] : 'png'; mkdirSync(config.outputDir, { recursive: true }); const filename = `gemini_${Date.now()}.${ext}`; const filePath = join(config.outputDir, filename); writeFileSync(filePath, Buffer.from(base64Data, 'base64')); console.error(`[mcp] 图片已保存至 ${filePath}`); return { content: [ { type: "text", text: `图片提取成功,已保存至: ${filePath}` }, { type: "image", data: base64Data, mimeType: mimeMatch ? mimeMatch[1] : "image/png" }, ], }; } catch (err) { return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true }; } } ); // ─── 文字回复获取 ─── server.registerTool( "gemini_get_all_text_responses", { description: "获取当前 Gemini 会话中所有文字回复内容(仅文字,不含图片等其他类型回复)", inputSchema: {}, }, async () => { try { const { ops } = await createGeminiSession(); const result = await ops.getAllTextResponses(); disconnect(); if (!result.ok) { return { content: [{ type: "text", text: `未找到回复: ${result.error}` }], isError: true }; } return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }], }; } catch (err) { return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true }; } } ); server.registerTool( "gemini_get_latest_text_response", { description: "获取当前 Gemini 会话中最新一条文字回复(仅文字,不含图片等其他类型回复)", inputSchema: {}, }, async () => { try { const { ops } = await createGeminiSession(); const result = await ops.getLatestTextResponse(); disconnect(); if (!result.ok) { return { content: [{ type: "text", text: `未找到回复: ${result.error}` }], isError: true }; } return { content: [{ type: "text", text: result.text }], }; } catch (err) { return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true }; } } ); // ─── 登录状态检查 ─── server.registerTool( "gemini_check_login", { description: "检查当前 Gemini 页面是否已登录 Google 账号", inputSchema: {}, }, async () => { try { const { ops } = await createGeminiSession(); const result = await ops.checkLogin(); disconnect(); if (!result.ok) { return { content: [{ type: "text", text: `检测失败: ${result.error}` }], isError: true }; } const status = result.loggedIn ? "已登录" : "未登录"; return { content: [{ type: "text", text: `${status}(导航栏文本: "${result.barText}")` }], }; } catch (err) { return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true }; } } ); // ─── 页面状态 & 恢复 ─── server.registerTool( "gemini_probe", { description: "探测 Gemini 页面各元素状态(输入框、按钮、当前模型等),用于调试和排查问题", inputSchema: {}, }, async () => { try { const { ops } = await createGeminiSession(); const result = await ops.probe(); disconnect(); return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }], }; } catch (err) { return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true }; } } ); server.registerTool( "gemini_reload_page", { description: "刷新 Gemini 页面(页面卡住或状态异常时使用)", inputSchema: { timeout: z.number().default(30000).describe("等待页面重新加载完成的超时(毫秒),默认 30000"), }, }, async ({ timeout }) => { try { const { ops } = await createGeminiSession(); const result = await ops.reloadPage({ timeout }); disconnect(); if (!result.ok) { return { content: [{ type: "text", text: `页面刷新失败: ${result.error}` }], isError: true }; } return { content: [{ type: "text", text: `页面刷新完成,耗时 ${result.elapsed}ms` }] }; } catch (err) { return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true }; } } ); // ─── 浏览器信息 ─── // 查询浏览器信息 server.registerTool( "gemini_browser_info", { description: "获取 Gemini 浏览器会话的连接信息(CDP 端口、WebSocket 地址、Daemon 状态等),方便外部工具直连浏览器", inputSchema: {}, }, async () => { const daemonUrl = `http://127.0.0.1:${config.daemonPort}`; try { // 1. 检查 Daemon 健康状态 const healthRes = await fetch(`${daemonUrl}/health`, { signal: AbortSignal.timeout(3000) }); const health = await healthRes.json(); if (!health.ok) { return { content: [{ type: "text", text: "Daemon 未就绪,浏览器可能未启动。请先调用 gemini_generate_image 触发自动启动。" }], isError: true, }; } // 2. 获取浏览器连接信息 const acquireRes = await fetch(`${daemonUrl}/browser/acquire`, { signal: AbortSignal.timeout(5000) }); const acquire = await acquireRes.json(); const info = { daemon: { url: daemonUrl, port: config.daemonPort, status: "running", }, browser: { cdpPort: config.browserDebugPort, wsEndpoint: acquire.wsEndpoint || null, pid: acquire.pid || null, headless: config.browserHeadless, }, config: { protocolTimeout: config.browserProtocolTimeout, outputDir: config.outputDir, daemonTTL: config.daemonTTL, }, }; return { content: [{ type: "text", text: JSON.stringify(info, null, 2) }], }; } catch (err) { return { content: [{ type: "text", text: `无法连接 Daemon (${daemonUrl}),浏览器可能未启动。\n错误: ${err.message}\n\n提示: 请先调用 gemini_generate_image 触发自动启动,或手动运行 npm run daemon`, }], isError: true, }; } } ); // 启动标准输入输出通信 async function run() { const transport = new StdioServerTransport(); await server.connect(transport); console.error("Gemini MCP Server running on stdio"); // 必须用 console.error,避免污染 stdio } run().catch(console.error);