// 531 lines · 17 KiB · JavaScript
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import { writeFileSync, mkdirSync } from "node:fs";
import { join } from "node:path";

// Reuse the existing unified entry point; its original logic is left unmodified.
import { createGeminiSession, disconnect } from './index.js';
import config from './config.js';
import { sleep } from './util.js';
// Identity advertised by this MCP server.
const serverInfo = {
  name: "gemini-mcp-server",
  version: "1.0.0",
};

// Shared server instance; the Gemini tools in this file are registered on it.
const server = new McpServer(serverInfo);

// ─── Tool registration ───

// Tool `gemini_generate_image`
//
// Generates an image through the Gemini browser session, writes it to
// `config.outputDir`, and returns the saved path together with the image data.
//
// Input:
//   prompt          - text description of the image.
//   newSession      - when true, start from a new chat instead of the current one.
//   referenceImages - local file paths uploaded to the input box before the prompt.
//
// Result: MCP content with a text entry (saved path) and an image entry
// (base64 payload + MIME type), or a text entry with `isError: true`.
server.registerTool(
  "gemini_generate_image",
  {
    description: "调用后台的 Gemini 浏览器会话生成高质量图片",
    inputSchema: {
      prompt: z.string().describe("图片的详细描述词"),
      newSession: z.boolean().default(false).describe(
        "是否新建会话。true= 开启全新对话; false = 复用当前已有的 Gemini 会话页"
      ),
      referenceImages: z.array(z.string()).default([]).describe(
        "参考图片的本地文件路径数组,例如 [\"/path/to/ref1.png\", \"/path/to/ref2.jpg\"]。图片会在发送 prompt 前上传到 Gemini 输入框"
      ),
    },
  },
  async ({ prompt, newSession, referenceImages }) => {
    try {
      const { ops } = await createGeminiSession();

      let result;
      try {
        // Precondition: the page must be signed in to a Google account.
        const loginCheck = await ops.checkLogin();
        if (!loginCheck.ok || !loginCheck.loggedIn) {
          return {
            content: [{ type: "text", text: `Gemini 未登录 Google 账号,请先在浏览器中完成登录后重试` }],
            isError: true,
          };
        }

        const hasReferences = referenceImages.length > 0;

        // With reference images the new chat has to be opened by hand first:
        // per the original author's note, generateImage's own newChat step
        // would only run after the upload. Without reference images the
        // newChat option below covers it, so no extra click is issued.
        if (newSession && hasReferences) {
          const newChatResult = await ops.click('newChatBtn');
          if (!newChatResult.ok) {
            // Do not upload into the old conversation when a new one was requested.
            return {
              content: [{ type: "text", text: `新建会话失败: ${newChatResult.error}` }],
              isError: true,
            };
          }
          await sleep(250);
        }

        // Upload reference images one by one; stop at the first failure.
        for (const imgPath of referenceImages) {
          console.error(`[mcp] 正在上传参考图: ${imgPath}`);
          const uploadResult = await ops.uploadImage(imgPath);
          if (!uploadResult.ok) {
            return {
              content: [{ type: "text", text: `参考图上传失败: ${imgPath}\n错误: ${uploadResult.error}` }],
              isError: true,
            };
          }
        }
        if (hasReferences) {
          console.error(`[mcp] ${referenceImages.length} 张参考图上传完成`);
        }

        result = await ops.generateImage(prompt, { newChat: newSession && !hasReferences });
      } finally {
        // Release the session on every path (early return, failure, or throw)
        // so it is handed back to the Daemon as soon as the browser work ends.
        disconnect();
      }

      if (!result.ok) {
        return {
          content: [{ type: "text", text: `生成失败: ${result.error}` }],
          isError: true,
        };
      }

      // Split the data URL into its base64 payload and MIME type; fall back
      // to PNG when the prefix does not match.
      const base64Data = result.dataUrl.split(',')[1];
      const mimeMatch = result.dataUrl.match(/^data:(image\/\w+);/);
      const mimeType = mimeMatch ? mimeMatch[1] : "image/png";
      const ext = mimeMatch ? mimeMatch[1].split('/')[1] : 'png';

      // Persist the image under the configured output directory.
      mkdirSync(config.outputDir, { recursive: true });
      const filename = `gemini_${Date.now()}.${ext}`;
      const filePath = join(config.outputDir, filename);
      writeFileSync(filePath, Buffer.from(base64Data, 'base64'));

      // Logging goes to stderr; stdout is used by the stdio transport.
      console.error(`[mcp] 图片已保存至 ${filePath}`);

      return {
        content: [
          { type: "text", text: `图片生成成功!已保存至: ${filePath}` },
          { type: "image", data: base64Data, mimeType },
        ],
      };
    } catch (err) {
      return {
        content: [{ type: "text", text: `执行崩溃: ${err.message}` }],
        isError: true,
      };
    }
  }
);

// ─── Session management ───

// Tool `gemini_new_chat`
//
// Opens a blank conversation by calling `ops.click('newChatBtn')`.
// Takes no input. Returns a confirmation text, or an error text with
// `isError: true` when the click reports failure or something throws.
server.registerTool(
  "gemini_new_chat",
  {
    description: "在 Gemini 中新建一个空白对话",
    inputSchema: {},
  },
  async () => {
    try {
      const { ops } = await createGeminiSession();

      let result;
      try {
        result = await ops.click('newChatBtn');
      } finally {
        // Release the session even when the click throws.
        disconnect();
      }

      if (!result.ok) {
        return { content: [{ type: "text", text: `新建会话失败: ${result.error}` }], isError: true };
      }
      return { content: [{ type: "text", text: "已新建 Gemini 会话" }] };
    } catch (err) {
      return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
    }
  }
);

// Tool `gemini_temp_chat`
//
// Switches Gemini into temporary-chat mode. Takes no input.
// The tool first opens a new chat, waits briefly, then calls
// `ops.clickTempChat()`. Returns a confirmation text, or an error text with
// `isError: true` when either step fails or throws.
server.registerTool(
  "gemini_temp_chat",
  {
    description: "进入 Gemini 临时对话模式(不保留历史记录,适合隐私场景)。注意:临时会话按钮仅在空白新会话页面可见,本工具会自动先新建会话再进入临时模式",
    inputSchema: {},
  },
  async () => {
    try {
      const { ops } = await createGeminiSession();

      let result;
      try {
        // The temporary-chat button is only visible on a blank new-chat page
        // and is hidden once the conversation has content, so a new chat must
        // be opened first.
        const newChatResult = await ops.click('newChatBtn');
        if (!newChatResult.ok) {
          return { content: [{ type: "text", text: `前置步骤失败:无法新建会话(临时会话按钮仅在空白页可见): ${newChatResult.error}` }], isError: true };
        }

        // Give the new-chat page a moment to settle.
        await sleep(250);

        result = await ops.clickTempChat();
      } finally {
        // Release the session on every path, including thrown errors.
        disconnect();
      }

      if (!result.ok) {
        return { content: [{ type: "text", text: `进入临时会话失败: ${result.error}` }], isError: true };
      }
      return { content: [{ type: "text", text: "已进入临时对话模式(自动先新建了空白会话)" }] };
    } catch (err) {
      return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
    }
  }
);

// ─── Model switching ───

// Tool `gemini_switch_model`
//
// Switches the active Gemini model via `ops.switchToModel(model)`.
// Input: model - one of "pro", "quick", "think".
// Returns a confirmation text (mentioning `result.previousModel` when the
// operation reports one), or an error text with `isError: true`.
server.registerTool(
  "gemini_switch_model",
  {
    description: "切换 Gemini 模型(pro / quick / think)",
    inputSchema: {
      model: z.enum(["pro", "quick", "think"]).describe("目标模型:pro=高质量, quick=快速, think=深度思考"),
    },
  },
  async ({ model }) => {
    try {
      const { ops } = await createGeminiSession();

      let result;
      try {
        result = await ops.switchToModel(model);
      } finally {
        // Release the session even when the switch throws.
        disconnect();
      }

      if (!result.ok) {
        return { content: [{ type: "text", text: `切换模型失败: ${result.error}` }], isError: true };
      }
      return {
        content: [{ type: "text", text: `模型已切换到 ${model}${result.previousModel ? `(之前是 ${result.previousModel})` : ''}` }],
      };
    } catch (err) {
      return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
    }
  }
);

// ─── Text conversation ───

// Tool `gemini_send_message`
//
// Sends a text message with `ops.sendAndWait` and waits for it to complete.
// Input:
//   message - text to send.
//   timeout - wait limit in milliseconds (default 120000).
// Returns a text reporting the elapsed time; it does not return the reply
// content itself. Failures are reported with `isError: true`.
server.registerTool(
  "gemini_send_message",
  {
    description: "向 Gemini 发送文本消息并等待回答完成(不提取图片,纯文本交互)",
    inputSchema: {
      message: z.string().describe("要发送给 Gemini 的文本内容"),
      timeout: z.number().default(120000).describe("等待回答完成的超时时间(毫秒),默认 120000"),
    },
  },
  async ({ message, timeout }) => {
    try {
      const { ops } = await createGeminiSession();

      let result;
      try {
        result = await ops.sendAndWait(message, { timeout });
      } finally {
        // Release the session even when sending throws.
        disconnect();
      }

      if (!result.ok) {
        return { content: [{ type: "text", text: `发送失败: ${result.error},耗时 ${result.elapsed}ms` }], isError: true };
      }
      return {
        content: [{ type: "text", text: `消息已发送并等待完成,耗时 ${result.elapsed}ms` }],
      };
    } catch (err) {
      return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
    }
  }
);

// ─── Image upload ───

// Tool `gemini_upload_images`
//
// Uploads local images to the Gemini input box without sending a message.
// Input: images - non-empty array of local file paths.
// Images are uploaded sequentially; the first failure stops the loop and is
// reported together with the number of images uploaded so far.
server.registerTool(
  "gemini_upload_images",
  {
    description: "向 Gemini 当前输入框上传图片(仅上传,不发送消息),可配合 gemini_send_message 组合使用",
    inputSchema: {
      images: z.array(z.string()).min(1).describe("本地图片文件路径数组"),
    },
  },
  async ({ images }) => {
    try {
      const { ops } = await createGeminiSession();

      try {
        let uploadedCount = 0;
        for (const imgPath of images) {
          console.error(`[mcp] 正在上传: ${imgPath}`);
          const r = await ops.uploadImage(imgPath);
          if (!r.ok) {
            return {
              content: [{ type: "text", text: `上传失败: ${imgPath}\n错误: ${r.error}\n\n已成功上传 ${uploadedCount}/${images.length} 张` }],
              isError: true,
            };
          }
          uploadedCount += 1;
        }
      } finally {
        // Release the session on every path, including thrown errors.
        disconnect();
      }

      return {
        content: [{ type: "text", text: `全部 ${images.length} 张图片上传成功` }],
      };
    } catch (err) {
      return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
    }
  }
);

// ─── Image retrieval ───

// Tool `gemini_get_images`
//
// Lists the images found by `ops.getAllImages()` without downloading them.
// Takes no input. Returns a JSON text with `total`, `newCount` and `images`
// taken from the operation result, or an error text with `isError: true`.
server.registerTool(
  "gemini_get_images",
  {
    description: "获取当前 Gemini 会话中所有已加载的图片列表(不下载,仅返回元信息)",
    inputSchema: {},
  },
  async () => {
    try {
      const { ops } = await createGeminiSession();

      let result;
      try {
        result = await ops.getAllImages();
      } finally {
        // Release the session even when the lookup throws.
        disconnect();
      }

      if (!result.ok) {
        return { content: [{ type: "text", text: `未找到图片: ${result.error}` }], isError: true };
      }

      const { total, newCount, images } = result;
      return {
        content: [{ type: "text", text: JSON.stringify({ total, newCount, images }, null, 2) }],
      };
    } catch (err) {
      return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
    }
  }
);

// Tool `gemini_extract_image`
//
// Extracts one image as base64 via `ops.extractImageBase64(imageUrl)`, writes
// it to `config.outputDir`, and returns the saved path plus the image data.
// Input: imageUrl - the image `src` URL (obtainable from gemini_get_images).
// Failures are reported as a text entry with `isError: true`.
server.registerTool(
  "gemini_extract_image",
  {
    description: "提取指定图片的 base64 数据并保存到本地文件。可从 gemini_get_images 获取图片 src URL",
    inputSchema: {
      imageUrl: z.string().describe("图片的 src URL(从 gemini_get_images 结果中获取)"),
    },
  },
  async ({ imageUrl }) => {
    try {
      const { ops } = await createGeminiSession();

      let result;
      try {
        result = await ops.extractImageBase64(imageUrl);
      } finally {
        // Release the session even when extraction throws.
        disconnect();
      }

      if (!result.ok) {
        return { content: [{ type: "text", text: `图片提取失败: ${result.error}${result.detail ? ' — ' + result.detail : ''}` }], isError: true };
      }

      // Split the data URL into its base64 payload and MIME type; fall back
      // to PNG when the prefix does not match.
      const base64Data = result.dataUrl.split(',')[1];
      const mimeMatch = result.dataUrl.match(/^data:(image\/\w+);/);
      const mimeType = mimeMatch ? mimeMatch[1] : "image/png";
      const ext = mimeMatch ? mimeMatch[1].split('/')[1] : 'png';

      // Persist the image under the configured output directory.
      mkdirSync(config.outputDir, { recursive: true });
      const filename = `gemini_${Date.now()}.${ext}`;
      const filePath = join(config.outputDir, filename);
      writeFileSync(filePath, Buffer.from(base64Data, 'base64'));

      // Logging goes to stderr; stdout is used by the stdio transport.
      console.error(`[mcp] 图片已保存至 ${filePath}`);

      return {
        content: [
          { type: "text", text: `图片提取成功,已保存至: ${filePath}` },
          { type: "image", data: base64Data, mimeType },
        ],
      };
    } catch (err) {
      return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
    }
  }
);

// ─── Text response retrieval ───

// Tool `gemini_get_all_text_responses`
//
// Returns the full result of `ops.getAllTextResponses()` as pretty-printed
// JSON text. Takes no input. Failures are reported with `isError: true`.
server.registerTool(
  "gemini_get_all_text_responses",
  {
    description: "获取当前 Gemini 会话中所有文字回复内容(仅文字,不含图片等其他类型回复)",
    inputSchema: {},
  },
  async () => {
    try {
      const { ops } = await createGeminiSession();

      let result;
      try {
        result = await ops.getAllTextResponses();
      } finally {
        // Release the session even when the lookup throws.
        disconnect();
      }

      if (!result.ok) {
        return { content: [{ type: "text", text: `未找到回复: ${result.error}` }], isError: true };
      }

      return {
        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
      };
    } catch (err) {
      return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
    }
  }
);

// Tool `gemini_get_latest_text_response`
//
// Returns `result.text` from `ops.getLatestTextResponse()` as the tool's text
// output. Takes no input. Failures are reported with `isError: true`.
server.registerTool(
  "gemini_get_latest_text_response",
  {
    description: "获取当前 Gemini 会话中最新一条文字回复(仅文字,不含图片等其他类型回复)",
    inputSchema: {},
  },
  async () => {
    try {
      const { ops } = await createGeminiSession();

      let result;
      try {
        result = await ops.getLatestTextResponse();
      } finally {
        // Release the session even when the lookup throws.
        disconnect();
      }

      if (!result.ok) {
        return { content: [{ type: "text", text: `未找到回复: ${result.error}` }], isError: true };
      }

      return {
        content: [{ type: "text", text: result.text }],
      };
    } catch (err) {
      return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
    }
  }
);

// ─── Login status check ───

// Tool `gemini_check_login`
//
// Reports whether the Gemini page is signed in, based on `ops.checkLogin()`.
// Takes no input. The text output contains the logged-in/logged-out status
// and `result.barText`. Failures are reported with `isError: true`.
server.registerTool(
  "gemini_check_login",
  {
    description: "检查当前 Gemini 页面是否已登录 Google 账号",
    inputSchema: {},
  },
  async () => {
    try {
      const { ops } = await createGeminiSession();

      let result;
      try {
        result = await ops.checkLogin();
      } finally {
        // Release the session even when the check throws.
        disconnect();
      }

      if (!result.ok) {
        return { content: [{ type: "text", text: `检测失败: ${result.error}` }], isError: true };
      }

      const status = result.loggedIn ? "已登录" : "未登录";
      return {
        content: [{ type: "text", text: `${status}(导航栏文本: "${result.barText}")` }],
      };
    } catch (err) {
      return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
    }
  }
);

// ─── Page state & recovery ───

// Tool `gemini_probe`
//
// Debugging aid: returns whatever `ops.probe()` reports about the page as
// pretty-printed JSON text. Takes no input. Only thrown errors are reported
// with `isError: true`; the probe result itself is returned as-is.
server.registerTool(
  "gemini_probe",
  {
    description: "探测 Gemini 页面各元素状态(输入框、按钮、当前模型等),用于调试和排查问题",
    inputSchema: {},
  },
  async () => {
    try {
      const { ops } = await createGeminiSession();

      let result;
      try {
        result = await ops.probe();
      } finally {
        // Release the session even when probing throws.
        disconnect();
      }

      return {
        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
      };
    } catch (err) {
      return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
    }
  }
);

// Tool `gemini_reload_page`
//
// Reloads the Gemini page via `ops.reloadPage({ timeout })`; intended for a
// stuck or inconsistent page.
// Input: timeout - reload wait limit in milliseconds (default 30000).
// Returns a text with the elapsed time, or an error text with `isError: true`.
server.registerTool(
  "gemini_reload_page",
  {
    description: "刷新 Gemini 页面(页面卡住或状态异常时使用)",
    inputSchema: {
      timeout: z.number().default(30000).describe("等待页面重新加载完成的超时(毫秒),默认 30000"),
    },
  },
  async ({ timeout }) => {
    try {
      const { ops } = await createGeminiSession();

      let result;
      try {
        result = await ops.reloadPage({ timeout });
      } finally {
        // Release the session even when the reload throws.
        disconnect();
      }

      if (!result.ok) {
        return { content: [{ type: "text", text: `页面刷新失败: ${result.error}` }], isError: true };
      }
      return { content: [{ type: "text", text: `页面刷新完成,耗时 ${result.elapsed}ms` }] };
    } catch (err) {
      return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
    }
  }
);

// ─── Browser info ───

// Tool `gemini_browser_info`
//
// Reports connection details for the browser behind the Gemini session.
// It queries the local Daemon's `/health` and `/browser/acquire` endpoints
// and combines the answers with values from `config`. Takes no input.
// Returns pretty-printed JSON text, or an error text with `isError: true`
// when the Daemon is not ready or cannot be reached.
server.registerTool(
  "gemini_browser_info",
  {
    description: "获取 Gemini 浏览器会话的连接信息(CDP 端口、WebSocket 地址、Daemon 状态等),方便外部工具直连浏览器",
    inputSchema: {},
  },
  async () => {
    const daemonUrl = `http://127.0.0.1:${config.daemonPort}`;

    // GET a Daemon endpoint with a timeout and parse the body as JSON.
    const getJson = async (path, timeoutMs) => {
      const response = await fetch(`${daemonUrl}${path}`, { signal: AbortSignal.timeout(timeoutMs) });
      return response.json();
    };

    try {
      // Step 1: Daemon health check.
      const health = await getJson('/health', 3000);
      if (!health.ok) {
        return {
          content: [{ type: "text", text: "Daemon 未就绪,浏览器可能未启动。请先调用 gemini_generate_image 触发自动启动。" }],
          isError: true,
        };
      }

      // Step 2: browser connection details.
      const acquire = await getJson('/browser/acquire', 5000);

      const daemon = {
        url: daemonUrl,
        port: config.daemonPort,
        status: "running",
      };
      const browser = {
        cdpPort: config.browserDebugPort,
        wsEndpoint: acquire.wsEndpoint || null,
        pid: acquire.pid || null,
        headless: config.browserHeadless,
      };
      const settings = {
        protocolTimeout: config.browserProtocolTimeout,
        outputDir: config.outputDir,
        daemonTTL: config.daemonTTL,
      };
      const info = { daemon, browser, config: settings };

      return {
        content: [{ type: "text", text: JSON.stringify(info, null, 2) }],
      };
    } catch (err) {
      const text = `无法连接 Daemon (${daemonUrl}),浏览器可能未启动。\n错误: ${err.message}\n\n提示: 请先调用 gemini_generate_image 触发自动启动,或手动运行 npm run daemon`;
      return {
        content: [{ type: "text", text }],
        isError: true,
      };
    }
  }
);

// Start serving over standard input/output.
// Connects the shared `server` to a new stdio transport, then logs a
// readiness line.
async function run() {
  await server.connect(new StdioServerTransport());
  // Must use console.error here to avoid polluting stdio.
  console.error("Gemini MCP Server running on stdio");
}

// Entry point. A startup failure is logged to stderr and also reflected in
// the exit code, so whoever launched the server can detect that it failed.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});