Files
gemini-skill/src/mcp-server.js

531 lines
17 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import { writeFileSync, mkdirSync } from "node:fs";
import { join } from "node:path";
// 复用已有的统一入口,不修改原有逻辑
import { createGeminiSession, disconnect } from './index.js';
import config from './config.js';
import { sleep } from './util.js';
const server = new McpServer({
name: "gemini-mcp-server",
version: "1.0.0",
});
// 注册工具
server.registerTool(
"gemini_generate_image",
{
description: "调用后台的 Gemini 浏览器会话生成高质量图片",
inputSchema: {
prompt: z.string().describe("图片的详细描述词"),
newSession: z.boolean().default(false).describe(
"是否新建会话。true= 开启全新对话; false = 复用当前已有的 Gemini 会话页"
),
referenceImages: z.array(z.string()).default([]).describe(
"参考图片的本地文件路径数组,例如 [\"/path/to/ref1.png\", \"/path/to/ref2.jpg\"]。图片会在发送 prompt 前上传到 Gemini 输入框"
),
},
},
async ({ prompt, newSession, referenceImages }) => {
try {
const { ops } = await createGeminiSession();
// 前置检查:确保已登录
const loginCheck = await ops.checkLogin();
if (!loginCheck.ok || !loginCheck.loggedIn) {
disconnect();
return {
content: [{ type: "text", text: `Gemini 未登录 Google 账号,请先在浏览器中完成登录后重试` }],
isError: true,
};
}
// 需要先处理新建会话(如果需要),因为 generateImage 内部的 newChat 会在上传之后才执行
if (newSession) {
await ops.click('newChatBtn');
await sleep(250);
}
// 如果有参考图,先上传
if (referenceImages.length > 0) {
for (const imgPath of referenceImages) {
console.error(`[mcp] 正在上传参考图: ${imgPath}`);
const uploadResult = await ops.uploadImage(imgPath);
if (!uploadResult.ok) {
return {
content: [{ type: "text", text: `参考图上传失败: ${imgPath}\n错误: ${uploadResult.error}` }],
isError: true,
};
}
}
console.error(`[mcp] ${referenceImages.length} 张参考图上传完成`);
}
// 如果上传了参考图且已手动新建会话,则 generateImage 内部不再新建
const needNewChat = referenceImages.length > 0 ? false : newSession;
const result = await ops.generateImage(prompt, { newChat: needNewChat });
// 执行完毕立刻断开,交还给 Daemon 倒计时
disconnect();
if (!result.ok) {
return {
content: [{ type: "text", text: `生成失败: ${result.error}` }],
isError: true,
};
}
// 将 base64 写入本地文件
const base64Data = result.dataUrl.split(',')[1];
const mimeMatch = result.dataUrl.match(/^data:(image\/\w+);/);
const ext = mimeMatch ? mimeMatch[1].split('/')[1] : 'png';
mkdirSync(config.outputDir, { recursive: true });
const filename = `gemini_${Date.now()}.${ext}`;
const filePath = join(config.outputDir, filename);
writeFileSync(filePath, Buffer.from(base64Data, 'base64'));
console.error(`[mcp] 图片已保存至 ${filePath}`);
return {
content: [
{ type: "text", text: `图片生成成功!已保存至: ${filePath}` },
{
type: "image",
data: base64Data,
mimeType: mimeMatch ? mimeMatch[1] : "image/png",
},
],
};
} catch (err) {
return {
content: [{ type: "text", text: `执行崩溃: ${err.message}` }],
isError: true,
};
}
}
);
// ─── 会话管理 ───
// 新建会话
server.registerTool(
"gemini_new_chat",
{
description: "在 Gemini 中新建一个空白对话",
inputSchema: {},
},
async () => {
try {
const { ops } = await createGeminiSession();
const result = await ops.click('newChatBtn');
disconnect();
if (!result.ok) {
return { content: [{ type: "text", text: `新建会话失败: ${result.error}` }], isError: true };
}
return { content: [{ type: "text", text: "已新建 Gemini 会话" }] };
} catch (err) {
return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
}
}
);
// 临时会话
server.registerTool(
"gemini_temp_chat",
{
description: "进入 Gemini 临时对话模式(不保留历史记录,适合隐私场景)。注意:临时会话按钮仅在空白新会话页面可见,本工具会自动先新建会话再进入临时模式",
inputSchema: {},
},
async () => {
try {
const { ops } = await createGeminiSession();
// 临时会话按钮仅在空白新会话页可见,当前会话有内容时会被隐藏
// 因此必须先新建会话,确保页面回到空白状态
const newChatResult = await ops.click('newChatBtn');
if (!newChatResult.ok) {
disconnect();
return { content: [{ type: "text", text: `前置步骤失败:无法新建会话(临时会话按钮仅在空白页可见): ${newChatResult.error}` }], isError: true };
}
// 等待新会话页面稳定
await sleep(250);
const result = await ops.clickTempChat();
disconnect();
if (!result.ok) {
return { content: [{ type: "text", text: `进入临时会话失败: ${result.error}` }], isError: true };
}
return { content: [{ type: "text", text: "已进入临时对话模式(自动先新建了空白会话)" }] };
} catch (err) {
return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
}
}
);
// ─── 模型切换 ───
server.registerTool(
"gemini_switch_model",
{
description: "切换 Gemini 模型pro / quick / think",
inputSchema: {
model: z.enum(["pro", "quick", "think"]).describe("目标模型pro=高质量, quick=快速, think=深度思考"),
},
},
async ({ model }) => {
try {
const { ops } = await createGeminiSession();
const result = await ops.switchToModel(model);
disconnect();
if (!result.ok) {
return { content: [{ type: "text", text: `切换模型失败: ${result.error}` }], isError: true };
}
return {
content: [{ type: "text", text: `模型已切换到 ${model}${result.previousModel ? `(之前是 ${result.previousModel}` : ''}` }],
};
} catch (err) {
return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
}
}
);
// ─── 文本对话 ───
server.registerTool(
"gemini_send_message",
{
description: "向 Gemini 发送文本消息并等待回答完成(不提取图片,纯文本交互)",
inputSchema: {
message: z.string().describe("要发送给 Gemini 的文本内容"),
timeout: z.number().default(120000).describe("等待回答完成的超时时间(毫秒),默认 120000"),
},
},
async ({ message, timeout }) => {
try {
const { ops } = await createGeminiSession();
const result = await ops.sendAndWait(message, { timeout });
disconnect();
if (!result.ok) {
return { content: [{ type: "text", text: `发送失败: ${result.error},耗时 ${result.elapsed}ms` }], isError: true };
}
return {
content: [{ type: "text", text: `消息已发送并等待完成,耗时 ${result.elapsed}ms` }],
};
} catch (err) {
return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
}
}
);
// ─── 图片上传 ───
server.registerTool(
"gemini_upload_images",
{
description: "向 Gemini 当前输入框上传图片(仅上传,不发送消息),可配合 gemini_send_message 组合使用",
inputSchema: {
images: z.array(z.string()).min(1).describe("本地图片文件路径数组"),
},
},
async ({ images }) => {
try {
const { ops } = await createGeminiSession();
const results = [];
for (const imgPath of images) {
console.error(`[mcp] 正在上传: ${imgPath}`);
const r = await ops.uploadImage(imgPath);
results.push({ path: imgPath, ...r });
if (!r.ok) {
disconnect();
return {
content: [{ type: "text", text: `上传失败: ${imgPath}\n错误: ${r.error}\n\n已成功上传 ${results.filter(x => x.ok).length}/${images.length}` }],
isError: true,
};
}
}
disconnect();
return {
content: [{ type: "text", text: `全部 ${images.length} 张图片上传成功` }],
};
} catch (err) {
return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
}
}
);
// ─── 图片获取 ───
server.registerTool(
"gemini_get_images",
{
description: "获取当前 Gemini 会话中所有已加载的图片列表(不下载,仅返回元信息)",
inputSchema: {},
},
async () => {
try {
const { ops } = await createGeminiSession();
const result = await ops.getAllImages();
disconnect();
if (!result.ok) {
return { content: [{ type: "text", text: `未找到图片: ${result.error}` }], isError: true };
}
return {
content: [{ type: "text", text: JSON.stringify({ total: result.total, newCount: result.newCount, images: result.images }, null, 2) }],
};
} catch (err) {
return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
}
}
);
server.registerTool(
"gemini_extract_image",
{
description: "提取指定图片的 base64 数据并保存到本地文件。可从 gemini_get_images 获取图片 src URL",
inputSchema: {
imageUrl: z.string().describe("图片的 src URL从 gemini_get_images 结果中获取)"),
},
},
async ({ imageUrl }) => {
try {
const { ops } = await createGeminiSession();
const result = await ops.extractImageBase64(imageUrl);
disconnect();
if (!result.ok) {
return { content: [{ type: "text", text: `图片提取失败: ${result.error}${result.detail ? ' — ' + result.detail : ''}` }], isError: true };
}
// 保存到本地
const base64Data = result.dataUrl.split(',')[1];
const mimeMatch = result.dataUrl.match(/^data:(image\/\w+);/);
const ext = mimeMatch ? mimeMatch[1].split('/')[1] : 'png';
mkdirSync(config.outputDir, { recursive: true });
const filename = `gemini_${Date.now()}.${ext}`;
const filePath = join(config.outputDir, filename);
writeFileSync(filePath, Buffer.from(base64Data, 'base64'));
console.error(`[mcp] 图片已保存至 ${filePath}`);
return {
content: [
{ type: "text", text: `图片提取成功,已保存至: ${filePath}` },
{ type: "image", data: base64Data, mimeType: mimeMatch ? mimeMatch[1] : "image/png" },
],
};
} catch (err) {
return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
}
}
);
// ─── 文字回复获取 ───
server.registerTool(
"gemini_get_all_text_responses",
{
description: "获取当前 Gemini 会话中所有文字回复内容(仅文字,不含图片等其他类型回复)",
inputSchema: {},
},
async () => {
try {
const { ops } = await createGeminiSession();
const result = await ops.getAllTextResponses();
disconnect();
if (!result.ok) {
return { content: [{ type: "text", text: `未找到回复: ${result.error}` }], isError: true };
}
return {
content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
};
} catch (err) {
return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
}
}
);
server.registerTool(
"gemini_get_latest_text_response",
{
description: "获取当前 Gemini 会话中最新一条文字回复(仅文字,不含图片等其他类型回复)",
inputSchema: {},
},
async () => {
try {
const { ops } = await createGeminiSession();
const result = await ops.getLatestTextResponse();
disconnect();
if (!result.ok) {
return { content: [{ type: "text", text: `未找到回复: ${result.error}` }], isError: true };
}
return {
content: [{ type: "text", text: result.text }],
};
} catch (err) {
return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
}
}
);
// ─── 登录状态检查 ───
server.registerTool(
"gemini_check_login",
{
description: "检查当前 Gemini 页面是否已登录 Google 账号",
inputSchema: {},
},
async () => {
try {
const { ops } = await createGeminiSession();
const result = await ops.checkLogin();
disconnect();
if (!result.ok) {
return { content: [{ type: "text", text: `检测失败: ${result.error}` }], isError: true };
}
const status = result.loggedIn ? "已登录" : "未登录";
return {
content: [{ type: "text", text: `${status}(导航栏文本: "${result.barText}"` }],
};
} catch (err) {
return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
}
}
);
// ─── 页面状态 & 恢复 ───
server.registerTool(
"gemini_probe",
{
description: "探测 Gemini 页面各元素状态(输入框、按钮、当前模型等),用于调试和排查问题",
inputSchema: {},
},
async () => {
try {
const { ops } = await createGeminiSession();
const result = await ops.probe();
disconnect();
return {
content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
};
} catch (err) {
return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
}
}
);
server.registerTool(
"gemini_reload_page",
{
description: "刷新 Gemini 页面(页面卡住或状态异常时使用)",
inputSchema: {
timeout: z.number().default(30000).describe("等待页面重新加载完成的超时(毫秒),默认 30000"),
},
},
async ({ timeout }) => {
try {
const { ops } = await createGeminiSession();
const result = await ops.reloadPage({ timeout });
disconnect();
if (!result.ok) {
return { content: [{ type: "text", text: `页面刷新失败: ${result.error}` }], isError: true };
}
return { content: [{ type: "text", text: `页面刷新完成,耗时 ${result.elapsed}ms` }] };
} catch (err) {
return { content: [{ type: "text", text: `执行崩溃: ${err.message}` }], isError: true };
}
}
);
// ─── 浏览器信息 ───
// 查询浏览器信息
server.registerTool(
"gemini_browser_info",
{
description: "获取 Gemini 浏览器会话的连接信息CDP 端口、WebSocket 地址、Daemon 状态等),方便外部工具直连浏览器",
inputSchema: {},
},
async () => {
const daemonUrl = `http://127.0.0.1:${config.daemonPort}`;
try {
// 1. 检查 Daemon 健康状态
const healthRes = await fetch(`${daemonUrl}/health`, { signal: AbortSignal.timeout(3000) });
const health = await healthRes.json();
if (!health.ok) {
return {
content: [{ type: "text", text: "Daemon 未就绪,浏览器可能未启动。请先调用 gemini_generate_image 触发自动启动。" }],
isError: true,
};
}
// 2. 获取浏览器连接信息
const acquireRes = await fetch(`${daemonUrl}/browser/acquire`, { signal: AbortSignal.timeout(5000) });
const acquire = await acquireRes.json();
const info = {
daemon: {
url: daemonUrl,
port: config.daemonPort,
status: "running",
},
browser: {
cdpPort: config.browserDebugPort,
wsEndpoint: acquire.wsEndpoint || null,
pid: acquire.pid || null,
headless: config.browserHeadless,
},
config: {
protocolTimeout: config.browserProtocolTimeout,
outputDir: config.outputDir,
daemonTTL: config.daemonTTL,
},
};
return {
content: [{ type: "text", text: JSON.stringify(info, null, 2) }],
};
} catch (err) {
return {
content: [{
type: "text",
text: `无法连接 Daemon (${daemonUrl}),浏览器可能未启动。\n错误: ${err.message}\n\n提示: 请先调用 gemini_generate_image 触发自动启动,或手动运行 npm run daemon`,
}],
isError: true,
};
}
}
);
// 启动标准输入输出通信
async function run() {
const transport = new StdioServerTransport();
await server.connect(transport);
console.error("Gemini MCP Server running on stdio"); // 必须用 console.error避免污染 stdio
}
run().catch(console.error);