Skip to content

Commit 0dc796c

Browse files
authored
feat: Add scraping tool option (#13)
1 parent b77d5b1 commit 0dc796c

File tree

1 file changed

+10
-56
lines changed

1 file changed

+10
-56
lines changed

src/server.ts

+10-56
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,6 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
88
import type { Transport } from '@modelcontextprotocol/sdk/shared/transport.js';
99
import {
1010
CallToolRequestSchema,
11-
GetPromptRequestSchema,
12-
ListPromptsRequestSchema,
1311
ListToolsRequestSchema,
1412
} from '@modelcontextprotocol/sdk/types.js';
1513
import dotenv from 'dotenv';
@@ -32,28 +30,13 @@ const WebBrowserArgsSchema = z.object({
3230
.describe(
3331
'The maximum number of top organic Google Search results whose web pages will be extracted',
3432
),
33+
scrapingTool: z.enum(['browser-playwright', 'raw-http'])
34+
.describe('Select a scraping tool for extracting the target web pages. '
35+
+ 'The Browser tool is more powerful and can handle JavaScript heavy websites, while the '
36+
+ 'Plain HTML tool can not handle JavaScript but is about two times faster.')
37+
.default('raw-http'),
3538
});
3639

37-
const PROMPTS = [
38-
{
39-
name: TOOL_SEARCH,
40-
description: 'Search phrase or a URL at Google and return crawled web pages as text or Markdown',
41-
arguments: [
42-
{
43-
name: 'query',
44-
description: 'Google Search keywords or a URL of a specific web page',
45-
required: true,
46-
},
47-
{
48-
name: 'maxResults',
49-
description: 'The maximum number of top organic Google Search results whose web pages'
50-
+ ' will be extracted (default: 1)',
51-
required: false,
52-
},
53-
],
54-
},
55-
];
56-
5740
/**
5841
* Create an MCP server with a tool to call RAG Web Browser Actor
5942
*/
@@ -74,11 +57,10 @@ export class RagWebBrowserServer {
7457
},
7558
);
7659
this.setupErrorHandling();
77-
this.setupPromptHandlers();
7860
this.setupToolHandlers();
7961
}
8062

81-
private async callRagWebBrowser(query: string, maxResults: number): Promise<string> {
63+
private async callRagWebBrowser(query: string, maxResults: number, scrapingTool: string): Promise<string> {
8264
if (!APIFY_API_TOKEN) {
8365
throw new Error('APIFY_API_TOKEN is required but not set. '
8466
+ 'Please set it in your environment variables or pass it as a command-line argument.');
@@ -87,6 +69,7 @@ export class RagWebBrowserServer {
8769
const queryParams = new URLSearchParams({
8870
query,
8971
maxResults: maxResults.toString(),
72+
scrapingTool,
9073
});
9174
const url = `${ACTOR_BASE_URL}?${queryParams.toString()}`;
9275
const response = await fetch(url, {
@@ -114,43 +97,14 @@ export class RagWebBrowserServer {
11497
});
11598
}
11699

117-
private setupPromptHandlers(): void {
118-
this.server.setRequestHandler(ListPromptsRequestSchema, async () => {
119-
return {
120-
prompts: PROMPTS,
121-
};
122-
});
123-
124-
this.server.setRequestHandler(GetPromptRequestSchema, async (request) => {
125-
const { name, arguments: args } = request.params;
126-
switch (name) {
127-
case TOOL_SEARCH: {
128-
const parsed = WebBrowserArgsSchema.parse(args);
129-
const content = await this.callRagWebBrowser(parsed.query, parsed.maxResults);
130-
return {
131-
description: `Markdown content for search query: ${parsed.query}`,
132-
messages: [
133-
{
134-
role: 'user',
135-
content: { type: 'text', text: content },
136-
},
137-
],
138-
};
139-
}
140-
default: {
141-
throw new Error(`Unknown prompt: ${name}`);
142-
}
143-
}
144-
});
145-
}
146-
147100
private setupToolHandlers(): void {
148101
this.server.setRequestHandler(ListToolsRequestSchema, async () => {
149102
return {
150103
tools: [
151104
{
152105
name: TOOL_SEARCH,
153-
description: 'Search phrase or a URL at Google and return crawled web pages as text or Markdown',
106+
description: 'Search phrase or a URL at Google and return crawled web pages as text or Markdown. '
107+
+ 'Prefer HTTP client for speed and browser-playwright for reliability.',
154108
inputSchema: zodToJsonSchema(WebBrowserArgsSchema),
155109
},
156110
],
@@ -161,7 +115,7 @@ export class RagWebBrowserServer {
161115
switch (name) {
162116
case TOOL_SEARCH: {
163117
const parsed = WebBrowserArgsSchema.parse(args);
164-
const content = await this.callRagWebBrowser(parsed.query, parsed.maxResults);
118+
const content = await this.callRagWebBrowser(parsed.query, parsed.maxResults, parsed.scrapingTool);
165119
return {
166120
content: [{ type: 'text', text: content }],
167121
};

0 commit comments

Comments
 (0)