Skip to content

Commit 5eca9b9

Browse files
authored
Merge pull request #134 from brightdata/pr-133-convention-fixes
Surface Google SERP parse errors (supersedes #133)
2 parents 5e08015 + 43eff16 commit 5eca9b9

5 files changed

Lines changed: 123 additions & 69 deletions

File tree

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@brightdata/mcp",
3-
"version": "2.9.4",
3+
"version": "2.9.5",
44
"description": "An MCP interface into the Bright Data toolset",
55
"type": "module",
66
"main": "./server.js",
@@ -38,6 +38,7 @@
3838
},
3939
"files": [
4040
"server.js",
41+
"search_utils.js",
4142
"browser_tools.js",
4243
"browser_session.js",
4344
"aria_snapshot_filter.js",

search_utils.js

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
'use strict'; /*jslint node:true es9:true*/
2+
3+
/**
 * Shorten a response body so it can be embedded in an error message.
 *
 * @param {*} response_text - Body to shorten. Anything that is not a string
 *     yields an empty string.
 * @param {number} [max_length=300] - Maximum number of UTF-16 code units kept
 *     from the trimmed text. Negative values are treated as 0.
 * @returns {string} The trimmed text when it fits within max_length,
 *     otherwise its leading part followed by '...'.
 */
function truncate_response(response_text, max_length = 300){
    if (typeof response_text != 'string')
        return '';
    const trimmed = response_text.trim();
    if (trimmed.length <= max_length)
        return trimmed;
    // A negative end index makes slice() count from the end of the string,
    // which would return almost the whole body instead of truncating it.
    let end = Math.max(0, max_length);
    // Do not cut between the two halves of a surrogate pair: a dangling high
    // surrogate would put an invalid character into the error message.
    const last_unit = trimmed.charCodeAt(end-1);
    if (last_unit >= 0xD800 && last_unit <= 0xDBFF)
        end--;
    return `${trimmed.slice(0, end)}...`;
}
11+
12+
/**
 * Reduce a parsed Google SERP payload to its usable organic results.
 *
 * @param {*} raw_data - Parsed payload. Anything that is not an object with
 *     an `organic` array is treated as having no results.
 * @returns {{organic: Array<{link: string, title: string,
 *     description: string}>}} Entries with trimmed string fields. Entries
 *     that are not objects, or whose trimmed link or title is empty, are
 *     dropped.
 */
export function clean_google_search_payload(raw_data){
    // Non-string fields collapse to '' so the emptiness check below covers
    // both missing and wrongly typed values.
    const as_text = value=>typeof value=='string' ? value.trim() : '';
    const has_organic = raw_data && typeof raw_data=='object'
        && Array.isArray(raw_data.organic);
    const candidates = has_organic ? raw_data.organic : [];
    const kept = [];
    for (const candidate of candidates)
    {
        if (!candidate || typeof candidate!='object')
            continue;
        const link = as_text(candidate.link);
        const title = as_text(candidate.title);
        const description = as_text(candidate.description);
        if (!link || !title)
            continue;
        kept.push({link, title, description});
    }
    return {organic: kept};
}
31+
32+
/**
 * Parse a Google SERP response body and return its cleaned organic results.
 *
 * @param {string} response_text - Response body, expected to be JSON.
 * @param {string} tool_name - Tool name interpolated into the error message.
 * @returns {{organic: Array}} Result of clean_google_search_payload() applied
 *     to the parsed body.
 * @throws {Error} When parsing or cleaning throws. The message names the tool
 *     and includes a snippet of the body when truncate_response() returns a
 *     non-empty one; the original error is attached as `cause`.
 */
export function parse_google_search_response(response_text, tool_name){
    let payload;
    try {
        payload = clean_google_search_payload(JSON.parse(response_text));
    } catch(e){
        const message_parts = [
            `Unexpected non-JSON response from Bright Data`,
            ` for ${tool_name}.`,
        ];
        const snippet = truncate_response(response_text);
        // Only mention the snippet when there is something to show.
        if (snippet)
            message_parts.push(` Response snippet: ${snippet}`);
        throw new Error(message_parts.join(''), {cause: e});
    }
    return payload;
}

server.js

Lines changed: 41 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import axios from 'axios';
66
import {tools as browser_tools} from './browser_tools.js';
77
import prompts from './prompts.js';
88
import {GROUPS} from './tool_groups.js';
9+
import {parse_google_search_response} from './search_utils.js';
910
import {createRequire} from 'node:module';
1011
import {remark} from 'remark';
1112
import strip from 'strip-markdown';
@@ -198,7 +199,7 @@ const addTool = (tool) => {
198199
addTool({
199200
name: 'search_engine',
200201
description: 'Scrape search results from Google, Bing or Yandex. Returns '
201-
+'SERP results in JSON or Markdown (URL, title, description), Ideal for'
202+
+'SERP results in JSON or Markdown (URL, title, description),Ideal for'
202203
+'gathering current information, news, and detailed search results.',
203204
annotations: {
204205
title: 'Search Engine',
@@ -238,15 +239,8 @@ addTool({
238239
});
239240
if (!is_google)
240241
return response.data;
241-
try {
242-
const search_data = JSON.parse(response.data);
243-
return JSON.stringify(
244-
clean_google_search_payload(search_data), null, 2);
245-
} catch(e){
246-
return JSON.stringify({
247-
organic: []
248-
}, null, 2);
249-
}
242+
return JSON.stringify(parse_google_search_response(response.data,
243+
'search_engine'), null, 2);
250244
}),
251245
});
252246

@@ -310,48 +304,51 @@ addTool({
310304
execute: tool_fn('search_engine_batch', async({queries}, ctx)=>{
311305
const search_promises = queries.map(({query, engine, cursor,
312306
geo_location})=>{
313-
const is_google = (engine || 'google') === 'google';
314-
const url = search_url(engine || 'google', query, cursor,
307+
const normalized_engine = engine || 'google';
308+
const is_google = normalized_engine === 'google';
309+
const url = search_url(normalized_engine, query, cursor,
315310
geo_location);
316-
317-
return base_request({
318-
url: 'https://api.brightdata.com/request',
319-
method: 'POST',
320-
data: {
321-
url: is_google ? `${url}&brd_json=1` : url,
322-
zone: unlocker_zone,
323-
format: 'raw',
324-
data_format: is_google ? 'parsed_light' : 'markdown',
325-
},
326-
headers: api_headers(ctx.clientName, 'search_engine_batch'),
327-
responseType: 'text',
328-
}).then(response=>{
329-
if (is_google)
330-
{
331-
try {
332-
const search_data = JSON.parse(response.data);
333-
return {
334-
query,
335-
engine: engine || 'google',
336-
result: clean_google_search_payload(search_data),
337-
};
338-
} catch(e){
311+
return (async()=>{
312+
try {
313+
const response = await base_request({
314+
url: 'https://api.brightdata.com/request',
315+
method: 'POST',
316+
data: {
317+
url: is_google ? `${url}&brd_json=1` : url,
318+
zone: unlocker_zone,
319+
format: 'raw',
320+
data_format: is_google ? 'parsed_light'
321+
: 'markdown',
322+
},
323+
headers: api_headers(ctx.clientName,
324+
'search_engine_batch'),
325+
responseType: 'text',
326+
});
327+
if (is_google)
328+
{
339329
return {
340330
query,
341-
engine: engine || 'google',
342-
result: clean_google_search_payload(null),
331+
engine: normalized_engine,
332+
result: parse_google_search_response(response.data,
333+
'search_engine_batch'),
343334
};
344335
}
336+
return {
337+
query,
338+
engine: normalized_engine,
339+
result: response.data,
340+
};
341+
} catch(e){
342+
return {
343+
query,
344+
engine: normalized_engine,
345+
error: e instanceof Error ? e.message : String(e),
346+
};
345347
}
346-
return {
347-
query,
348-
engine: engine || 'google',
349-
result: response.data
350-
};
351-
});
348+
})();
352349
});
353350

354-
const results = await Promise.allSettled(search_promises);
351+
const results = await Promise.all(search_promises);
355352
return JSON.stringify(results, null, 2);
356353
}),
357354
});
@@ -1256,28 +1253,6 @@ function tool_fn(name, fn){
12561253
};
12571254
}
12581255

1259-
function clean_google_search_payload(raw_data){
1260-
const data = raw_data && typeof raw_data=='object' ? raw_data : {};
1261-
const organic = Array.isArray(data.organic) ? data.organic : [];
1262-
1263-
const organic_clean = organic
1264-
.map(entry=>{
1265-
if (!entry || typeof entry!='object')
1266-
return null;
1267-
const link = typeof entry.link=='string' ? entry.link.trim() : '';
1268-
const title = typeof entry.title=='string'
1269-
? entry.title.trim() : '';
1270-
const description = typeof entry.description=='string'
1271-
? entry.description.trim() : '';
1272-
if (!link || !title)
1273-
return null;
1274-
return {link, title, description};
1275-
})
1276-
.filter(Boolean);
1277-
1278-
return {organic: organic_clean};
1279-
}
1280-
12811256
function search_url(engine, query, cursor, geo_location){
12821257
let q = encodeURIComponent(query);
12831258
let page = cursor ? parseInt(cursor) : 0;

test/search-utils.test.js

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
'use strict'; /*jslint node:true es9:true*/
2+
import test from 'node:test';
3+
import assert from 'node:assert/strict';
4+
import {clean_google_search_payload, parse_google_search_response}
5+
from '../search_utils.js';
6+
7+
// A padded but complete entry is kept (trimmed); an entry with an empty link
// is dropped.
test('clean_google_search_payload keeps valid organic results', ()=>{
    const padded_entry = {
        link: ' https://example.com ',
        title: ' Example ',
        description: ' Sample ',
    };
    const entry_without_link = {
        link: '',
        title: 'Missing link',
        description: 'Ignored',
    };
    const expected = {
        organic: [{
            link: 'https://example.com',
            title: 'Example',
            description: 'Sample',
        }],
    };
    const actual = clean_google_search_payload({
        organic: [padded_entry, entry_without_link],
    });
    assert.deepEqual(actual, expected);
});
31+
32+
// An HTML body is not JSON, so parsing must throw an error naming the tool.
test('parse_google_search_response throws on invalid JSON body', ()=>{
    const expected_message =
        /Unexpected non-JSON response from Bright Data for search_engine\./;
    const parse_blocked_page = ()=>parse_google_search_response(
        '<html>blocked</html>', 'search_engine');
    assert.throws(parse_blocked_page, expected_message);
});

0 commit comments

Comments
 (0)