@@ -6,6 +6,7 @@ import axios from 'axios';
66import { tools as browser_tools } from './browser_tools.js' ;
77import prompts from './prompts.js' ;
88import { GROUPS } from './tool_groups.js' ;
9+ import { parse_google_search_response } from './search_utils.js' ;
910import { createRequire } from 'node:module' ;
1011import { remark } from 'remark' ;
1112import strip from 'strip-markdown' ;
@@ -198,7 +199,7 @@ const addTool = (tool) => {
198199addTool ( {
199200 name : 'search_engine' ,
200201 description : 'Scrape search results from Google, Bing or Yandex. Returns '
201- + 'SERP results in JSON or Markdown (URL, title, description), Ideal for'
202+ + 'SERP results in JSON or Markdown (URL, title, description),Ideal for'
202203 + 'gathering current information, news, and detailed search results.' ,
203204 annotations : {
204205 title : 'Search Engine' ,
@@ -238,15 +239,8 @@ addTool({
238239 } ) ;
239240 if ( ! is_google )
240241 return response . data ;
241- try {
242- const search_data = JSON . parse ( response . data ) ;
243- return JSON . stringify (
244- clean_google_search_payload ( search_data ) , null , 2 ) ;
245- } catch ( e ) {
246- return JSON . stringify ( {
247- organic : [ ]
248- } , null , 2 ) ;
249- }
242+ return JSON . stringify ( parse_google_search_response ( response . data ,
243+ 'search_engine' ) , null , 2 ) ;
250244 } ) ,
251245} ) ;
252246
@@ -310,48 +304,51 @@ addTool({
310304 execute : tool_fn ( 'search_engine_batch' , async ( { queries} , ctx ) => {
311305 const search_promises = queries . map ( ( { query, engine, cursor,
312306 geo_location} ) => {
313- const is_google = ( engine || 'google' ) === 'google' ;
314- const url = search_url ( engine || 'google' , query , cursor ,
307+ const normalized_engine = engine || 'google' ;
308+ const is_google = normalized_engine === 'google' ;
309+ const url = search_url ( normalized_engine , query , cursor ,
315310 geo_location ) ;
316-
317- return base_request ( {
318- url : 'https://api.brightdata.com/request' ,
319- method : 'POST' ,
320- data : {
321- url : is_google ? `${ url } &brd_json=1` : url ,
322- zone : unlocker_zone ,
323- format : 'raw' ,
324- data_format : is_google ? 'parsed_light' : 'markdown' ,
325- } ,
326- headers : api_headers ( ctx . clientName , 'search_engine_batch' ) ,
327- responseType : 'text' ,
328- } ) . then ( response => {
329- if ( is_google )
330- {
331- try {
332- const search_data = JSON . parse ( response . data ) ;
333- return {
334- query,
335- engine : engine || 'google' ,
336- result : clean_google_search_payload ( search_data ) ,
337- } ;
338- } catch ( e ) {
311+ return ( async ( ) => {
312+ try {
313+ const response = await base_request ( {
314+ url : 'https://api.brightdata.com/request' ,
315+ method : 'POST' ,
316+ data : {
317+ url : is_google ? `${ url } &brd_json=1` : url ,
318+ zone : unlocker_zone ,
319+ format : 'raw' ,
320+ data_format : is_google ? 'parsed_light'
321+ : 'markdown' ,
322+ } ,
323+ headers : api_headers ( ctx . clientName ,
324+ 'search_engine_batch' ) ,
325+ responseType : 'text' ,
326+ } ) ;
327+ if ( is_google )
328+ {
339329 return {
340330 query,
341- engine : engine || 'google' ,
342- result : clean_google_search_payload ( null ) ,
331+ engine : normalized_engine ,
332+ result : parse_google_search_response ( response . data ,
333+ 'search_engine_batch' ) ,
343334 } ;
344335 }
336+ return {
337+ query,
338+ engine : normalized_engine ,
339+ result : response . data ,
340+ } ;
341+ } catch ( e ) {
342+ return {
343+ query,
344+ engine : normalized_engine ,
345+ error : e instanceof Error ? e . message : String ( e ) ,
346+ } ;
345347 }
346- return {
347- query,
348- engine : engine || 'google' ,
349- result : response . data
350- } ;
351- } ) ;
348+ } ) ( ) ;
352349 } ) ;
353350
354- const results = await Promise . allSettled ( search_promises ) ;
351+ const results = await Promise . all ( search_promises ) ;
355352 return JSON . stringify ( results , null , 2 ) ;
356353 } ) ,
357354} ) ;
@@ -1256,28 +1253,6 @@ function tool_fn(name, fn){
12561253 } ;
12571254}
12581255
1259- function clean_google_search_payload ( raw_data ) {
1260- const data = raw_data && typeof raw_data == 'object' ? raw_data : { } ;
1261- const organic = Array . isArray ( data . organic ) ? data . organic : [ ] ;
1262-
1263- const organic_clean = organic
1264- . map ( entry => {
1265- if ( ! entry || typeof entry != 'object' )
1266- return null ;
1267- const link = typeof entry . link == 'string' ? entry . link . trim ( ) : '' ;
1268- const title = typeof entry . title == 'string'
1269- ? entry . title . trim ( ) : '' ;
1270- const description = typeof entry . description == 'string'
1271- ? entry . description . trim ( ) : '' ;
1272- if ( ! link || ! title )
1273- return null ;
1274- return { link, title, description} ;
1275- } )
1276- . filter ( Boolean ) ;
1277-
1278- return { organic : organic_clean } ;
1279- }
1280-
12811256function search_url ( engine , query , cursor , geo_location ) {
12821257 let q = encodeURIComponent ( query ) ;
12831258 let page = cursor ? parseInt ( cursor ) : 0 ;
0 commit comments