SearchToolkit

Bases: AsyncBaseToolkit

Search Toolkit

NOTE
  • Please configure the required env variables! See configs/agents/tools/search.yaml

Methods:

Name    Description
search  search(query: str, num_results: int = 5)
web_qa  web_qa(url: str, query: str)

Source code in utu/tools/search_toolkit.py
class SearchToolkit(AsyncBaseToolkit):
    """Search Toolkit

    NOTE:
        - Please configure the required env variables! See `configs/agents/tools/search.yaml`

    Methods:
        - search(query: str, num_results: int = 5)
        - web_qa(url: str, query: str)
    """

    def __init__(self, config: ToolkitConfig = None):
        super().__init__(config)
        search_engine = self.config.config.get("search_engine", "google")
        match search_engine:
            case "google":
                from .search.google_search import GoogleSearch

                self.search_engine = GoogleSearch(self.config.config)
            case "jina":
                from .search.jina_search import JinaSearch

                self.search_engine = JinaSearch(self.config.config)
            case "baidu":
                from .search.baidu_search import BaiduSearch

                self.search_engine = BaiduSearch(self.config.config)
            case "duckduckgo":
                from .search.duckduckgo_search import DuckDuckGoSearch

                self.search_engine = DuckDuckGoSearch(self.config.config)
            case _:
                raise ValueError(f"Unsupported search engine: {search_engine}")
        crawl_engine = self.config.config.get("crawl_engine", "jina")
        match crawl_engine:
            case "jina":
                from .search.jina_crawl import JinaCrawl

                self.crawl_engine = JinaCrawl(self.config.config)
            case "crawl4ai":
                from .search.crawl4ai_crawl import Crawl4aiCrawl

                self.crawl_engine = Crawl4aiCrawl(self.config.config)
            case _:
                raise ValueError(f"Unsupported crawl engine: {crawl_engine}")
        # llm for web_qa
        self.llm = SimplifiedAsyncOpenAI(
            **self.config.config_llm.model_provider.model_dump() if self.config.config_llm else {}
        )
        self.summary_token_limit = self.config.config.get("summary_token_limit", 1_000)

    async def search(self, query: str, num_results: int = 5) -> dict:
        """web search to gather information from the web.

        Tips:
        1. search query should be concrete and not vague or super long
        2. try to add Google search operators in query if necessary,
        - " " for exact match;
        - -xxx for exclude;
        - * wildcard matching;
        - filetype:xxx for file types;
        - site:xxx for site search;
        - before:YYYY-MM-DD, after:YYYY-MM-DD for time range.

        Args:
            query (str): The query to search for.
            num_results (int, optional): The number of results to return. Defaults to 5.
        """
        # https://serper.dev/playground
        logger.info(f"[tool] search: {oneline_object(query)}")
        res = await self.search_engine.search(query, num_results)
        logger.info(oneline_object(res))
        return res

    async def web_qa(self, url: str, query: str) -> str:
        """Ask question to a webpage, you will get the answer and related links from the specified url.

        Tips:
        - Use cases: gather information from a webpage, ask detailed questions.

        Args:
            url (str): The url to ask question to.
            query (str): The question to ask. Should be clear, concise, and specific.
        """
        logger.info(f"[tool] web_qa: {oneline_object({url, query})}")
        content = await self.crawl_engine.crawl(url)
        query = (
            query or "Summarize the content of this webpage, in the same language as the webpage."
        )  # use the same language
        res_summary, res_links = await asyncio.gather(
            self._qa(content, query), self._extract_links(url, content, query)
        )
        result = f"Summary: {res_summary}\n\nRelated Links: {res_links}"
        return result

    async def _qa(self, content: str, query: str) -> str:
        template = TOOL_PROMPTS["search_qa"].format(content=content, query=query)
        return await self.llm.query_one(
            messages=[{"role": "user", "content": template}], **self.config.config_llm.model_params.model_dump()
        )

    async def _extract_links(self, url: str, content: str, query: str) -> str:
        template = TOOL_PROMPTS["search_related"].format(url=url, content=content, query=query)
        return await self.llm.query_one(
            messages=[{"role": "user", "content": template}], **self.config.config_llm.model_params.model_dump()
        )

    async def get_tools_map(self) -> dict[str, Callable]:
        return {
            "search": self.search,
            "web_qa": self.web_qa,
        }
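
A minimal usage sketch, assuming the class is importable as utu.tools.search_toolkit.SearchToolkit (inferred from the source path above) and that a ToolkitConfig has been built or loaded elsewhere, e.g. from configs/agents/tools/search.yaml. The config keys mentioned in the comments are the ones the constructor actually reads; everything else is illustrative.

import asyncio

from utu.tools.search_toolkit import SearchToolkit  # import path assumed from the source location


async def main(toolkit_config) -> None:
    # toolkit_config is a ToolkitConfig built elsewhere. Its `config` dict
    # selects search_engine ("google" | "jina" | "baidu" | "duckduckgo"),
    # crawl_engine ("jina" | "crawl4ai") and summary_token_limit, while
    # `config_llm` configures the LLM used by web_qa.
    toolkit = SearchToolkit(config=toolkit_config)

    results = await toolkit.search("python asyncio gather", num_results=3)
    print(results)


# asyncio.run(main(toolkit_config))  # run once a ToolkitConfig instance is available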

search async

search(query: str, num_results: int = 5) -> dict

web search to gather information from the web.

Tips:

1. search query should be concrete and not vague or super long
2. try to add Google search operators in query if necessary,
   - " " for exact match;
   - -xxx for exclude;
   - * wildcard matching;
   - filetype:xxx for file types;
   - site:xxx for site search;
   - before:YYYY-MM-DD, after:YYYY-MM-DD for time range.

Parameters:

Name         Type  Description                                      Default
query        str   The query to search for.                         required
num_results  int   The number of results to return. Defaults to 5.  5
Source code in utu/tools/search_toolkit.py
async def search(self, query: str, num_results: int = 5) -> dict:
    """web search to gather information from the web.

    Tips:
    1. search query should be concrete and not vague or super long
    2. try to add Google search operators in query if necessary,
    - " " for exact match;
    - -xxx for exclude;
    - * wildcard matching;
    - filetype:xxx for file types;
    - site:xxx for site search;
    - before:YYYY-MM-DD, after:YYYY-MM-DD for time range.

    Args:
        query (str): The query to search for.
        num_results (int, optional): The number of results to return. Defaults to 5.
    """
    # https://serper.dev/playground
    logger.info(f"[tool] search: {oneline_object(query)}")
    res = await self.search_engine.search(query, num_results)
    logger.info(oneline_object(res))
    return res
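
A short sketch, continuing inside the async main() from the first example where toolkit is already constructed. The query combines a concrete phrase with search operators as suggested above; the shape of the returned dict depends on the configured backend, so treat the output as indicative only.

# inside async main(), continuing the first sketch
results = await toolkit.search('site:python.org "asyncio.gather"', num_results=3)
# The dict structure depends on the configured search_engine
# (Google/Serper, Jina, Baidu or DuckDuckGo), so inspect it before parsing.
print(results)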

web_qa async

web_qa(url: str, query: str) -> str

Ask a question about a webpage; you will get the answer and related links from the specified url.

Tips:

- Use cases: gather information from a webpage, ask detailed questions.

Parameters:

Name   Type  Description                                                   Default
url    str   The url to ask question to.                                   required
query  str   The question to ask. Should be clear, concise, and specific.  required
Source code in utu/tools/search_toolkit.py
async def web_qa(self, url: str, query: str) -> str:
    """Ask question to a webpage, you will get the answer and related links from the specified url.

    Tips:
    - Use cases: gather information from a webpage, ask detailed questions.

    Args:
        url (str): The url to ask question to.
        query (str): The question to ask. Should be clear, concise, and specific.
    """
    logger.info(f"[tool] web_qa: {oneline_object({url, query})}")
    content = await self.crawl_engine.crawl(url)
    query = (
        query or "Summarize the content of this webpage, in the same language as the webpage."
    )  # use the same language
    res_summary, res_links = await asyncio.gather(
        self._qa(content, query), self._extract_links(url, content, query)
    )
    result = f"Summary: {res_summary}\n\nRelated Links: {res_links}"
    return result
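
Another sketch inside the same async context. The URL and question are illustrative; note that because the implementation falls back to query or "Summarize ...", passing an empty query yields a whole-page summary in the page's own language.

# inside async main(), continuing the first sketch
answer = await toolkit.web_qa(
    "https://docs.python.org/3/library/asyncio-task.html",  # illustrative URL
    "What does asyncio.gather return?",
)
print(answer)  # "Summary: ...\n\nRelated Links: ..."

# An empty query triggers the built-in fallback prompt and returns a
# whole-page summary in the same language as the page.
summary = await toolkit.web_qa("https://docs.python.org/3/library/asyncio-task.html", "")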

get_tools_map_func async

get_tools_map_func() -> dict[str, Callable]

Get tools map. It will filter tools by config.activated_tools if it is not None.

Source code in utu/tools/base.py
async def get_tools_map_func(self) -> dict[str, Callable]:
    """Get tools map. It will filter tools by config.activated_tools if it is not None."""
    if self.tools_map is None:
        self.tools_map = await self.get_tools_map()
    if self.config.activated_tools:
        assert all(tool_name in self.tools_map for tool_name in self.config.activated_tools), (
            f"Error config activated tools: {self.config.activated_tools}! available tools: {self.tools_map.keys()}"
        )
        tools_map = {tool_name: self.tools_map[tool_name] for tool_name in self.config.activated_tools}
    else:
        tools_map = self.tools_map
    return tools_map
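
A small sketch of the filtering behaviour, again assuming a constructed SearchToolkit instance named toolkit inside an async context.

# inside async main(), continuing the first sketch
tools_map = await toolkit.get_tools_map_func()
# With config.activated_tools=None (or empty), both tools are returned.
print(list(tools_map))  # ["search", "web_qa"]
# With config.activated_tools = ["web_qa"], only that entry is returned,
# and any name not present in the map fails the assertion above.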

get_tools_in_agents async

get_tools_in_agents() -> list[FunctionTool]

Get tools in openai-agents format.

Source code in utu/tools/base.py
async def get_tools_in_agents(self) -> list[FunctionTool]:
    """Get tools in openai-agents format."""
    tools_map = await self.get_tools_map_func()
    tools = []
    for _, tool in tools_map.items():
        tools.append(
            function_tool(
                tool,
                strict_mode=False,  # turn off strict mode
            )
        )
    return tools
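
A hedged sketch of wiring these tools into the openai-agents SDK (the agents package), which is what the FunctionTool objects are built for; the agent name, instructions, prompt, and the model configured by that SDK are all assumptions here.

# inside async main(), continuing the first sketch
from agents import Agent, Runner  # openai-agents SDK, assumed installed and configured

agent = Agent(
    name="search-agent",  # illustrative
    instructions="Use search and web_qa to answer questions with up-to-date information.",
    tools=await toolkit.get_tools_in_agents(),
)
result = await Runner.run(agent, "Who maintains CPython?")
print(result.final_output)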

get_tools_in_openai async

get_tools_in_openai() -> list[dict]

Get tools in OpenAI format.

Source code in utu/tools/base.py
async def get_tools_in_openai(self) -> list[dict]:
    """Get tools in OpenAI format."""
    tools = await self.get_tools_in_agents()
    return [ChatCompletionConverter.tool_to_openai(tool) for tool in tools]
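
A sketch of passing the OpenAI-format tool schemas to a chat-completions call; the client setup, model name and message are illustrative, and any tool calls the model returns can then be executed via call_tool below.

# inside async main(), continuing the first sketch
from openai import AsyncOpenAI

client = AsyncOpenAI()  # assumes the usual OPENAI_API_KEY-style environment setup
response = await client.chat.completions.create(
    model="gpt-4o-mini",  # any tool-calling model; the name is illustrative
    messages=[{"role": "user", "content": "What is the latest stable Python release?"}],
    tools=await toolkit.get_tools_in_openai(),
)
# response.choices[0].message.tool_calls (if any) can be dispatched with toolkit.call_tool.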

get_tools_in_mcp async

get_tools_in_mcp() -> list[Tool]

Get tools in MCP format.

Source code in utu/tools/base.py
async def get_tools_in_mcp(self) -> list[types.Tool]:
    """Get tools in MCP format."""
    tools = await self.get_tools_in_agents()
    return [MCPConverter.function_tool_to_mcp(tool) for tool in tools]
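
A small sketch inspecting the MCP declarations; each entry is an mcp.types.Tool carrying the name, description and input schema that an MCP server would advertise to clients (field names per the MCP Python SDK, assumed here).

# inside async main(), continuing the first sketch
tools = await toolkit.get_tools_in_mcp()
for tool in tools:
    # name / description / inputSchema are what an MCP server exposes to clients.
    print(tool.name, tool.inputSchema)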

call_tool async

call_tool(name: str, arguments: dict) -> str

Call a tool by its name.

Source code in utu/tools/base.py
async def call_tool(self, name: str, arguments: dict) -> str:
    """Call a tool by its name."""
    tools_map = await self.get_tools_map_func()
    if name not in tools_map:
        raise ValueError(f"Tool {name} not found")
    tool = tools_map[name]
    return await tool(**arguments)
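
A sketch of dispatching by name, e.g. when serving the toolkit behind an MCP server or executing a tool call requested by an LLM; the arguments dict must match the tool's signature.

# inside async main(), continuing the first sketch
result = await toolkit.call_tool(
    "search",
    {"query": "python asyncio gather", "num_results": 3},
)
print(result)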