Skip to content

Search papers

This tool is used to search for academic papers on Semantic Scholar.

SearchInput

Bases: BaseModel

Input schema for the search papers tool.

Source code in aiagents4pharma/talk2scholars/tools/s2/search.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
class SearchInput(BaseModel):
    """Input schema for the search papers tool."""

    # Free-text search query. The description is built from two adjacent
    # string literals, so the first one must end with a space; otherwise the
    # sentences run together ("...papers.Be specific...").
    query: str = Field(
        description="Search query string to find academic papers. "
        "Be specific and include relevant academic terms."
    )
    # Number of results requested; validated to the inclusive range 1..100.
    limit: int = Field(
        default=5, description="Maximum number of results to return", ge=1, le=100
    )
    # Optional year filter, passed as a string in one of the listed formats.
    year: Optional[str] = Field(
        default=None,
        description="Year range in format: YYYY for specific year, "
        "YYYY- for papers after year, -YYYY for papers before year, or YYYY:YYYY for range",
    )
    # Tool call id, annotated as an injected value (InjectedToolCallId).
    tool_call_id: Annotated[str, InjectedToolCallId]

search_tool(query, tool_call_id, limit=5, year=None)

Search for academic papers on Semantic Scholar.

Parameters:

Name Type Description Default
query str

The search query string to find academic papers.

required
tool_call_id Annotated[str, InjectedToolCallId]

The tool call ID.

required
limit int

The maximum number of results to return. Defaults to 5.

5
year str

Year range for papers. Supports formats like "2024-", "-2024", "2024:2025". Defaults to None.

None

Returns:

Type Description
Command[Any]

A Command that updates the state with the papers found on Semantic Scholar and a summary message.

Source code in aiagents4pharma/talk2scholars/tools/s2/search.py
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
@tool("search_tool", args_schema=SearchInput, parse_docstring=True)
def search_tool(
    query: str,
    tool_call_id: Annotated[str, InjectedToolCallId],
    limit: int = 5,
    year: Optional[str] = None,
) -> Command[Any]:
    """
    Search for academic papers on Semantic Scholar.

    Args:
        query (str): The search query string to find academic papers.
        tool_call_id (Annotated[str, InjectedToolCallId]): The tool call ID.
        limit (int, optional): The maximum number of results to return. Defaults to 5.
        year (str, optional): Year filter such as "2024", "2024-", "-2024" or "2024:2025".
            Defaults to None.

    Returns:
        Command[Any]: A state update carrying the filtered papers and a summary message.
    """
    logger.info("Searching for papers on %s", query)
    endpoint = cfg.api_endpoint
    params = {
        "query": query,
        # Cap the requested page size at 100 regardless of what the caller passed.
        "limit": min(limit, 100),
        "fields": ",".join(cfg.api_fields),
    }

    # Only send the year filter when one was provided.
    if year:
        params["year"] = year

    response = requests.get(endpoint, params=params, timeout=10)
    data = response.json()
    # "data" may be absent or explicitly null; treat both as "no results"
    # so the len() call below cannot fail on None.
    papers = data.get("data") or []
    logger.info("Received %d papers", len(papers))
    if not papers:
        return Command(
            update={  # Place 'messages' inside 'update'
                "messages": [
                    ToolMessage(
                        content="No papers found. Please try a different search query.",
                        tool_call_id=tool_call_id,
                    )
                ]
            }
        )

    # Index the results by paper id, keeping only entries that have both a
    # title and a non-empty author list.
    filtered_papers = {
        paper["paperId"]: {
            "Title": paper.get("title", "N/A"),
            "Abstract": paper.get("abstract", "N/A"),
            "Year": paper.get("year", "N/A"),
            "Citation Count": paper.get("citationCount", "N/A"),
            "URL": paper.get("url", "N/A"),
        }
        for paper in papers
        if paper.get("title") and paper.get("authors")
    }

    logger.info("Filtered %d papers", len(filtered_papers))

    # Summarise the first three papers (title and year) for the message text.
    top_papers = list(filtered_papers.values())[:3]
    top_papers_info = "\n".join(
        [
            f"{i+1}. {paper['Title']} ({paper['Year']})"
            for i, paper in enumerate(top_papers)
        ]
    )

    content = (
        "Search was successful. Papers are attached as an artifact. "
        "Here is a summary of the search results:\n"
    )
    content += f"Number of papers found: {len(filtered_papers)}\n"
    content += f"Query: {query}\n"
    content += f"Year: {year}\n" if year else ""
    content += "Top papers:\n" + top_papers_info

    return Command(
        update={
            "papers": filtered_papers,  # The dictionary is stored directly in state
            "last_displayed_papers": "papers",
            "messages": [
                ToolMessage(
                    content=content,
                    tool_call_id=tool_call_id,
                    artifact=filtered_papers,
                )
            ],
        }
    )