Skip to content

Search papers

This tool is used to search for academic papers on Semantic Scholar.

SearchInput

Bases: BaseModel

Input schema for the search papers tool.

Source code in aiagents4pharma/talk2competitors/tools/s2/search.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class SearchInput(BaseModel):
    """Input schema for the search papers tool."""

    # Free-text search query. The two literals below are concatenated, so the
    # first one must end with a space to keep the sentences apart.
    query: str = Field(
        description="Search query string to find academic papers. "
        "Be specific and include relevant academic terms."
    )
    # Upper bound on returned results; validated to lie in [1, 100].
    limit: int = Field(
        default=2, description="Maximum number of results to return", ge=1, le=100
    )
    # Optional publication-year filter; None means no filter.
    year: Optional[str] = Field(
        default=None,
        description="Year range in format: YYYY for specific year, "
        "YYYY- for papers after year, -YYYY for papers before year, or YYYY:YYYY for range",
    )
    # Tool call ID, marked with InjectedToolCallId.
    # NOTE(review): presumably injected by the tool runtime rather than
    # supplied by the model -- confirm against the LangChain docs.
    tool_call_id: Annotated[str, InjectedToolCallId]

search_tool(query, tool_call_id, limit=2, year=None)

Search for academic papers on Semantic Scholar.

Parameters:

Name Type Description Default
query str

The search query string to find academic papers.

required
tool_call_id Annotated[str, InjectedToolCallId]

The tool call ID.

required
limit int

The maximum number of results to return. Defaults to 2.

2
year str

Year range for papers. Supports formats like "2024-", "-2024", "2024:2025". Defaults to None.

None

Returns:

Type Description
Dict[str, Any]

Dict[str, Any]: The search results and related information.

Source code in aiagents4pharma/talk2competitors/tools/s2/search.py
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
@tool(args_schema=SearchInput)
def search_tool(
    query: str,
    tool_call_id: Annotated[str, InjectedToolCallId],
    limit: int = 2,
    year: Optional[str] = None,
) -> Command:
    """
    Search for academic papers on Semantic Scholar.

    Args:
        query (str): The search query string to find academic papers.
        tool_call_id (Annotated[str, InjectedToolCallId]): The tool call ID.
        limit (int, optional): The maximum number of results to return. Defaults to 2.
        year (str, optional): Year range for papers.
            Supports formats like "2024-", "-2024", "2024:2025". Defaults to None.

    Returns:
        Command: A state update with the matching papers (keyed by paper ID) under
            "papers" and a ToolMessage containing a markdown table under "messages".
    """
    print("Starting paper search...")
    endpoint = "https://api.semanticscholar.org/graph/v1/paper/search"
    params: Dict[str, Any] = {
        "query": query,
        "limit": min(limit, 100),
        "fields": "paperId,title,abstract,year,authors,citationCount,url",
    }

    # Add year parameter if provided
    if year:
        params["year"] = year

    response = requests.get(endpoint, params=params, timeout=10)
    if not response.ok:
        # Surface failures such as rate limiting in the logs instead of
        # silently producing an empty result table.
        logging.warning(
            "Semantic Scholar search returned HTTP %s", response.status_code
        )
    data = response.json()
    # "or []" also covers a payload where "data" is present but null.
    found_papers = data.get("data") or []

    def _or_na(value: Any) -> Any:
        """Replace a missing or null field with "N/A"; keep 0 and other values."""
        return "N/A" if value is None else value

    # Create a dictionary to store the papers, keyed by paper ID. Fields may be
    # explicit nulls in the response, which a dict.get default would not
    # replace, hence the _or_na wrapper.
    filtered_papers = {
        paper["paperId"]: {
            "Title": _or_na(paper.get("title")),
            "Abstract": _or_na(paper.get("abstract")),
            "Year": _or_na(paper.get("year")),
            "Citation Count": _or_na(paper.get("citationCount")),
            "URL": _or_na(paper.get("url")),
        }
        for paper in found_papers
        # Entries without a title or without authors are dropped.
        if paper.get("title") and paper.get("authors")
    }

    # A dict of dicts yields one column per paper ID and one row per field.
    df = pd.DataFrame(filtered_papers)

    # Human-readable summaries, used only for the log line below.
    paper_summaries = [
        f"Paper ID: {paper_id}\n"
        f"Title: {paper_data['Title']}\n"
        f"Abstract: {paper_data['Abstract']}\n"
        f"Year: {paper_data['Year']}\n"
        f"Citations: {paper_data['Citation Count']}\n"
        f"URL: {paper_data['URL']}\n"
        for paper_id, paper_data in filtered_papers.items()
    ]

    markdown_table = df.to_markdown(tablefmt="grid")
    logging.info("Search results: %s", paper_summaries)

    return Command(
        update={
            "papers": filtered_papers,  # Now sending the dictionary directly
            "messages": [
                ToolMessage(content=markdown_table, tool_call_id=tool_call_id)
            ],
        }
    )