Skip to content

Search studies

A tool to fetch studies from the Single Cell Portal.

search_studies(search_term, tool_call_id, limit=5)

Fetch studies from single cell portal

Parameters:

Name Type Description Default
search_term str

The search term to use. Example: "COVID-19", "cancer", etc.

required
limit int

The number of studies to return. Default is 5.

5
Source code in aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
@tool('search_studies')
def search_studies(search_term: str,
                   tool_call_id: Annotated[str, InjectedToolCallId],
                   limit: int = 5):
    """
    Fetch studies from single cell portal

    Args:
        search_term (str): The search term to use. Example: "COVID-19", "cancer", etc.
        limit (int): The number of papers to return. Default is 5.

    """
    # NOTE: the docstring above is left untouched on purpose; the @tool
    # decorator presumably exposes it to the model as the tool description,
    # so implementation notes live in comments instead.
    #
    # Returns a Command that updates two state keys:
    #   * "search_table": the matching studies as a grid-format markdown table
    #   * "messages": a ToolMessage acknowledging the tool call
    # Raises requests.exceptions.HTTPError when SCP never answers with 200
    # within the retry budget.
    logger.log(logging.INFO, "Calling the tool search_studies")
    scp_endpoint = 'https://singlecell.broadinstitute.org/single_cell/api/v1/search?type=study'
    params = {'terms': search_term}

    # Bound the retries so a persistent non-200 answer cannot hang the tool.
    max_attempts = 5
    search_response = None
    status_code = 0
    for _ in range(max_attempts):
        # Make a GET request to the single cell portal
        # NOTE(review): verify=False disables TLS certificate verification;
        # kept for backward compatibility, but it should be revisited.
        search_response = requests.get(scp_endpoint,
                                       params=params,
                                       timeout=10,
                                       verify=False)
        status_code = search_response.status_code
        logger.log(logging.INFO, "Status code %s received from SCP", status_code)
        if status_code == 200:
            break
    else:
        raise requests.exceptions.HTTPError(
            f"SCP search failed with status code {status_code} "
            f"after {max_attempts} attempts",
            response=search_response)

    # Select the columns to display in the table
    selected_columns = ["study_source", "name", "study_url", "gene_count", "cell_count"]

    # Extract the data from the response with the selected columns.
    # Passing `columns=` keeps the frame well-formed even when no study
    # matched; `head` applies the requested limit (negative values mean none).
    studies = search_response.json()['studies']
    df = pd.DataFrame(studies, columns=selected_columns).head(max(limit, 0)).copy()

    # Convert column 'name' into clickable
    # hyperlinks built from the column 'study_url'
    scp_api_url = 'https://singlecell.broadinstitute.org'
    df['name'] = [
        f"<a href=\"{scp_api_url}/{study_url}\">{study_name}</a>"
        for study_url, study_name in zip(df['study_url'], df['name'])
    ]

    # Exclude the column 'study_url' from the dataframe
    df = df.drop(columns=['study_url'])

    # Add a new column at the beginning of the dataframe with row numbers
    df.insert(0, 'S/N', range(1, 1 + len(df)))

    # Update the state key 'search_table' with the dataframe in markdown format
    return Command(
        update={
            # update the state keys
            "search_table": df.to_markdown(tablefmt="grid"),
            # update the message history; report the number of studies
            # actually returned rather than the requested limit
            "messages": [
                ToolMessage(
                    f"Successfully fetched {len(df)} studies on {search_term}.",
                    tool_call_id=tool_call_id
                )
            ],
        }
    )