Skip to content

Generate Answer

Generate an answer for a question using retrieved chunks of documents.

_build_context_and_sources(retrieved_chunks)

Build the combined context string and set of paper_ids from retrieved chunks.

Source code in aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
def _build_context_and_sources(
    retrieved_chunks: List[Document],
) -> tuple[str, set[str]]:
    """
    Build the combined context string and set of paper_ids from retrieved chunks.
    """
    papers = {}
    for doc in retrieved_chunks:
        pid = doc.metadata.get("paper_id", "unknown")
        papers.setdefault(pid, []).append(doc)
    formatted = []
    idx = 1
    for pid, chunks in papers.items():
        title = chunks[0].metadata.get("title", "Unknown")
        formatted.append(f"[Document {idx}] From: '{title}' (ID: {pid})")
        for chunk in chunks:
            page = chunk.metadata.get("page", "unknown")
            formatted.append(f"Page {page}: {chunk.page_content}")
        idx += 1
    context = "\n\n".join(formatted)
    sources: set[str] = set()
    for doc in retrieved_chunks:
        pid = doc.metadata.get("paper_id")
        if isinstance(pid, str):
            sources.add(pid)
    return context, sources

generate_answer(question, retrieved_chunks, llm_model, config)

Generate an answer for a question using retrieved chunks.

Parameters:

- `question` (str, required): The question to answer.
- `retrieved_chunks` (List[Document], required): List of relevant document chunks.
- `llm_model` (BaseChatModel, required): Language model for generating answers.
- `config` (Any, required): Configuration for answer generation.

Returns:

- `Dict[str, Any]`: Dictionary with the answer and metadata.

Source code in aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def generate_answer(
    question: str,
    retrieved_chunks: List[Document],
    llm_model: BaseChatModel,
    config: Any,
) -> Dict[str, Any]:
    """
    Generate an answer for a question using retrieved chunks.

    Args:
        question (str): The question to answer
        retrieved_chunks (List[Document]): List of relevant document chunks
        llm_model (BaseChatModel): Language model for generating answers
        config (Any): Configuration for answer generation

    Returns:
        Dict[str, Any]: Dictionary with the answer and metadata

    Raises:
        ValueError: If ``config`` is ``None`` or lacks a ``prompt_template``.
    """
    # Guard clauses: a configuration carrying a prompt template is mandatory.
    if config is None:
        raise ValueError("Configuration for generate_answer is required.")
    if "prompt_template" not in config:
        raise ValueError("The prompt_template is missing from the configuration.")

    # Assemble the retrieval context, fill the template, and query the model.
    context_text, papers_used = _build_context_and_sources(retrieved_chunks)
    answer = llm_model.invoke(
        config["prompt_template"].format(context=context_text, question=question)
    )

    # Package the answer text with per-chunk metadata for the caller.
    return {
        "output_text": answer.content,
        "sources": [chunk.metadata for chunk in retrieved_chunks],
        "num_sources": len(retrieved_chunks),
        "papers_used": list(papers_used),
    }

load_hydra_config()

Load the configuration using Hydra and return the configuration for the Q&A tool.

Source code in aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py
48
49
50
51
52
53
54
55
56
57
58
59
def load_hydra_config() -> Any:
    """
    Load the configuration using Hydra and return the configuration for the Q&A tool.
    """
    with hydra.initialize(version_base=None, config_path="../../../configs"):
        composed = hydra.compose(
            config_name="config",
            overrides=["tools/question_and_answer=default"],
        )
        # Narrow the composed tree down to the question_and_answer section.
        qa_config = composed.tools.question_and_answer
        logger.debug("Loaded Question and Answer tool configuration.")
        return qa_config