Skip to content

Tool Helper

Helper class for the Question & Answer tool used in PDF processing.

QAToolHelper

Encapsulates helper routines for the PDF Question & Answer tool. Enhanced with automatic GPU/CPU detection and optimization.

Source code in aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
class QAToolHelper:
    """
    Encapsulates helper routines for the PDF Question & Answer tool.
    Enhanced with automatic GPU/CPU detection and optimization.
    """

    def __init__(self) -> None:
        # Populated by start_call(); None/"" until then.
        self.config: Any = None
        self.call_id: str = ""
        self.has_gpu: bool = False  # Track GPU availability (set in init_vector_store)
        logger.debug("Initialized QAToolHelper")

    def start_call(self, config: Any, call_id: str) -> None:
        """Initialize helper with current config and call identifier."""
        self.config = config
        self.call_id = call_id
        logger.debug("QAToolHelper started call %s", call_id)

    def _require(self, value: Any, msg: str) -> Any:
        """Return ``value`` if truthy; otherwise log ``msg`` and raise ValueError."""
        if not value:
            logger.error("%s: %s", self.call_id, msg)
            raise ValueError(msg)
        return value

    def get_state_models_and_data(self, state: dict) -> tuple[Any, Any, Dict[str, Any]]:
        """Retrieve embedding model, LLM, and article data from agent state.

        Args:
            state: Agent state mapping expected to contain
                ``text_embedding_model``, ``llm_model`` and ``article_data``.

        Returns:
            Tuple of (text embedding model, LLM model, article data dict).

        Raises:
            ValueError: If any of the three required entries is missing or empty.
        """
        text_emb = self._require(
            state.get("text_embedding_model"),
            "No text embedding model found in state.",
        )
        llm = self._require(
            state.get("llm_model"),
            "No LLM model found in state.",
        )
        articles = self._require(
            state.get("article_data", {}),
            "No article_data found in state.",
        )
        return text_emb, llm, articles

    def init_vector_store(self, emb_model: Any) -> Any:
        """Get the singleton Milvus vector store instance with GPU/CPU optimization.

        Args:
            emb_model: Embedding model handed to the vector store factory.

        Returns:
            The vector store returned by ``get_vectorstore``.
        """
        logger.info(
            "%s: Getting singleton vector store instance with hardware optimization",
            self.call_id,
        )
        vs = get_vectorstore(embedding_model=emb_model, config=self.config)

        # Track GPU availability from vector store; default to CPU-only if
        # the store does not expose a has_gpu attribute.
        self.has_gpu = getattr(vs, "has_gpu", False)
        hardware_type = "GPU-accelerated" if self.has_gpu else "CPU-only"

        logger.info(
            "%s: Vector store initialized (%s mode)",
            self.call_id,
            hardware_type,
        )

        # Log hardware-specific configuration (index type chosen by the store).
        if hasattr(vs, "index_params"):
            index_type = vs.index_params.get("index_type", "Unknown")
            logger.info(
                "%s: Using %s index type for %s processing",
                self.call_id,
                index_type,
                hardware_type,
            )

        return vs

    def get_hardware_stats(self) -> Dict[str, Any]:
        """Get current hardware configuration stats for monitoring."""
        return {
            "gpu_available": self.has_gpu,
            "hardware_mode": "GPU-accelerated" if self.has_gpu else "CPU-only",
            "call_id": self.call_id,
        }

get_hardware_stats()

Get current hardware configuration stats for monitoring.

Source code in aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py
81
82
83
84
85
86
87
def get_hardware_stats(self) -> Dict[str, Any]:
    """Report the hardware configuration currently in use for monitoring."""
    mode = "GPU-accelerated" if self.has_gpu else "CPU-only"
    return {
        "gpu_available": self.has_gpu,
        "hardware_mode": mode,
        "call_id": self.call_id,
    }

get_state_models_and_data(state)

Retrieve embedding model, LLM, and article data from agent state.

Source code in aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def get_state_models_and_data(self, state: dict) -> tuple[Any, Any, Dict[str, Any]]:
    """Fetch the embedding model, LLM, and article data from agent state.

    Raises:
        ValueError: if any required entry is absent or empty.
    """
    requirements = (
        ("text_embedding_model", "No text embedding model found in state."),
        ("llm_model", "No LLM model found in state."),
        ("article_data", "No article_data found in state."),
    )
    found = []
    for key, msg in requirements:
        entry = state.get(key)
        if not entry:
            logger.error("%s: %s", self.call_id, msg)
            raise ValueError(msg)
        found.append(entry)
    return found[0], found[1], found[2]

init_vector_store(emb_model)

Get the singleton Milvus vector store instance with GPU/CPU optimization.

Source code in aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def init_vector_store(self, emb_model: Any) -> Any:
    """Return the singleton Milvus vector store, tuned for the detected hardware."""
    logger.info(
        "%s: Getting singleton vector store instance with hardware optimization",
        self.call_id,
    )
    store = get_vectorstore(embedding_model=emb_model, config=self.config)

    # Remember whether the store detected a GPU so later stats reflect it.
    self.has_gpu = getattr(store, "has_gpu", False)
    mode = "GPU-accelerated" if self.has_gpu else "CPU-only"

    logger.info(
        "%s: Vector store initialized (%s mode)",
        self.call_id,
        mode,
    )

    # Surface which index type the store selected for this hardware, if exposed.
    if hasattr(store, "index_params"):
        logger.info(
            "%s: Using %s index type for %s processing",
            self.call_id,
            store.index_params.get("index_type", "Unknown"),
            mode,
        )

    return store