Skip to content

EnrichmentWithPubChem

Enrichment class for enriching PubChem IDs with their SMILES string representation and descriptions.

EnrichmentWithPubChem

Bases: Enrichments

Enrichment class using PubChem

Source code in aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
class EnrichmentWithPubChem(Enrichments):
    """
    Enrichment that looks PubChem compound IDs up through ``pcp`` and
    collects, for each one, a SMILES string and a description.
    """

    def enrich_documents(self, texts: List[str]) -> List[str]:
        """
        Look up every PubChem ID in ``texts`` and gather its SMILES string
        and description.

        Args:
            texts: The PubChem IDs to enrich, processed in order.

        Returns:
            A 2-tuple ``(descriptions, smiles)`` of lists, each holding one
            entry per input ID in input order. Both entries are ``None``
            for an ID whose lookup raises ``pcp.BadRequestError``.
            NOTE(review): the ``List[str]`` return annotation does not
            match this tuple; it is kept unchanged for compatibility.
        """
        smiles_column = []
        description_column = []

        for cid in texts:
            try:
                compound = pcp.Compound.from_cid(cid)
            except pcp.BadRequestError:
                # Keep both outputs aligned with the input by recording
                # a placeholder pair for the rejected ID.
                smiles, description = None, None
            else:
                # Only the lookup itself is guarded; errors raised while
                # reading the SMILES or fetching the description propagate.
                smiles = compound.isomeric_smiles
                description = pubchem_cid_description(cid)
            smiles_column.append(smiles)
            description_column.append(description)

        # Descriptions come first, SMILES second.
        return description_column, smiles_column

    def enrich_documents_with_rag(self, texts, docs):
        """
        Enrich PubChem IDs exactly as ``enrich_documents`` does.

        Args:
            texts: The PubChem IDs to enrich.
            docs: Not used by this implementation.

        Returns:
            Whatever ``enrich_documents(texts)`` returns.
        """
        enriched = self.enrich_documents(texts)
        return enriched

enrich_documents(texts)

Enrich a list of input PubChem IDs with their SMILES string representation and descriptions.

Parameters:

Name Type Description Default
texts List[str]

The list of pubchem IDs to be enriched.

required

Returns:

Type Description
List[str]

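A tuple of two lists aligned with the input: the descriptions first, then the SMILES strings. Both entries are None for an ID whose lookup raises a bad-request error.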

Source code in aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
def enrich_documents(self, texts: List[str]) -> List[str]:
    """
    Look up every PubChem ID in ``texts`` and gather its SMILES string
    and description.

    Args:
        texts: The PubChem IDs to enrich, processed in order.

    Returns:
        A 2-tuple ``(descriptions, smiles)`` of lists, each holding one
        entry per input ID in input order. Both entries are ``None``
        for an ID whose lookup raises ``pcp.BadRequestError``.
        NOTE(review): the ``List[str]`` return annotation does not
        match this tuple; it is kept unchanged for compatibility.
    """
    smiles_column = []
    description_column = []

    for cid in texts:
        try:
            compound = pcp.Compound.from_cid(cid)
        except pcp.BadRequestError:
            # Keep both outputs aligned with the input by recording
            # a placeholder pair for the rejected ID.
            smiles, description = None, None
        else:
            # Only the lookup itself is guarded; errors raised while
            # reading the SMILES or fetching the description propagate.
            smiles = compound.isomeric_smiles
            description = pubchem_cid_description(cid)
        smiles_column.append(smiles)
        description_column.append(description)

    # Descriptions come first, SMILES second.
    return description_column, smiles_column

enrich_documents_with_rag(texts, docs)

Enrich a list of input PubChem IDs with their SMILES string representation and descriptions.

Parameters:

Name Type Description Default
texts

The list of pubchem IDs to be enriched.

required
docs

Not used by this implementation.

required

Returns:

Type Description

The result of enrich_documents(texts): the list of descriptions and the list of SMILES strings.

Source code in aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py
43
44
45
46
47
48
49
50
51
52
53
54
def enrich_documents_with_rag(self, texts, docs):
    """
    Enrich PubChem IDs exactly as ``enrich_documents`` does.

    Args:
        texts: The PubChem IDs to enrich.
        docs: Not used by this implementation.

    Returns:
        Whatever ``self.enrich_documents(texts)`` returns.
    """
    enriched = self.enrich_documents(texts)
    return enriched