Enrichment class for enriching PubChem IDs with their SMILES string representation and descriptions.
EnrichmentWithPubChem
Bases: Enrichments
Enrichment class using PubChem
Source code in aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
class EnrichmentWithPubChem(Enrichments):
    """
    Enrichment that resolves PubChem IDs to SMILES strings and descriptions
    by querying PubChem.
    """

    def enrich_documents(self, texts: List[str]) -> List[str]:
        """
        Look up the SMILES string and the description of each PubChem ID.

        Args:
            texts: The list of PubChem IDs to be enriched.

        Returns:
            Two parallel lists, the descriptions first and the SMILES strings
            second, each holding one entry per input ID. Both entries are
            None for an ID whose response contains no "PropertyTable".
        """
        # The base URL for the PubChem lookup comes from the Hydra configuration
        with hydra.initialize(version_base=None, config_path="../../configs"):
            composed = hydra.compose(config_name='config',
                                     overrides=['utils/pubchem_utils=default'])
            cfg = composed.utils.pubchem_utils

        descriptions = []
        smiles_strings = []
        for cid in texts:
            # Query the SMILES property of this ID
            url = f"{cfg.pubchem_cid2smiles_url}/{cid}/property/smiles/JSON"
            payload = requests.get(url, timeout=60).json()

            if "PropertyTable" not in payload:
                # The PubChem ID was not found: record None for both values
                smiles_strings.append(None)
                descriptions.append(None)
                continue

            # Defaults are kept when the property list is empty
            smiles, description = '', ''
            for entry in payload["PropertyTable"]['Properties']:
                smiles = entry.get("SMILES", '')
                description = pubchem_cid_description(cid)
            smiles_strings.append(smiles)
            descriptions.append(description)

        return descriptions, smiles_strings

    def enrich_documents_with_rag(self, texts, docs):
        """
        Enrich a list of PubChem IDs by delegating to enrich_documents.

        Args:
            texts: The list of PubChem IDs to be enriched.
            docs: Not used by this implementation.

        Returns:
            Whatever enrich_documents returns for the given IDs.
        """
        enriched = self.enrich_documents(texts)
        return enriched
|
enrich_documents(texts)
Enrich a list of input PubChem IDs with their SMILES string representation and descriptions.
Parameters:
Name |
Type |
Description |
Default |
texts
|
List[str]
|
The list of pubchem IDs to be enriched.
|
required
|
Returns:
Type |
Description |
List[str]
|
Two lists, one entry per input ID: the descriptions first, then the SMILES strings (None for IDs that were not found).
|
Source code in aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def enrich_documents(self, texts: List[str]) -> List[str]:
    """
    Look up the SMILES string and the description of each PubChem ID.

    Args:
        texts: The list of PubChem IDs to be enriched.

    Returns:
        Two parallel lists, the descriptions first and the SMILES strings
        second, each holding one entry per input ID. Both entries are
        None for an ID whose response contains no "PropertyTable".
    """
    # The base URL for the PubChem lookup comes from the Hydra configuration
    with hydra.initialize(version_base=None, config_path="../../configs"):
        composed = hydra.compose(config_name='config',
                                 overrides=['utils/pubchem_utils=default'])
        cfg = composed.utils.pubchem_utils

    descriptions = []
    smiles_strings = []
    for cid in texts:
        # Query the SMILES property of this ID
        url = f"{cfg.pubchem_cid2smiles_url}/{cid}/property/smiles/JSON"
        payload = requests.get(url, timeout=60).json()

        if "PropertyTable" not in payload:
            # The PubChem ID was not found: record None for both values
            smiles_strings.append(None)
            descriptions.append(None)
            continue

        # Defaults are kept when the property list is empty
        smiles, description = '', ''
        for entry in payload["PropertyTable"]['Properties']:
            smiles = entry.get("SMILES", '')
            description = pubchem_cid_description(cid)
        smiles_strings.append(smiles)
        descriptions.append(description)

    return descriptions, smiles_strings
|
enrich_documents_with_rag(texts, docs)
Enrich a list of input PubChem IDs with their SMILES string representation and descriptions.
Parameters:
Name |
Type |
Description |
Default |
texts
|
|
The list of pubchem IDs to be enriched.
|
required
|
docs
|
|
|
required
|
Returns:
Type |
Description |
|
The descriptions and SMILES strings returned by enrich_documents for the given IDs.
|
Source code in aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py
65
66
67
68
69
70
71
72
73
74
75
def enrich_documents_with_rag(self, texts, docs):
    """
    Enrich a list of PubChem IDs by delegating to enrich_documents.

    Args:
        texts: The list of PubChem IDs to be enriched.
        docs: Not used by this implementation.

    Returns:
        Whatever enrich_documents returns for the given IDs.
    """
    enriched = self.enrich_documents(texts)
    return enriched
|