Skip to content

EnrichmentWithOLS

Enrichment class for enriching OLS terms with textual descriptions

EnrichmentWithOLS

Bases: Enrichments

Enrichment class using OLS terms

Source code in aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
class EnrichmentWithOLS(Enrichments):
    """
    Enrichment class using OLS terms

    Each input term is looked up with an HTTP GET against the ``base_url``
    taken from the Hydra config group ``utils/enrichments/ols_terms`` and is
    turned into one text made of the descriptions, synonyms and labels of the
    terms found in the response.
    """

    def enrich_documents(self, texts: list[str]) -> list[str]:
        """
        Enrich a list of input OLS terms

        Args:
            texts: The list of OLS terms to be enriched. Each entry is sent
                as the ``short_form`` query parameter of the request.

        Returns:
            The list of enriched descriptions, one per input term and in the
            same order. An entry is the empty string when the response has
            no ``_embedded`` section or when no text could be collected.

        Raises:
            json.JSONDecodeError: If a response body is not valid JSON.
        """
        logger.info("Load Hydra configuration for OLS enrichments.")
        with hydra.initialize(version_base=None, config_path="../../configs"):
            cfg = hydra.compose(
                config_name="config", overrides=["utils/enrichments/ols_terms=default"]
            )
            cfg = cfg.utils.enrichments.ols_terms

        descriptions = []
        for ols_id in texts:
            r = requests.get(
                cfg.base_url,
                headers={"Accept": "application/json"},
                params={"short_form": ols_id},
                timeout=cfg.timeout,
            )
            response_body = json.loads(r.text)
            # Without an "_embedded" section there is nothing to describe
            if "_embedded" not in response_body:
                descriptions.append("")
                continue
            pieces = []
            # `or []` also covers keys that are present but set to null
            for term in response_body["_embedded"].get("terms") or []:
                pieces.extend(term.get("description") or [])
                pieces.extend(term.get("synonyms") or [])
                # Label is a single value rather than a list
                label = term.get("label", "")
                if label:
                    pieces.append(label)
            # dict.fromkeys removes duplicates while keeping first-seen order,
            # so the joined text is identical from run to run (a set is not
            # ordered, which made the previous output order arbitrary)
            descriptions.append("\n".join(dict.fromkeys(pieces)))
        return descriptions

    def enrich_documents_with_rag(self, texts, docs):
        """
        Enrich a list of input OLS terms

        Args:
            texts: The list of OLS terms to be enriched.
            docs: Not used by this implementation; only `texts` is forwarded
                to `enrich_documents`.

        Returns:
            The list of enriched descriptions
        """
        return self.enrich_documents(texts)

enrich_documents(texts)

Enrich a list of input OLS terms

Parameters:

Name Type Description Default
texts list[str]

The list of OLS terms to be enriched.

required

Returns:

Type Description
list[str]

The list of enriched descriptions

Source code in aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def enrich_documents(self, texts: list[str]) -> list[str]:
    """
    Enrich a list of input OLS terms

    Each term is looked up with an HTTP GET against the ``base_url`` taken
    from the Hydra config group ``utils/enrichments/ols_terms``.

    Args:
        texts: The list of OLS terms to be enriched. Each entry is sent as
            the ``short_form`` query parameter of the request.

    Returns:
        The list of enriched descriptions, one per input term and in the
        same order. An entry is the empty string when the response has no
        ``_embedded`` section or when no text could be collected.

    Raises:
        json.JSONDecodeError: If a response body is not valid JSON.
    """
    logger.info("Load Hydra configuration for OLS enrichments.")
    with hydra.initialize(version_base=None, config_path="../../configs"):
        cfg = hydra.compose(
            config_name="config", overrides=["utils/enrichments/ols_terms=default"]
        )
        cfg = cfg.utils.enrichments.ols_terms

    descriptions = []
    for ols_id in texts:
        r = requests.get(
            cfg.base_url,
            headers={"Accept": "application/json"},
            params={"short_form": ols_id},
            timeout=cfg.timeout,
        )
        response_body = json.loads(r.text)
        # Without an "_embedded" section there is nothing to describe
        if "_embedded" not in response_body:
            descriptions.append("")
            continue
        pieces = []
        # `or []` also covers keys that are present but set to null
        for term in response_body["_embedded"].get("terms") or []:
            pieces.extend(term.get("description") or [])
            pieces.extend(term.get("synonyms") or [])
            # Label is a single value rather than a list
            label = term.get("label", "")
            if label:
                pieces.append(label)
        # dict.fromkeys removes duplicates while keeping first-seen order, so
        # the joined text is identical from run to run (a set is not ordered,
        # which made the previous output order arbitrary)
        descriptions.append("\n".join(dict.fromkeys(pieces)))
    return descriptions

enrich_documents_with_rag(texts, docs)

Enrich a list of input OLS terms

Parameters:

Name Type Description Default
texts

The list of OLS terms to be enriched.

required

Returns:

Type Description

The list of enriched descriptions

Source code in aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py
79
80
81
82
83
84
85
86
87
88
89
def enrich_documents_with_rag(self, texts, docs):
    """
    Enrich OLS terms by delegating to `enrich_documents`.

    Args:
        texts: The list of OLS terms to be enriched.
        docs: Not used here; only `texts` is forwarded.

    Returns:
        The list of enriched descriptions returned by `enrich_documents`.
    """
    enriched = self.enrich_documents(texts)
    return enriched