Enrichment class for enriching gene names with their function and sequence using UniProt.
  
            EnrichmentWithUniProt
    
            
              Bases: Enrichments
        Enrichment class using UniProt
              
                Source code in aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py
                | 20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
class EnrichmentWithUniProt(Enrichments):
    """
    Enrichment class using UniProt
    """

    def enrich_documents(self, texts: list[str]) -> tuple[list, list]:
        """
        Enrich a list of input UniProt gene names with their function and sequence.

        The Hydra configuration ``utils/enrichments/uniprot_proteins=default``
        is loaded on every call, and one GET request per gene name is sent to
        the configured ``uniprot_url``.

        Args:
            texts: The list of gene names to be enriched.

        Returns:
            A tuple ``(descriptions, sequences)`` of two lists, each holding
            one entry per gene name in ``texts`` (same order). Both entries
            are ``None`` when the request is not OK or the response body is
            empty. Otherwise the description is the concatenated text of the
            FUNCTION comments of the first returned record (an empty string
            if it has none) and the sequence is that record's sequence.
        """
        logger.log(
            logging.INFO,
            "Load Hydra configuration for Gene enrichment with description and sequence.",
        )
        with hydra.initialize(version_base=None, config_path="../../configs"):
            cfg = hydra.compose(
                config_name="config",
                overrides=["utils/enrichments/uniprot_proteins=default"],
            )
            cfg = cfg.utils.enrichments.uniprot_proteins

        descriptions = []
        sequences = []
        for gene in texts:
            params = {
                "reviewed": cfg.reviewed,
                "isoform": cfg.isoform,
                "exact_gene": gene,
                "organism": cfg.organism,
                # You can get the list of all available organisms here:
                # https://www.uniprot.org/help/taxonomy
            }
            r = requests.get(
                cfg.uniprot_url,
                headers={"Accept": "application/json"},
                params=params,
                timeout=cfg.timeout,
            )
            # if the response is not ok
            if not r.ok:
                descriptions.append(None)
                sequences.append(None)
                continue
            response_body = json.loads(r.text)
            # if the response body is empty
            if not response_body:
                descriptions.append(None)
                sequences.append(None)
                continue
            # Only the first record of the response is used.
            entry = response_body[0]
            # "comments" is read with a default so that a record without any
            # comments yields an empty description instead of a KeyError.
            description = "".join(
                value["value"]
                for comment in entry.get("comments", [])
                if comment["type"] == "FUNCTION"
                for value in comment["text"]
            )
            sequence = entry["sequence"]["sequence"]
            descriptions.append(description)
            sequences.append(sequence)
        return descriptions, sequences

    def enrich_documents_with_rag(self, texts, docs):
        """
        Enrich a list of input UniProt gene names with their function and sequence.

        Args:
            texts: The list of gene names to be enriched.
            docs: Not used by this implementation.

        Returns:
            The ``(descriptions, sequences)`` tuple returned by
            ``enrich_documents(texts)``.
        """
        return self.enrich_documents(texts)
 | 
 
  
            enrich_documents(texts)
    
        Enrich a list of input UniProt gene names with their function and sequence.
Parameters:
    
      
        
          | Name | Type | Description | Default | 
      
      
          
            | texts | list[str] | 
                The list of gene names to be enriched. | required | 
      
    
    Returns:
    
      
        
          | Type | Description | 
      
      
          
            | tuple[list, list] | 
                A pair (descriptions, sequences): the list of enriched functions and the list of sequences, one entry per input gene name | 
      
    
            
              Source code in aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py
              | 25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def enrich_documents(self, texts: list[str]) -> tuple[list, list]:
    """
    Enrich a list of input UniProt gene names with their function and sequence.

    The Hydra configuration ``utils/enrichments/uniprot_proteins=default`` is
    loaded on every call, and one GET request per gene name is sent to the
    configured ``uniprot_url``.

    Args:
        texts: The list of gene names to be enriched.

    Returns:
        A tuple ``(descriptions, sequences)`` of two lists, each holding one
        entry per gene name in ``texts`` (same order). Both entries are
        ``None`` when the request is not OK or the response body is empty.
        Otherwise the description is the concatenated text of the FUNCTION
        comments of the first returned record (an empty string if it has
        none) and the sequence is that record's sequence.
    """
    logger.log(
        logging.INFO,
        "Load Hydra configuration for Gene enrichment with description and sequence.",
    )
    with hydra.initialize(version_base=None, config_path="../../configs"):
        cfg = hydra.compose(
            config_name="config",
            overrides=["utils/enrichments/uniprot_proteins=default"],
        )
        cfg = cfg.utils.enrichments.uniprot_proteins

    descriptions = []
    sequences = []
    for gene in texts:
        params = {
            "reviewed": cfg.reviewed,
            "isoform": cfg.isoform,
            "exact_gene": gene,
            "organism": cfg.organism,
            # You can get the list of all available organisms here:
            # https://www.uniprot.org/help/taxonomy
        }
        r = requests.get(
            cfg.uniprot_url,
            headers={"Accept": "application/json"},
            params=params,
            timeout=cfg.timeout,
        )
        # if the response is not ok
        if not r.ok:
            descriptions.append(None)
            sequences.append(None)
            continue
        response_body = json.loads(r.text)
        # if the response body is empty
        if not response_body:
            descriptions.append(None)
            sequences.append(None)
            continue
        # Only the first record of the response is used.
        entry = response_body[0]
        # "comments" is read with a default so that a record without any
        # comments yields an empty description instead of a KeyError.
        description = "".join(
            value["value"]
            for comment in entry.get("comments", [])
            if comment["type"] == "FUNCTION"
            for value in comment["text"]
        )
        sequence = entry["sequence"]["sequence"]
        descriptions.append(description)
        sequences.append(sequence)
    return descriptions, sequences
 | 
 
     
 
            enrich_documents_with_rag(texts, docs)
    
        Enrich a list of input UniProt gene names with their function and sequence.
Parameters:
    
      
        
          | Name | Type | Description | Default | 
      
      
          
            | texts |  | 
                The list of gene names to be enriched. | required | 
            | docs |  | 
                Not used by this implementation. | required | 
      
    
    Returns:
    
      
        
          | Type | Description | 
      
      
          
            |  | 
                A pair (descriptions, sequences): the list of enriched functions and the list of sequences, as returned by enrich_documents(texts) | 
      
    
            
              Source code in aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py
              | 88
89
90
91
92
93
94
95
96
97
def enrich_documents_with_rag(self, texts, docs):
    """
    Enrich gene names with their function and sequence; ``docs`` is not used.

    Args:
        texts: The list of gene names to be enriched.
        docs: Accepted but never read by this method.

    Returns:
        Whatever ``self.enrich_documents(texts)`` returns.
    """
    # Pure delegation: the extra documents play no part in the lookup.
    enriched = self.enrich_documents(texts)
    return enriched
 |