Skip to content

EmbeddingWithSentenceTransformer

Embedding class using SentenceTransformer model based on LangChain Embeddings class.

EmbeddingWithSentenceTransformer

Bases: Embeddings

Embedding class using SentenceTransformer model based on LangChain Embeddings class.

Source code in aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
class EmbeddingWithSentenceTransformer(Embeddings):
    """
    LangChain ``Embeddings`` implementation that delegates to a
    SentenceTransformer model.

    The model is loaded once at construction time and reused by every
    ``embed_documents`` / ``embed_query`` call.
    """

    def __init__(
        self,
        model_name: str,
        model_cache_dir: str = None,
        trust_remote_code: bool = True,
    ):
        """
        Store the configuration and load the SentenceTransformer model.

        Args:
            model_name: Name of the SentenceTransformer model to load.
            model_cache_dir: Directory handed to SentenceTransformer as its
                ``cache_folder``; ``None`` is passed through unchanged.
            trust_remote_code: Forwarded to SentenceTransformer as
                ``trust_remote_code``.
        """
        # Keep the configuration on the instance so it can be inspected later.
        self.model_name = model_name
        self.model_cache_dir = model_cache_dir
        self.trust_remote_code = trust_remote_code

        # Instantiate the underlying model from the stored configuration.
        self.model = SentenceTransformer(
            self.model_name,
            cache_folder=self.model_cache_dir,
            trust_remote_code=self.trust_remote_code,
        )

    def embed_documents(self, texts: List[str]) -> List[float]:
        """
        Embed a batch of texts with the loaded SentenceTransformer model.

        Args:
            texts: The texts to embed.

        Returns:
            The result of ``self.model.encode`` for ``texts``, returned
            without any conversion.
        """
        # NOTE(review): the value comes straight from ``encode``; confirm it
        # actually matches the declared ``List[float]`` return annotation.
        return self.model.encode(texts, show_progress_bar=False)

    def embed_query(self, text: str) -> List[float]:
        """
        Embed a single query string with the loaded SentenceTransformer model.

        Args:
            text: The query to embed.

        Returns:
            The result of ``self.model.encode`` for ``text``, returned
            without any conversion.
        """
        # NOTE(review): the value comes straight from ``encode``; confirm it
        # actually matches the declared ``List[float]`` return annotation.
        return self.model.encode(text, show_progress_bar=False)

__init__(model_name, model_cache_dir=None, trust_remote_code=True)

Initialize the EmbeddingWithSentenceTransformer class.

Parameters:

Name Type Description Default
model_name str

The name of the SentenceTransformer model to be used.

required
model_cache_dir str

The directory to cache the SentenceTransformer model.

None
trust_remote_code bool

Whether to trust the remote code of the model.

True
Source code in aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def __init__(
    self,
    model_name: str,
    model_cache_dir: str = None,
    trust_remote_code: bool = True,
):
    """
    Store the configuration and load the SentenceTransformer model.

    Args:
        model_name: Name of the SentenceTransformer model to load.
        model_cache_dir: Directory handed to SentenceTransformer as its
            ``cache_folder``; ``None`` is passed through unchanged.
        trust_remote_code: Forwarded to SentenceTransformer as
            ``trust_remote_code``.
    """
    # Keep the configuration on the instance so it can be inspected later.
    self.model_name = model_name
    self.model_cache_dir = model_cache_dir
    self.trust_remote_code = trust_remote_code

    # Instantiate the underlying model from the stored configuration.
    self.model = SentenceTransformer(
        self.model_name,
        cache_folder=self.model_cache_dir,
        trust_remote_code=self.trust_remote_code,
    )

embed_documents(texts)

Generate embeddings for a list of input texts using the SentenceTransformer model.

Parameters:

Name Type Description Default
texts List[str]

The list of texts to be embedded.

required

Returns:

Type Description
List[float]

The list of embeddings for the given texts.

Source code in aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def embed_documents(self, texts: List[str]) -> List[float]:
    """
    Embed a batch of texts with the loaded SentenceTransformer model.

    Args:
        texts: The texts to embed.

    Returns:
        The result of ``self.model.encode`` for ``texts``, returned
        without any conversion.
    """
    # NOTE(review): the value comes straight from ``encode``; confirm it
    # actually matches the declared ``List[float]`` return annotation.
    return self.model.encode(texts, show_progress_bar=False)

embed_query(text)

Generate embeddings for an input text using SentenceTransformer model.

Parameters:

Name Type Description Default
text str

A query to be embedded.

required

Returns: The embeddings for the given query.

Source code in aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def embed_query(self, text: str) -> List[float]:
    """
    Embed a single query string with the loaded SentenceTransformer model.

    Args:
        text: The query to embed.

    Returns:
        The result of ``self.model.encode`` for ``text``, returned
        without any conversion.
    """
    # NOTE(review): the value comes straight from ``encode``; confirm it
    # actually matches the declared ``List[float]`` return annotation.
    return self.model.encode(text, show_progress_bar=False)