LlamaCppEmbeddings doesn't work #26372
Replies: 1 comment 3 replies
-
Hey there, @machengyan! I'm here to help you out with any bugs, questions, or contributions you might have. Let's tackle this together!

To address the issue with the `LlamaCppEmbeddings` class returning token-level embeddings, you can add a `pooling` option to the class and apply it before returning the vectors:

```python
from typing import Any, Dict, List, Optional

from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Field, root_validator


class LlamaCppEmbeddings(BaseModel, Embeddings):
"""llama.cpp embedding models.
To use, you should have the llama-cpp-python library installed, and provide the
path to the Llama model as a named parameter to the constructor.
Check out: https://github.com/abetlen/llama-cpp-python
Example:
.. code-block:: python
from langchain_community.embeddings import LlamaCppEmbeddings
llama = LlamaCppEmbeddings(model_path="/path/to/model.bin")
"""
client: Any #: :meta private:
model_path: str
n_ctx: int = Field(512, alias="n_ctx")
"""Token context window."""
n_parts: int = Field(-1, alias="n_parts")
"""Number of parts to split the model into.
If -1, the number of parts is automatically determined."""
seed: int = Field(-1, alias="seed")
"""Seed. If -1, a random seed is used."""
f16_kv: bool = Field(False, alias="f16_kv")
"""Use half-precision for key/value cache."""
logits_all: bool = Field(False, alias="logits_all")
"""Return logits for all tokens, not just the last token."""
vocab_only: bool = Field(False, alias="vocab_only")
"""Only load the vocabulary, no weights."""
use_mlock: bool = Field(False, alias="use_mlock")
"""Force system to keep model in RAM."""
n_threads: Optional[int] = Field(None, alias="n_threads")
"""Number of threads to use. If None, the number
of threads is automatically determined."""
n_batch: Optional[int] = Field(512, alias="n_batch")
"""Number of tokens to process in parallel.
Should be a number between 1 and n_ctx."""
n_gpu_layers: Optional[int] = Field(None, alias="n_gpu_layers")
"""Number of layers to be loaded into gpu memory. Default None."""
verbose: bool = Field(True, alias="verbose")
"""Print verbose output to stderr."""
device: Optional[str] = Field(None, alias="device")
"""Device type to use and pass to the model"""
pooling: str = Field("mean", alias="pooling")
"""Pooling type to use for embeddings. Options are 'mean', 'max', 'min'."""
class Config:
extra = "forbid"

    @root_validator(pre=False, skip_on_failure=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that llama-cpp-python library is installed."""
        model_path = values["model_path"]
        model_param_names = [
            "n_ctx",
            "n_parts",
            "seed",
            "f16_kv",
            "logits_all",
            "vocab_only",
            "use_mlock",
            "n_threads",
            "n_batch",
            "verbose",
            "device",
        ]
        model_params = {k: values[k] for k in model_param_names}
        # For backwards compatibility, only include if non-null.
        if values["n_gpu_layers"] is not None:
            model_params["n_gpu_layers"] = values["n_gpu_layers"]
        try:
            from llama_cpp import Llama

            values["client"] = Llama(model_path, embedding=True, **model_params)
        except ImportError:
            raise ImportError(
                "Could not import llama-cpp-python library. "
                "Please install the llama-cpp-python library to "
                "use this embedding model: pip install llama-cpp-python"
            )
        except Exception as e:
            raise ValueError(
                f"Could not load Llama model from path: {model_path}. "
                f"Received error {e}"
            )
        return values

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of documents using the Llama model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        embeddings = self.client.create_embedding(texts)
        pooled_embeddings = self._apply_pooling(embeddings["data"])
        return [list(map(float, e)) for e in pooled_embeddings]

    def embed_query(self, text: str) -> List[float]:
        """Embed a query using the Llama model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        # Llama.embed returns the raw (token-level) embedding list, so wrap it
        # in the same {"embedding": ...} shape that _apply_pooling expects.
        embedding = self.client.embed(text)
        pooled_embedding = self._apply_pooling([{"embedding": embedding}])
        return list(map(float, pooled_embedding[0]))

    def _apply_pooling(self, embeddings: List[Dict[str, Any]]) -> List[List[float]]:
        """Apply pooling to the embeddings.

        Args:
            embeddings: The list of embeddings to pool.

        Returns:
            List of pooled embeddings.
        """
        if self.pooling == "mean":
            return [self._mean_pooling(e["embedding"]) for e in embeddings]
        elif self.pooling == "max":
            return [self._max_pooling(e["embedding"]) for e in embeddings]
        elif self.pooling == "min":
            return [self._min_pooling(e["embedding"]) for e in embeddings]
        else:
            raise ValueError(f"Unsupported pooling type: {self.pooling}")

    def _mean_pooling(self, token_embeddings: List[List[float]]) -> List[float]:
        # Average each dimension across the per-token embeddings.
        return [sum(dim) / len(dim) for dim in zip(*token_embeddings)]

    def _max_pooling(self, token_embeddings: List[List[float]]) -> List[float]:
        # Take the per-dimension maximum across the per-token embeddings.
        return [max(dim) for dim in zip(*token_embeddings)]

    def _min_pooling(self, token_embeddings: List[List[float]]) -> List[float]:
        # Take the per-dimension minimum across the per-token embeddings.
        return [min(dim) for dim in zip(*token_embeddings)]
```

This code adds a `pooling` parameter to `LlamaCppEmbeddings` and reduces the per-token embeddings that llama.cpp returns when pooling is disabled to a single fixed-size vector per text, which is the shape FAISS expects.
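As a quick sanity check, here is a minimal usage sketch (the model path is a placeholder and the exact dimensionality depends on your model):

```python
# Placeholder path: point this at any embedding-capable GGUF model you have locally.
embeddings = LlamaCppEmbeddings(model_path="/path/to/model.gguf", pooling="mean")

vectors = embeddings.embed_documents(["first document", "second document"])
print(len(vectors))     # 2 -> one vector per input text
print(len(vectors[0]))  # embedding dimension (a flat vector, not tokens x dims)

query_vector = embeddings.embed_query("a question about the documents")
print(len(query_vector))  # same dimension as the document vectors
```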
-
Checked other resources
Commit to Help
Example Code
Description
This example from the documentation doesn't work, because the default embedding class uses pooling type 0, which means no pooling. The output is then a token-level embedding, which FAISS can't accept.
You should consider adding a pooling type to the LlamaCppEmbeddings creation; I tried that and it works. A rough sketch of the workaround is below.
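Roughly what that workaround looks like (a sketch only: the `pooling_type` argument and the `LLAMA_POOLING_TYPE_MEAN` constant assume a recent llama-cpp-python release, and the model path is a placeholder):

```python
import llama_cpp

llm = llama_cpp.Llama(
    model_path="/path/to/model.gguf",  # placeholder path
    embedding=True,
    # Assumption: recent llama-cpp-python exposes pooling_type on the constructor;
    # LLAMA_POOLING_TYPE_MEAN asks llama.cpp to mean-pool the token embeddings.
    pooling_type=llama_cpp.LLAMA_POOLING_TYPE_MEAN,
)

vector = llm.embed("some text to embed")
print(len(vector))  # a single flat vector that FAISS can index
```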
System Info
System Information
Package Information
Other Dependencies