add huggingface integration (embeddings, models ...) #60

Merged · 2 commits · Apr 11, 2024
24 changes: 11 additions & 13 deletions scrapegraphai/graphs/abstract_graph.py
@@ -1,9 +1,9 @@
"""
"""
Module having abstract class for creating all the graphs
"""
from abc import ABC, abstractmethod
from typing import Optional
from ..models import OpenAI, Gemini, Ollama, AzureOpenAI
from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace
from ..helpers import models_tokens


@@ -46,7 +46,7 @@ def _create_llm(self, llm_config: dict):
             # take the model after the last dash
             llm_params["model"] = llm_params["model"].split("/")[-1]
             try:
-                self.model_token = models_tokens["openai"][llm_params["model"]]
+                self.model_token = models_tokens["azure"][llm_params["model"]]
             except KeyError:
                 raise ValueError("Model not supported")
             return AzureOpenAI(llm_params)
@@ -59,14 +59,6 @@ def _create_llm(self, llm_config: dict):
             return Gemini(llm_params)
 
         elif "ollama" in llm_params["model"]:
-            """
-            Avaiable models:
-                - llama2
-                - mistral
-                - codellama
-                - dolphin-mixtral
-                - mistral-openorca
-            """
             llm_params["model"] = llm_params["model"].split("/")[-1]
 
             # allow user to set model_tokens in config
@@ -79,9 +71,15 @@ def _create_llm(self, llm_config: dict):
                     raise ValueError("Model not supported")
 
             return Ollama(llm_params)
 
+        elif "hugging_face" in llm_params["model"]:
+            try:
+                self.model_token = models_tokens["hugging_face"][llm_params["model"]]
+            except KeyError:
+                raise ValueError("Model not supported")
+            return HuggingFace(llm_params)
         else:
-            raise ValueError("Model not supported")
+            raise ValueError(
+                "Model provided by the configuration not supported")
 
     @abstractmethod
     def _create_graph(self):
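To see the routing in one place: a minimal, self-contained sketch of the provider dispatch this file implements (simplified, not the library's exact code; the toy token table and the `hugging_face/gpt2` entry are illustrative assumptions):

```python
# Simplified sketch of _create_llm's provider dispatch, for illustration only.
models_tokens = {"hugging_face": {"hugging_face/gpt2": 1024}}  # toy token table

def create_llm(llm_params: dict):
    model = llm_params["model"]
    if "hugging_face" in model:
        try:
            # Note: the branch above keys the lookup on the full model string,
            # prefix included, unlike the ollama branch, which strips it first.
            model_token = models_tokens["hugging_face"][model]
        except KeyError:
            raise ValueError("Model not supported")
        return ("HuggingFace", model_token)
    raise ValueError("Model provided by the configuration not supported")

print(create_llm({"model": "hugging_face/gpt2"}))  # -> ('HuggingFace', 1024)
```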
1 change: 1 addition & 0 deletions scrapegraphai/models/__init__.py
@@ -8,3 +8,4 @@
 from .openai_tts import OpenAITextToSpeech
 from .gemini import Gemini
 from .ollama import Ollama
+from .hugging_face import HuggingFace
22 changes: 22 additions & 0 deletions scrapegraphai/models/hugging_face.py
@@ -0,0 +1,22 @@
"""
Module for implementing the hugginface class
"""
from langchain_community.chat_models.huggingface import ChatHuggingFace


class HuggingFace(ChatHuggingFace):
"""Provides a convenient wrapper for interacting with Hugging Face language models
designed for conversational AI applications.

Args:
llm_config (dict): A configuration dictionary containing:
* api_key (str, optional): Your Hugging Face API key.
* model_name (str): The name of the Hugging Face LLM to load.
* tokenizer_name (str, optional): Name of the corresponding tokenizer.
* device (str, optional): Device for running the model ('cpu' by default).

"""

def __init__(self, llm_config: dict):
"""Initializes the HuggingFace chat model wrapper"""
super().__init__(**llm_config)
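Because the wrapper simply forwards `llm_config` as keyword arguments to `ChatHuggingFace`, usage could look like the sketch below. Hedged: the `HuggingFaceEndpoint` backend, repo id, and token are assumptions for illustration, not part of this PR.

```python
# Hedged usage sketch: ChatHuggingFace (the base class) accepts an `llm`
# backend, so one can be passed through llm_config. Placeholders throughout.
from langchain_community.llms import HuggingFaceEndpoint
from scrapegraphai.models import HuggingFace

backend = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",  # hypothetical model choice
    huggingfacehub_api_token="hf_...",             # your Hugging Face API key
)
chat = HuggingFace({"llm": backend})  # kwargs are forwarded to ChatHuggingFace
```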
13 changes: 8 additions & 5 deletions scrapegraphai/nodes/rag_node.py
@@ -7,9 +7,10 @@
 from langchain.retrievers import ContextualCompressionRetriever
 from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
 from langchain_community.document_transformers import EmbeddingsRedundantFilter
+from langchain_community.embeddings import HuggingFaceHubEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings
-from ..models import OpenAI, Ollama, AzureOpenAI
+from ..models import OpenAI, Ollama, AzureOpenAI, HuggingFace
 from langchain_community.embeddings import OllamaEmbeddings
 from .base_node import BaseNode

@@ -26,11 +27,11 @@ class RAGNode(BaseNode):
         node_type (str): The type of the node, set to "node" indicating a standard operational node.
 
     Args:
-        node_name (str, optional): The unique identifier name for the node. 
+        node_name (str, optional): The unique identifier name for the node.
             Defaults to "ParseHTMLNode".
 
     Methods:
-        execute(state): Parses the HTML document contained within the state using 
+        execute(state): Parses the HTML document contained within the state using
             the specified tags, if provided, and updates the state with the parsed content.
     """

(The -/+ pairs in this hunk differ only in trailing whitespace.)

@@ -44,7 +45,7 @@ def __init__(self, input: str, output: List[str], node_config: dict, node_name:
 
     def execute(self, state):
         """
-        Executes the node's logic to implement RAG (Retrieval-Augmented Generation) 
+        Executes the node's logic to implement RAG (Retrieval-Augmented Generation)
         The method updates the state with relevant chunks of the document.
 
         Args:
@@ -54,7 +55,7 @@ def execute(self, state):
             dict: The updated state containing the 'relevant_chunks' key with the relevant chunks.
 
         Raises:
-            KeyError: If 'document' is not found in the state, indicating that the necessary 
+            KeyError: If 'document' is not found in the state, indicating that the necessary
                 information for parsing is missing.
         """

@@ -92,6 +93,8 @@ def execute(self, state):
             embeddings = AzureOpenAIEmbeddings()
         elif isinstance(embedding_model, Ollama):
             embeddings = OllamaEmbeddings(model=embedding_model.model)
+        elif isinstance(embedding_model, HuggingFace):
+            embeddings = HuggingFaceHubEmbeddings(model=embedding_model.model)
         else:
             raise ValueError("Embedding Model missing or not supported")