Skip to content

Commit ae2971c

Browse files
authored
Merge pull request #111 from VinciGit00/groq-implementation
Groq implementation
2 parents 7a48204 + 719a353 commit ae2971c

File tree

7 files changed

+75
-2784
lines changed

7 files changed

+75
-2784
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ You can use the `SmartScraper` class to extract information from a website using
4646
The `SmartScraper` class is a direct graph implementation that uses the most common nodes present in a web scraping pipeline. For more information, please see the [documentation](https://scrapegraph-ai.readthedocs.io/en/latest/).
4747
### Case 1: Extracting information using Ollama
4848
Remember to download the model on Ollama separately!
49+
4950
```python
5051
from scrapegraphai.graphs import SmartScraperGraph
5152

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
"""
Basic example of scraping pipeline using SmartScraper
"""

import os

from dotenv import load_dotenv

from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

# Read GROQ_APIKEY (and any other variables) from a local .env file.
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

groq_key = os.getenv("GROQ_APIKEY")

graph_config = {
    "llm": {
        "model": "groq/gemma-7b-it",
        "api_key": groq_key,
        "temperature": 0
    },
    "embeddings": {
        "model": "ollama/nomic-embed-text",
        "temperature": 0,
        # Local Ollama endpoint; change this if Ollama runs elsewhere.
        "base_url": "http://localhost:11434",
    }
}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description and the author.",
    # also accepts a string with the already downloaded HTML code
    source="https://perinim.github.io/projects",
    config=graph_config
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

poetry.lock

Lines changed: 0 additions & 2772 deletions
This file was deleted.

requirements.txt

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
1-
langchain==0.1.6
2-
langchain_community==0.0.19
3-
langchain_core==0.1.22
4-
langchain_openai==0.0.5
5-
langchain_google_genai==0.0.11
6-
faiss-cpu==1.7.4
1+
langchain==0.1.14
2+
langchain-openai==0.1.1
3+
langchain-google-genai==1.0.1
74
html2text==2020.1.16
5+
faiss-cpu==1.8.0
86
beautifulsoup4==4.12.3
97
pandas==2.0.3
108
python-dotenv==1.0.1
119
tiktoken>=0.5.2,<0.6.0
1210
tqdm==4.66.1
11+
graphviz==0.20.1
12+
google==3.0.0
1313
minify-html==0.15.0
14-
langchain-groq==0.1.3
1514
free-proxy==1.1.1
15+
langchain-groq==0.1.3

scrapegraphai/graphs/abstract_graph.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44
from abc import ABC, abstractmethod
55
from typing import Optional
6-
from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace
6+
from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace, Groq
77
from ..helpers import models_tokens
88

99

@@ -20,7 +20,7 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
2020
self.source = source
2121
self.config = config
2222
self.llm_model = self._create_llm(config["llm"])
23-
self.embedder_model = None if "embeddings" not in config else self._create_llm(
23+
self.embedder_model = self.llm_model if "embeddings" not in config else self._create_llm(
2424
config["embeddings"])
2525
self.graph = self._create_graph()
2626
self.final_state = None
@@ -84,6 +84,14 @@ def _create_llm(self, llm_config: dict):
8484
except KeyError:
8585
raise KeyError("Model not supported")
8686
return HuggingFace(llm_params)
87+
elif "groq" in llm_params["model"]:
88+
llm_params["model"] = llm_params["model"].split("/")[-1]
89+
90+
try:
91+
self.model_token = models_tokens["groq"][llm_params["model"]]
92+
except KeyError:
93+
raise KeyError("Model not supported")
94+
return Groq(llm_params)
8795
else:
8896
raise ValueError(
8997
"Model provided by the configuration not supported")

scrapegraphai/helpers/models_tokens.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@
3232
"mistral-openorca": 32000,
3333
"stablelm-zephyr": 8192
3434
},
35-
"gemma": {
36-
"gemma": 8192,
37-
}
35+
36+
"groq": {
37+
"llama3-8b-8192": 8192,
38+
"llama3-70b-8192": 8192,
39+
"mixtral-8x7b-32768": 32768,
40+
"gemma-7b-it": 8192,
41+
},
3842
}

scrapegraphai/models/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@
99
from .gemini import Gemini
1010
from .ollama import Ollama
1111
from .hugging_face import HuggingFace
12+
from .groq import Groq

0 commit comments

Comments (0)