Skip to content

Commit ae2971c

Browse files
authored
Merge pull request #111 from VinciGit00/groq-implementation
Groq implementation
2 parents 7a48204 + 719a353 commit ae2971c

File tree

7 files changed

+75
-2784
lines changed

7 files changed

+75
-2784
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ You can use the `SmartScraper` class to extract information from a website using
4646
The `SmartScraper` class is a direct graph implementation that uses the most common nodes present in a web scraping pipeline. For more information, please see the [documentation](https://scrapegraph-ai.readthedocs.io/en/latest/).
4747
### Case 1: Extracting information using Ollama
4848
Remember to download the model on Ollama separately!
49+
4950
```python
5051
from scrapegraphai.graphs import SmartScraperGraph
5152

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
"""
Basic example of scraping pipeline using SmartScraper
"""

import os

from dotenv import load_dotenv

from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

# Read GROQ_APIKEY (and any other variables) from a local .env file.
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

groq_key = os.getenv("GROQ_APIKEY")

graph_config = {
    "llm": {
        "model": "groq/gemma-7b-it",
        "api_key": groq_key,
        "temperature": 0
    },
    "embeddings": {
        "model": "ollama/nomic-embed-text",
        "temperature": 0,
        # Local Ollama endpoint; change this if Ollama runs elsewhere.
        "base_url": "http://localhost:11434",
    }
}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description and the author.",
    # also accepts a string with the already downloaded HTML code
    source="https://perinim.github.io/projects",
    config=graph_config
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

poetry.lock

Lines changed: 0 additions & 2772 deletions
This file was deleted.

requirements.txt

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
1-
langchain==0.1.6
2-
langchain_community==0.0.19
3-
langchain_core==0.1.22
4-
langchain_openai==0.0.5
5-
langchain_google_genai==0.0.11
6-
faiss-cpu==1.7.4
1+
langchain==0.1.14
2+
langchain-openai==0.1.1
3+
langchain-google-genai==1.0.1
74
html2text==2020.1.16
5+
faiss-cpu==1.8.0
86
beautifulsoup4==4.12.3
97
pandas==2.0.3
108
python-dotenv==1.0.1
119
tiktoken>=0.5.2,<0.6.0
1210
tqdm==4.66.1
11+
graphviz==0.20.1
12+
google==3.0.0
1313
minify-html==0.15.0
14-
langchain-groq==0.1.3
1514
free-proxy==1.1.1
15+
langchain-groq==0.1.3

scrapegraphai/graphs/abstract_graph.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44
from abc import ABC, abstractmethod
55
from typing import Optional
6-
from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace
6+
from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace, Groq
77
from ..helpers import models_tokens
88

99

@@ -20,7 +20,7 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
2020
self.source = source
2121
self.config = config
2222
self.llm_model = self._create_llm(config["llm"])
23-
self.embedder_model = None if "embeddings" not in config else self._create_llm(
23+
self.embedder_model = self.llm_model if "embeddings" not in config else self._create_llm(
2424
config["embeddings"])
2525
self.graph = self._create_graph()
2626
self.final_state = None
@@ -84,6 +84,14 @@ def _create_llm(self, llm_config: dict):
8484
except KeyError:
8585
raise KeyError("Model not supported")
8686
return HuggingFace(llm_params)
87+
elif "groq" in llm_params["model"]:
88+
llm_params["model"] = llm_params["model"].split("/")[-1]
89+
90+
try:
91+
self.model_token = models_tokens["groq"][llm_params["model"]]
92+
except KeyError:
93+
raise KeyError("Model not supported")
94+
return Groq(llm_params)
8795
else:
8896
raise ValueError(
8997
"Model provided by the configuration not supported")

scrapegraphai/helpers/models_tokens.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@
3232
"mistral-openorca": 32000,
3333
"stablelm-zephyr": 8192
3434
},
35-
"gemma": {
36-
"gemma": 8192,
37-
}
35+
36+
"groq": {
37+
"llama3-8b-8192": 8192,
38+
"llama3-70b-8192": 8192,
39+
"mixtral-8x7b-32768": 32768,
40+
"gemma-7b-it": 8192,
41+
},
3842
}

scrapegraphai/models/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@
99
from .gemini import Gemini
1010
from .ollama import Ollama
1111
from .hugging_face import HuggingFace
12+
from .groq import Groq

0 commit comments

Comments (0)