ScrapeGraphAI · PeriniM · Apr 3, 2024 · Apr 3, 2024 · Apr 3, 2024 · Apr 3, 2024
diff --git a/examples/ScrapeGraphAI_generated_graph b/examples/ScrapeGraphAI_generated_graph
diff --git a/examples/graph_builder_example.py b/examples/graph_builder_example.py
diff --git a/examples/plain_html_example.txt → examples/inputs/plain_html_example.txt b/examples/plain_html_example.txt → examples/inputs/plain_html_example.txt
diff --git a/examples/result.csv → examples/results/result.csv b/examples/result.csv → examples/results/result.csv
diff --git a/examples/result.json → examples/results/result.json b/examples/result.json → examples/results/result.json
diff --git a/examples/scrape_plain_text.py b/examples/scrape_plain_text.py
@@ -5,6 +5,7 @@
 import os
 from dotenv import load_dotenv
 from scrapegraphai.graphs import SmartScraperGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json
 
 load_dotenv()
 openai_key = os.getenv("OPENAI_APIKEY")
@@ -19,7 +20,7 @@
 
 
 # It could be also a http request using the request model
-text = open('plain_html_example.txt', 'r', encoding="utf-8")
+text = open('inputs/plain_html_example.txt', 'r', encoding="utf-8")
 
 # Create the SmartScraperGraph instance
 smart_scraper_graph = SmartScraperGraph(
@@ -32,6 +33,5 @@
 print(result)
 
 # Save to json or csv
-onvert_to_csv(result, "result")
+convert_to_csv(result, "result")
 convert_to_json(result, "result")
-
diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py
@@ -71,8 +71,7 @@ def _create_llm(self, llm_config: dict):
             return OpenAI(llm_params)
         elif "gemini" in llm_params["model"]:
             return Gemini(llm_params)
-        else:
-            raise ValueError("Model not supported")
+        raise ValueError("Model not supported")
 
     def _create_graph(self):
         """

diff --git a/scrapegraphai/models/gemini.py b/scrapegraphai/models/gemini.py
@@ -1,3 +1,6 @@
+"""
+Gemini module configuration
+"""
 from langchain_google_genai import ChatGoogleGenerativeAI
 
 

diff --git a/scrapegraphai/nodes/base_node.py b/scrapegraphai/nodes/base_node.py
@@ -40,7 +40,8 @@ class BaseNode(ABC):
                     raised to indicate the incorrect usage.
     """
 
-    def __init__(self, node_name: str, node_type: str, input: str, output: List[str], min_input_len: int = 1, model_config: Optional[dict] = None):
+    def __init__(self, node_name: str, node_type: str, input: str, output: List[str],
+                 min_input_len: int = 1, model_config: Optional[dict] = None):
         """
         Initialize the node with a unique identifier and a specified node type.
 
@@ -73,7 +74,9 @@ def execute(self, state: dict) -> dict:
         pass
 
     def get_input_keys(self, state: dict) -> List[str]:
-        # Use the _parse_input_keys method to identify which state keys are needed based on the input attribute
+        """Use the _parse_input_keys method to identify which state keys are 
+        needed based on the input attribute
+        """
         try:
             input_keys = self._parse_input_keys(state, self.input)
             self._validate_input_keys(input_keys)

diff --git a/scrapegraphai/nodes/image_to_text_node.py b/scrapegraphai/nodes/image_to_text_node.py
@@ -1,7 +1,7 @@
-""" 
+"""
 Module for the ImageToTextNode class.
 """
-
+from typing import List
 from .base_node import BaseNode
 
 
@@ -10,34 +10,43 @@ class ImageToTextNode(BaseNode):
     A class representing a node that processes an image and returns the text description.
 
     Attributes:
-        llm (OpenAIImageToText): An instance of the OpenAIImageToText class.
+        llm_model (OpenAIImageToText): An instance of the OpenAIImageToText class.
 
     Methods:
         execute(state, url): Execute the node's logic and return the updated state.
     """
 
-    def __init__(self, llm, node_name: str):
+    def __init__(self, input: str, output: List[str], model_config: dict,
+                 node_name: str = "ImageToText"):
         """
         Initializes an instance of the ImageToTextNode class.
 
         Args:
-            llm (OpenAIImageToText): An instance of the OpenAIImageToText class.
-            node_name (str): name of the node
+            input (str): State-key expression; the first matched key holds the image URL.
+            output (List[str]): The output of the node.
+            model_config (dict): Configuration for the model; must contain "llm_model".
+            node_name (str): Name of the node.
         """
-        super().__init__(node_name, "node")
-        self.llm = llm
+        super().__init__(node_name, "node", input, output, 1, model_config)
+        self.llm_model = model_config["llm_model"]
 
-    def execute(self, state: dict, url: str) -> dict:
+    def execute(self, state: dict) -> dict:
         """
         Execute the node's logic and return the updated state.
+
         Args:
             state (dict): The current state of the graph.
-            url (str): url of the image where to 
-        :return: The updated state after executing this node.
-        """
 
+        Returns:
+            dict: The updated state after executing this node.
+        """
         print("---GENERATING TEXT FROM IMAGE---")
-        text_answer = self.llm.run(url)
+        input_keys = self.get_input_keys(state)
+
+        input_data = [state[key] for key in input_keys]
+        url = input_data[0]
+
+        text_answer = self.llm_model.run(url)
 
         state.update({"image_text": text_answer})
         return state
diff --git a/scrapegraphai/utils/parse_state_keys.py b/scrapegraphai/utils/parse_state_keys.py
@@ -7,6 +7,8 @@
 def parse_expression(expression, state: dict):
     """ 
     Function for parsing the expressions
+    Args:
+        state (dict): state whose keys are matched against the expression
     """
     # Check for empty expression
     if not expression:
@@ -69,14 +71,14 @@ def evaluate_expression(expression):
                 '|'.join(sub_result) + expression[end+1:]
         return evaluate_simple_expression(expression)
 
-    result = evaluate_expression(expression)
+    temp_result = evaluate_expression(expression)
 
-    if not result:
+    if not temp_result:
         raise ValueError("No state keys matched the expression.")
 
     # Remove redundant state keys from the result, without changing their order
     final_result = []
-    for key in result:
+    for key in temp_result:
         if key not in final_result:
             final_result.append(key)
 

diff --git a/scrapegraphai/utils/remover.py b/scrapegraphai/utils/remover.py
@@ -18,14 +18,11 @@ def remover(html_content: str) -> str:
 
     soup = BeautifulSoup(html_content, 'html.parser')
 
-    # Estrai il titolo
     title_tag = soup.find('title')
     title = title_tag.get_text() if title_tag else ""
 
-    # Rimuovi i tag <script> in tutto il documento
     [script.extract() for script in soup.find_all('script')]
 
-    # Estrai il corpo del documento
     body_content = soup.find('body')
     body = str(body_content) if body_content else ""
 

diff --git a/scrapegraphai/utils/save_audio_from_bytes.py b/scrapegraphai/utils/save_audio_from_bytes.py
@@ -1,11 +1,11 @@
 """
 This utility function saves the byte response as an audio file.
 """
-
 from pathlib import Path
+from typing import Union
 
 
-def save_audio_from_bytes(byte_response, output_path):
+def save_audio_from_bytes(byte_response: bytes, output_path: Union[str, Path]) -> None:
     """
     Saves the byte response as an audio file.