From 4c3ea8f4ac1e5d86501cbdb1140d4b906cb7fecd Mon Sep 17 00:00:00 2001
From: VinciGit00 <mvincig11@gmail.com>
Date: Wed, 3 Apr 2024 12:53:12 +0200
Subject: [PATCH 1/3] fixed image_to_text_node and refactoring

---
 examples/ScrapeGraphAI_generated_graph       | 19 ------------
 examples/graph_builder_example.py            | 31 --------------------
 examples/{ => inputs}/plain_html_example.txt |  0
 examples/{ => results}/result.csv            |  0
 examples/{ => results}/result.json           |  0
 examples/scrape_plain_text.py                |  6 ++--
 scrapegraphai/nodes/base_node.py             |  7 +++--
 scrapegraphai/nodes/image_to_text_node.py    | 29 ++++++++++--------
 scrapegraphai/utils/parse_state_keys.py      |  8 +++--
 scrapegraphai/utils/remover.py               |  3 --
 scrapegraphai/utils/save_audio_from_bytes.py |  4 +--
 11 files changed, 32 insertions(+), 75 deletions(-)
 delete mode 100644 examples/ScrapeGraphAI_generated_graph
 delete mode 100644 examples/graph_builder_example.py
 rename examples/{ => inputs}/plain_html_example.txt (100%)
 rename examples/{ => results}/result.csv (100%)
 rename examples/{ => results}/result.json (100%)

diff --git a/examples/ScrapeGraphAI_generated_graph b/examples/ScrapeGraphAI_generated_graph
deleted file mode 100644
index acc3232c..00000000
--- a/examples/ScrapeGraphAI_generated_graph
+++ /dev/null
@@ -1,19 +0,0 @@
-// ScrapeGraphAI Generated Graph
-digraph {
-	node [color=lightblue2 style=filled]
-	FetchHTMLNode [shape=doublecircle]
-	GetProbableTagsNode
-	ParseNode
-	RAGNode
-	GenerateAnswerNode
-	ConditionalNode
-	ImageToTextNode
-	TextToSpeechNode
-	FetchHTMLNode -> GetProbableTagsNode
-	GetProbableTagsNode -> ParseNode
-	ParseNode -> RAGNode
-	RAGNode -> GenerateAnswerNode
-	RAGNode -> ConditionalNode
-	ConditionalNode -> ImageToTextNode
-	ConditionalNode -> TextToSpeechNode
-}
diff --git a/examples/graph_builder_example.py b/examples/graph_builder_example.py
deleted file mode 100644
index 53956e09..00000000
--- a/examples/graph_builder_example.py
+++ /dev/null
@@ -1,31 +0,0 @@
-""" 
-Example of graph builder
-"""
-import os
-from dotenv import load_dotenv
-from scrapegraphai.builders import GraphBuilder
-
-load_dotenv()
-openai_key = os.getenv("OPENAI_APIKEY")
-
-# Define the configuration for the graph
-graph_config = {
-    "llm": {
-        "api_key": openai_key,
-        "model": "gpt-3.5-turbo",
-    },
-}
-
-# Example usage of GraphBuilder
-graph_builder = GraphBuilder(
-    user_prompt="Extract the news and generate a text summary with a voiceover.",
-    config=graph_config
-)
-
-graph_json = graph_builder.build_graph()
-
-# Convert the resulting JSON to Graphviz format
-graphviz_graph = graph_builder.convert_json_to_graphviz(graph_json)
-
-# Save the graph to a file and open it in the default viewer
-graphviz_graph.render('ScrapeGraphAI_generated_graph', view=True)
diff --git a/examples/plain_html_example.txt b/examples/inputs/plain_html_example.txt
similarity index 100%
rename from examples/plain_html_example.txt
rename to examples/inputs/plain_html_example.txt
diff --git a/examples/result.csv b/examples/results/result.csv
similarity index 100%
rename from examples/result.csv
rename to examples/results/result.csv
diff --git a/examples/result.json b/examples/results/result.json
similarity index 100%
rename from examples/result.json
rename to examples/results/result.json
diff --git a/examples/scrape_plain_text.py b/examples/scrape_plain_text.py
index fb2c0f84..81dee0f9 100644
--- a/examples/scrape_plain_text.py
+++ b/examples/scrape_plain_text.py
@@ -5,6 +5,7 @@
 import os
 from dotenv import load_dotenv
 from scrapegraphai.graphs import SmartScraperGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json
 
 load_dotenv()
 openai_key = os.getenv("OPENAI_APIKEY")
@@ -19,7 +20,7 @@
 
 
 # It could be also a http request using the request model
-text = open('plain_html_example.txt', 'r', encoding="utf-8")
+text = open('inputs/plain_html_example.txt', 'r', encoding="utf-8")
 
 # Create the SmartScraperGraph instance
 smart_scraper_graph = SmartScraperGraph(
@@ -32,6 +33,5 @@
 print(result)
 
 # Save to json or csv
-onvert_to_csv(result, "result")
+convert_to_csv(result, "result")
 convert_to_json(result, "result")
-
diff --git a/scrapegraphai/nodes/base_node.py b/scrapegraphai/nodes/base_node.py
index e9766588..6a85f2d3 100644
--- a/scrapegraphai/nodes/base_node.py
+++ b/scrapegraphai/nodes/base_node.py
@@ -40,7 +40,8 @@ class BaseNode(ABC):
                     raised to indicate the incorrect usage.
     """
 
-    def __init__(self, node_name: str, node_type: str, input: str, output: List[str], min_input_len: int = 1, model_config: Optional[dict] = None):
+    def __init__(self, node_name: str, node_type: str, input: str, output: List[str],
+                 min_input_len: int = 1, model_config: Optional[dict] = None):
         """
         Initialize the node with a unique identifier and a specified node type.
 
@@ -73,7 +74,9 @@ def execute(self, state: dict) -> dict:
         pass
 
     def get_input_keys(self, state: dict) -> List[str]:
-        # Use the _parse_input_keys method to identify which state keys are needed based on the input attribute
+        """Identify which state keys are needed, based on the input
+        attribute, by delegating to the _parse_input_keys method.
+        """
         try:
             input_keys = self._parse_input_keys(state, self.input)
             self._validate_input_keys(input_keys)
diff --git a/scrapegraphai/nodes/image_to_text_node.py b/scrapegraphai/nodes/image_to_text_node.py
index 703355d5..0a845d05 100644
--- a/scrapegraphai/nodes/image_to_text_node.py
+++ b/scrapegraphai/nodes/image_to_text_node.py
@@ -1,7 +1,7 @@
-""" 
+"""
 Module for the ImageToTextNode class.
 """
-
+from typing import List
 from .base_node import BaseNode
 
 
@@ -10,34 +10,39 @@ class ImageToTextNode(BaseNode):
     A class representing a node that processes an image and returns the text description.
 
     Attributes:
-        llm (OpenAIImageToText): An instance of the OpenAIImageToText class.
+        llm_model (OpenAIImageToText): An instance of the OpenAIImageToText class.
 
     Methods:
         execute(state, url): Execute the node's logic and return the updated state.
     """
 
-    def __init__(self, llm, node_name: str):
+    def __init__(self, input: str, output: List[str], model_config: dict,
+                 node_name: str = "GetProbableTags"):
         """
         Initializes an instance of the ImageToTextNode class.
 
         Args:
-            llm (OpenAIImageToText): An instance of the OpenAIImageToText class.
-            node_name (str): name of the node
+            input (str): The input for the node.
+            output (List[str]): The output of the node.
+            model_config (dict): Configuration for the model.
+            node_name (str): Name of the node.
         """
-        super().__init__(node_name, "node")
-        self.llm = llm
+        super().__init__(node_name, "node", input, output, 2, model_config)
+        self.llm_model = model_config["llm_model"]
 
     def execute(self, state: dict, url: str) -> dict:
         """
         Execute the node's logic and return the updated state.
+
         Args:
             state (dict): The current state of the graph.
-            url (str): url of the image where to 
-        :return: The updated state after executing this node.
-        """
+            url (str): URL of the image to process.
 
+        Returns:
+            dict: The updated state after executing this node.
+        """
         print("---GENERATING TEXT FROM IMAGE---")
-        text_answer = self.llm.run(url)
+        text_answer = self.llm_model.run(url)
 
         state.update({"image_text": text_answer})
         return state
diff --git a/scrapegraphai/utils/parse_state_keys.py b/scrapegraphai/utils/parse_state_keys.py
index c5da7e8a..5c99a60f 100644
--- a/scrapegraphai/utils/parse_state_keys.py
+++ b/scrapegraphai/utils/parse_state_keys.py
@@ -7,6 +7,8 @@
 def parse_expression(expression, state: dict):
     """ 
     Function for parsing the expressions
+    Args:
+        state (dict): state whose keys are matched against the expression
     """
     # Check for empty expression
     if not expression:
@@ -69,14 +71,14 @@ def evaluate_expression(expression):
                 '|'.join(sub_result) + expression[end+1:]
         return evaluate_simple_expression(expression)
 
-    result = evaluate_expression(expression)
+    temp_result = evaluate_expression(expression)
 
-    if not result:
+    if not temp_result:
         raise ValueError("No state keys matched the expression.")
 
     # Remove redundant state keys from the result, without changing their order
     final_result = []
-    for key in result:
+    for key in temp_result:
         if key not in final_result:
             final_result.append(key)
 
diff --git a/scrapegraphai/utils/remover.py b/scrapegraphai/utils/remover.py
index 9f765473..1cde0c0f 100644
--- a/scrapegraphai/utils/remover.py
+++ b/scrapegraphai/utils/remover.py
@@ -18,14 +18,11 @@ def remover(html_content: str) -> str:
 
     soup = BeautifulSoup(html_content, 'html.parser')
 
-    # Estrai il titolo
     title_tag = soup.find('title')
     title = title_tag.get_text() if title_tag else ""
 
-    # Rimuovi i tag <script> in tutto il documento
     [script.extract() for script in soup.find_all('script')]
 
-    # Estrai il corpo del documento
     body_content = soup.find('body')
     body = str(body_content) if body_content else ""
 
diff --git a/scrapegraphai/utils/save_audio_from_bytes.py b/scrapegraphai/utils/save_audio_from_bytes.py
index f250edbf..41c53d7b 100644
--- a/scrapegraphai/utils/save_audio_from_bytes.py
+++ b/scrapegraphai/utils/save_audio_from_bytes.py
@@ -1,11 +1,11 @@
 """
 This utility function saves the byte response as an audio file.
 """
-
 from pathlib import Path
+from typing import Union
 
 
-def save_audio_from_bytes(byte_response, output_path):
+def save_audio_from_bytes(byte_response: bytes, output_path: Union[str, Path]) -> None:
     """
     Saves the byte response as an audio file.
 

From c9951340b51a9a792fa6a3201ee9e1a51a5a37fa Mon Sep 17 00:00:00 2001
From: VinciGit00 <mvincig11@gmail.com>
Date: Wed, 3 Apr 2024 12:56:25 +0200
Subject: [PATCH 2/3] fixed image_to_text_node and refactoring

---
 scrapegraphai/graphs/smart_scraper_graph.py | 3 +--
 scrapegraphai/models/gemini.py              | 3 +++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py
index 0bfda6fc..16e42b81 100644
--- a/scrapegraphai/graphs/smart_scraper_graph.py
+++ b/scrapegraphai/graphs/smart_scraper_graph.py
@@ -71,8 +71,7 @@ def _create_llm(self, llm_config: dict):
             return OpenAI(llm_params)
         elif "gemini" in llm_params["model"]:
             return Gemini(llm_params)
-        else:
-            raise ValueError("Model not supported")
+        raise ValueError("Model not supported")
 
     def _create_graph(self):
         """
diff --git a/scrapegraphai/models/gemini.py b/scrapegraphai/models/gemini.py
index 95ee2d57..e35fd684 100644
--- a/scrapegraphai/models/gemini.py
+++ b/scrapegraphai/models/gemini.py
@@ -1,3 +1,6 @@
+"""
+Gemini module configuration
+"""
 from langchain_google_genai import ChatGoogleGenerativeAI
 
 

From 3269b4aaa754c9de943c295832c9178178b43b52 Mon Sep 17 00:00:00 2001
From: VinciGit00 <mvincig11@gmail.com>
Date: Wed, 3 Apr 2024 13:33:17 +0200
Subject: [PATCH 3/3] fix ImageToTextNode default name and read image URL from state

---
 scrapegraphai/nodes/image_to_text_node.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/scrapegraphai/nodes/image_to_text_node.py b/scrapegraphai/nodes/image_to_text_node.py
index 0a845d05..5d0a4949 100644
--- a/scrapegraphai/nodes/image_to_text_node.py
+++ b/scrapegraphai/nodes/image_to_text_node.py
@@ -17,7 +17,7 @@ class ImageToTextNode(BaseNode):
     """
 
     def __init__(self, input: str, output: List[str], model_config: dict,
-                 node_name: str = "GetProbableTags"):
+                 node_name: str = "ImageToText"):
         """
         Initializes an instance of the ImageToTextNode class.
 
@@ -27,21 +27,25 @@ def __init__(self, input: str, output: List[str], model_config: dict,
             model_config (dict): Configuration for the model.
             node_name (str): Name of the node.
         """
-        super().__init__(node_name, "node", input, output, 2, model_config)
+        super().__init__(node_name, "node", input, output, 1, model_config)
         self.llm_model = model_config["llm_model"]
 
-    def execute(self, state: dict, url: str) -> dict:
+    def execute(self, state: dict) -> dict:
         """
         Execute the node's logic and return the updated state.
 
         Args:
             state (dict): The current state of the graph.
-            url (str): URL of the image to process.
 
         Returns:
             dict: The updated state after executing this node.
         """
         print("---GENERATING TEXT FROM IMAGE---")
+        input_keys = self.get_input_keys(state)
+
+        input_data = [state[key] for key in input_keys]
+        url = input_data[0]
+
         text_answer = self.llm_model.run(url)
 
         state.update({"image_text": text_answer})