From 4c3ea8f4ac1e5d86501cbdb1140d4b906cb7fecd Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Wed, 3 Apr 2024 12:53:12 +0200 Subject: [PATCH 1/3] fixed image_to_tex_node and refactoring --- examples/ScrapeGraphAI_generated_graph | 19 ------------ examples/graph_builder_example.py | 31 -------------------- examples/{ => inputs}/plain_html_example.txt | 0 examples/{ => results}/result.csv | 0 examples/{ => results}/result.json | 0 examples/scrape_plain_text.py | 6 ++-- scrapegraphai/nodes/base_node.py | 7 +++-- scrapegraphai/nodes/image_to_text_node.py | 29 ++++++++++-------- scrapegraphai/utils/parse_state_keys.py | 8 +++-- scrapegraphai/utils/remover.py | 3 -- scrapegraphai/utils/save_audio_from_bytes.py | 4 +-- 11 files changed, 32 insertions(+), 75 deletions(-) delete mode 100644 examples/ScrapeGraphAI_generated_graph delete mode 100644 examples/graph_builder_example.py rename examples/{ => inputs}/plain_html_example.txt (100%) rename examples/{ => results}/result.csv (100%) rename examples/{ => results}/result.json (100%) diff --git a/examples/ScrapeGraphAI_generated_graph b/examples/ScrapeGraphAI_generated_graph deleted file mode 100644 index acc3232c..00000000 --- a/examples/ScrapeGraphAI_generated_graph +++ /dev/null @@ -1,19 +0,0 @@ -// ScrapeGraphAI Generated Graph -digraph { - node [color=lightblue2 style=filled] - FetchHTMLNode [shape=doublecircle] - GetProbableTagsNode - ParseNode - RAGNode - GenerateAnswerNode - ConditionalNode - ImageToTextNode - TextToSpeechNode - FetchHTMLNode -> GetProbableTagsNode - GetProbableTagsNode -> ParseNode - ParseNode -> RAGNode - RAGNode -> GenerateAnswerNode - RAGNode -> ConditionalNode - ConditionalNode -> ImageToTextNode - ConditionalNode -> TextToSpeechNode -} diff --git a/examples/graph_builder_example.py b/examples/graph_builder_example.py deleted file mode 100644 index 53956e09..00000000 --- a/examples/graph_builder_example.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -Example of graph builder -""" -import os -from dotenv import load_dotenv -from scrapegraphai.builders import GraphBuilder - -load_dotenv() -openai_key = os.getenv("OPENAI_APIKEY") - -# Define the configuration for the graph -graph_config = { - "llm": { - "api_key": openai_key, - "model": "gpt-3.5-turbo", - }, -} - -# Example usage of GraphBuilder -graph_builder = GraphBuilder( - user_prompt="Extract the news and generate a text summary with a voiceover.", - config=graph_config -) - -graph_json = graph_builder.build_graph() - -# Convert the resulting JSON to Graphviz format -graphviz_graph = graph_builder.convert_json_to_graphviz(graph_json) - -# Save the graph to a file and open it in the default viewer -graphviz_graph.render('ScrapeGraphAI_generated_graph', view=True) diff --git a/examples/plain_html_example.txt b/examples/inputs/plain_html_example.txt similarity index 100% rename from examples/plain_html_example.txt rename to examples/inputs/plain_html_example.txt diff --git a/examples/result.csv b/examples/results/result.csv similarity index 100% rename from examples/result.csv rename to examples/results/result.csv diff --git a/examples/result.json b/examples/results/result.json similarity index 100% rename from examples/result.json rename to examples/results/result.json diff --git a/examples/scrape_plain_text.py b/examples/scrape_plain_text.py index fb2c0f84..81dee0f9 100644 --- a/examples/scrape_plain_text.py +++ b/examples/scrape_plain_text.py @@ -5,6 +5,7 @@ import os from dotenv import load_dotenv from scrapegraphai.graphs import SmartScraperGraph +from scrapegraphai.utils import convert_to_csv, convert_to_json load_dotenv() openai_key = os.getenv("OPENAI_APIKEY") @@ -19,7 +20,7 @@ # It could be also a http request using the request model -text = open('plain_html_example.txt', 'r', encoding="utf-8") +text = open('inputs/plain_html_example.txt', 'r', encoding="utf-8") # Create the SmartScraperGraph instance smart_scraper_graph = SmartScraperGraph( @@ -32,6 +33,5 @@ print(result) # Save to json or csv -onvert_to_csv(result, "result") +convert_to_csv(result, "result") convert_to_json(result, "result") - diff --git a/scrapegraphai/nodes/base_node.py b/scrapegraphai/nodes/base_node.py index e9766588..6a85f2d3 100644 --- a/scrapegraphai/nodes/base_node.py +++ b/scrapegraphai/nodes/base_node.py @@ -40,7 +40,8 @@ class BaseNode(ABC): raised to indicate the incorrect usage. """ - def __init__(self, node_name: str, node_type: str, input: str, output: List[str], min_input_len: int = 1, model_config: Optional[dict] = None): + def __init__(self, node_name: str, node_type: str, input: str, output: List[str], + min_input_len: int = 1, model_config: Optional[dict] = None): """ Initialize the node with a unique identifier and a specified node type. @@ -73,7 +74,9 @@ def execute(self, state: dict) -> dict: pass def get_input_keys(self, state: dict) -> List[str]: - # Use the _parse_input_keys method to identify which state keys are needed based on the input attribute + """Use the _parse_input_keys method to identify which state keys are + needed based on the input attribute + """ try: input_keys = self._parse_input_keys(state, self.input) self._validate_input_keys(input_keys) diff --git a/scrapegraphai/nodes/image_to_text_node.py b/scrapegraphai/nodes/image_to_text_node.py index 703355d5..0a845d05 100644 --- a/scrapegraphai/nodes/image_to_text_node.py +++ b/scrapegraphai/nodes/image_to_text_node.py @@ -1,7 +1,7 @@ -""" +""" Module for the ImageToTextNode class. """ - +from typing import List from .base_node import BaseNode @@ -10,34 +10,39 @@ class ImageToTextNode(BaseNode): A class representing a node that processes an image and returns the text description. Attributes: - llm (OpenAIImageToText): An instance of the OpenAIImageToText class. + llm_model (OpenAIImageToText): An instance of the OpenAIImageToText class. Methods: execute(state, url): Execute the node's logic and return the updated state. """ - def __init__(self, llm, node_name: str): + def __init__(self, input: str, output: List[str], model_config: dict, + node_name: str = "GetProbableTags"): """ Initializes an instance of the ImageToTextNode class. Args: - llm (OpenAIImageToText): An instance of the OpenAIImageToText class. - node_name (str): name of the node + input (str): The input for the node. + output (List[str]): The output of the node. + model_config (dict): Configuration for the model. + node_name (str): Name of the node. """ - super().__init__(node_name, "node") - self.llm = llm + super().__init__(node_name, "node", input, output, 2, model_config) + self.llm_model = model_config["llm_model"] def execute(self, state: dict, url: str) -> dict: """ Execute the node's logic and return the updated state. + Args: state (dict): The current state of the graph. - url (str): url of the image where to - :return: The updated state after executing this node. - """ + url (str): URL of the image to process. + Returns: + dict: The updated state after executing this node. + """ print("---GENERATING TEXT FROM IMAGE---") - text_answer = self.llm.run(url) + text_answer = self.llm_model.run(url) state.update({"image_text": text_answer}) return state diff --git a/scrapegraphai/utils/parse_state_keys.py b/scrapegraphai/utils/parse_state_keys.py index c5da7e8a..5c99a60f 100644 --- a/scrapegraphai/utils/parse_state_keys.py +++ b/scrapegraphai/utils/parse_state_keys.py @@ -7,6 +7,8 @@ def parse_expression(expression, state: dict): """ Function for parsing the expressions + Args: + state (dict): state to elaborate """ # Check for empty expression if not expression: @@ -69,14 +71,14 @@ def evaluate_expression(expression): '|'.join(sub_result) + expression[end+1:] return evaluate_simple_expression(expression) - result = evaluate_expression(expression) + temp_result = evaluate_expression(expression) - if not result: + if not temp_result: raise ValueError("No state keys matched the expression.") # Remove redundant state keys from the result, without changing their order final_result = [] - for key in result: + for key in temp_result: if key not in final_result: final_result.append(key) diff --git a/scrapegraphai/utils/remover.py b/scrapegraphai/utils/remover.py index 9f765473..1cde0c0f 100644 --- a/scrapegraphai/utils/remover.py +++ b/scrapegraphai/utils/remover.py @@ -18,14 +18,11 @@ def remover(html_content: str) -> str: soup = BeautifulSoup(html_content, 'html.parser') - # Estrai il titolo title_tag = soup.find('title') title = title_tag.get_text() if title_tag else "" - # Rimuovi i tag