2
2
Module for fetching the HTML node
3
3
"""
4
4
5
- from typing import List
6
- from langchain_community .document_loaders import AsyncHtmlLoader
5
+ from typing import List , Optional
6
+ from langchain_community .document_loaders import AsyncChromiumLoader
7
7
from langchain_core .documents import Document
8
8
from .base_node import BaseNode
9
9
from ..utils .remover import remover
@@ -37,7 +37,7 @@ class FetchNode(BaseNode):
37
37
to succeed.
38
38
"""
39
39
40
- def __init__ (self , input : str , output : List [str ], node_name : str = "Fetch" ):
40
+ def __init__ (self , input : str , output : List [str ], node_config : Optional [ dict ], node_name : str = "Fetch" ):
41
41
"""
42
42
Initializes the FetchHTMLNode with a node name and node type.
43
43
Arguments:
@@ -46,6 +46,8 @@ def __init__(self, input: str, output: List[str], node_name: str = "Fetch"):
46
46
"""
47
47
super ().__init__ (node_name , "node" , input , output , 1 )
48
48
49
+ self .headless = True if node_config is None else node_config .get ("headless" , True )
50
+
49
51
def execute (self , state ):
50
52
"""
51
53
Executes the node's logic to fetch HTML content from a specified URL and
@@ -79,14 +81,21 @@ def execute(self, state):
79
81
80
82
else :
81
83
if self .node_config is not None and self .node_config .get ("endpoint" ) is not None :
82
- loader = AsyncHtmlLoader (
83
- source , proxies = {"http" : self .node_config ["endpoint" ]})
84
+
85
+ loader = AsyncChromiumLoader (
86
+ [source ],
87
+ proxies = {"http" : self .node_config ["endpoint" ]},
88
+ headless = self .headless ,
89
+ )
84
90
else :
85
- loader = AsyncHtmlLoader (source )
91
+ loader = AsyncChromiumLoader (
92
+ [source ],
93
+ headless = self .headless ,
94
+ )
86
95
87
96
document = loader .load ()
88
97
compressed_document = [
89
- Document (page_content = remover (str (document )))]
98
+ Document (page_content = remover (str (document [ 0 ]. page_content )))]
90
99
91
100
state .update ({self .output [0 ]: compressed_document })
92
101
return state
0 commit comments