async def main():
    scraper = NYCInfoHubScraper()
    try:
-       # 1. Gather Excel links
+       # Gather Excel links
        excel_links = await scraper.scrape_excel_links()
        if not excel_links:
            logging.info("No Excel links found.")
            return

-       # 2. Concurrently download them (async)
+       # Concurrently download them (async)
        files_map = await scraper.concurrent_fetch(excel_links)
        if not files_map:
            logging.info("No files downloaded.")
            return

-       # 3. Hash them in parallel (CPU-bound) using ProcessPoolExecutor
+       # Hash them in parallel (CPU-bound) using ProcessPoolExecutor
        logging.info("🔬 Hashing files in parallel...")
        hash_results = scraper.parallel_hashing(files_map)

-       # 4. Save files if changed
+       # Save files if changed
        for url, content in files_map.items():
            new_hash = hash_results.get(url, None)
            if new_hash:
@@ -36,18 +36,18 @@ async def main():
        await scraper.close()


-# Run the scraper process
+# Run scraper process
if __name__ == "__main__":
    base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    logs_dir = os.path.join(base_dir, "logs")
    os.makedirs(logs_dir, exist_ok=True)

-   # (2) Create the rotating log handler
+   # Create rotating log handler
    log_file_path = os.path.join(logs_dir, "excel_fetch.log")
    rotating_handler = RotatingFileHandler(log_file_path, maxBytes=5_242_880, backupCount=2)
    rotating_handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))

-   # (3) Call basicConfig once, referencing your rotating handler
+   # Call basicConfig once, referencing rotating file handler
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
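The hashing step above calls scraper.parallel_hashing(files_map) to offload CPU-bound digest computation to worker processes, but that method's body is not part of this diff. A minimal sketch of the idea, assuming results are keyed by URL and SHA-256 is the digest (both assumptions, not taken from this commit):

import hashlib
from concurrent.futures import ProcessPoolExecutor


def _sha256_bytes(content: bytes) -> str:
    # Runs in a worker process; hashing large downloads is CPU-bound,
    # so a process pool sidesteps the GIL.
    return hashlib.sha256(content).hexdigest()


def parallel_hashing_sketch(files_map: dict[str, bytes]) -> dict[str, str]:
    # Hypothetical stand-in for NYCInfoHubScraper.parallel_hashing:
    # map each URL to the digest of its downloaded bytes.
    urls = list(files_map)
    with ProcessPoolExecutor() as pool:
        digests = list(pool.map(_sha256_bytes, (files_map[u] for u in urls)))
    return dict(zip(urls, digests))

The caller can then compare each new digest against the previously stored one and rewrite a file only when the digest differs, which is what the "Save files if changed" step in the first hunk relies on.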