async def main():
    scraper = NYCInfoHubScraper()
    try:
-       # 1. Gather Excel links
+       # Gather Excel links
        excel_links = await scraper.scrape_excel_links()
        if not excel_links:
            logging.info("No Excel links found.")
            return

-       # 2. Concurrently download them (async)
+       # Concurrently download them (async)
        files_map = await scraper.concurrent_fetch(excel_links)
        if not files_map:
            logging.info("No files downloaded.")
            return

-       # 3. Hash them in parallel (CPU-bound) using ProcessPoolExecutor
+       # Hash them in parallel (CPU-bound) using ProcessPoolExecutor
        logging.info("🔬 Hashing files in parallel...")
        hash_results = scraper.parallel_hashing(files_map)

-       # 4. Save files if changed
+       # Save files if changed
        for url, content in files_map.items():
            new_hash = hash_results.get(url, None)
            if new_hash:
@@ -36,18 +36,18 @@ async def main():
        await scraper.close()


-# Run the scraper process
+# Run scraper process
if __name__ == "__main__":
    base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    logs_dir = os.path.join(base_dir, "logs")
    os.makedirs(logs_dir, exist_ok=True)

-   # (2) Create the rotating log handler
+   # Create rotating log handler
    log_file_path = os.path.join(logs_dir, "excel_fetch.log")
    rotating_handler = RotatingFileHandler(log_file_path, maxBytes=5_242_880, backupCount=2)
    rotating_handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))

-   # (3) Call basicConfig once, referencing your rotating handler
+   # Call basicConfig once, referencing rotating file handler
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
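The hashing step above calls scraper.parallel_hashing(files_map) to offload CPU-bound digest computation to worker processes, but that method's body is not part of this diff. A minimal sketch of the idea, assuming results are keyed by URL and SHA-256 is the digest (both assumptions, not taken from this commit):

import hashlib
from concurrent.futures import ProcessPoolExecutor


def _sha256_bytes(content: bytes) -> str:
    # Runs in a worker process; hashing large downloads is CPU-bound,
    # so a process pool sidesteps the GIL.
    return hashlib.sha256(content).hexdigest()


def parallel_hashing_sketch(files_map: dict[str, bytes]) -> dict[str, str]:
    # Hypothetical stand-in for NYCInfoHubScraper.parallel_hashing:
    # map each URL to the digest of its downloaded bytes.
    urls = list(files_map)
    with ProcessPoolExecutor() as pool:
        digests = list(pool.map(_sha256_bytes, (files_map[u] for u in urls)))
    return dict(zip(urls, digests))

The caller can then compare each new digest against the previously stored one and rewrite a file only when the digest differs, which is what the "Save files if changed" step in the first hunk relies on.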