Commit cdc7402

Clarification of comments
1 parent ba8c737 · commit cdc7402

2 files changed: +8 -8 lines


src/excel_scraper.py

+1 -1
@@ -201,7 +201,7 @@ async def download_excel(self, url):
         try:
             async with self.session.stream("GET", url, timeout=10) as resp:
                 if resp.status_code == 200:
-                    # Option A: Accumulate chunks in memory (still better than reading all at once)
+                    # Accumulate chunks in memory (still better than reading all at once)
                     chunks = []
                     async for chunk in resp.aiter_bytes(chunk_size=CHUNK_SIZE):
                         chunks.append(chunk)
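
For context, the comment touched here belongs to a streaming download that accumulates chunks instead of reading the whole body at once. Below is a minimal, self-contained sketch of that pattern, assuming an httpx-style async client (the session.stream / aiter_bytes calls in the diff match httpx); the function name, CHUNK_SIZE value, and example URL are placeholders, not code from this repository.

import asyncio
import httpx

CHUNK_SIZE = 64 * 1024  # placeholder chunk size, not the project's setting

async def download_bytes(url):
    # Stream the response and collect fixed-size chunks, then join once,
    # rather than loading the entire body with a single read.
    async with httpx.AsyncClient() as session:
        try:
            async with session.stream("GET", url, timeout=10) as resp:
                if resp.status_code != 200:
                    return None
                chunks = []
                async for chunk in resp.aiter_bytes(chunk_size=CHUNK_SIZE):
                    chunks.append(chunk)
                return b"".join(chunks)
        except httpx.HTTPError:
            return None

if __name__ == "__main__":
    # Placeholder URL for demonstration only.
    data = asyncio.run(download_bytes("https://example.com/report.xlsx"))
    print(len(data) if data is not None else "download failed")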

src/main.py

+7 -7
@@ -9,23 +9,23 @@
 async def main():
     scraper = NYCInfoHubScraper()
     try:
-        # 1. Gather Excel links
+        # Gather Excel links
         excel_links = await scraper.scrape_excel_links()
         if not excel_links:
             logging.info("No Excel links found.")
             return
 
-        # 2. Concurrently download them (async)
+        # Concurrently download them (async)
         files_map = await scraper.concurrent_fetch(excel_links)
         if not files_map:
             logging.info("No files downloaded.")
             return
 
-        # 3. Hash them in parallel (CPU-bound) using ProcessPoolExecutor
+        # Hash them in parallel (CPU-bound) using ProcessPoolExecutor
         logging.info("🔬 Hashing files in parallel...")
         hash_results = scraper.parallel_hashing(files_map)
 
-        # 4. Save files if changed
+        # Save files if changed
         for url, content in files_map.items():
             new_hash = hash_results.get(url, None)
             if new_hash:
@@ -36,18 +36,18 @@ async def main():
         await scraper.close()
 
 
-# Run the scraper process
+# Run scraper process
 if __name__ == "__main__":
     base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
     logs_dir = os.path.join(base_dir, "logs")
     os.makedirs(logs_dir, exist_ok=True)
 
-    # (2) Create the rotating log handler
+    # Create rotating log handler
     log_file_path = os.path.join(logs_dir, "excel_fetch.log")
     rotating_handler = RotatingFileHandler(log_file_path, maxBytes=5_242_880, backupCount=2)
     rotating_handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
 
-    # (3) Call basicConfig once, referencing your rotating handler
+    # Call basicConfig once, referencing rotating file handler
     logging.basicConfig(
         level=logging.INFO,
         format="%(asctime)s - %(levelname)s - %(message)s",
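
The comments touched above cover two setup steps: hashing downloaded payloads in a ProcessPoolExecutor (CPU-bound work kept off the event loop) and configuring logging once through a RotatingFileHandler. Below is a standard-library-only sketch of both ideas; the helper names and the example input are illustrative, not the repository's own code.

import hashlib
import logging
import os
from concurrent.futures import ProcessPoolExecutor
from logging.handlers import RotatingFileHandler

def _sha256(content: bytes) -> str:
    # Pure CPU work, so it is safe to run in a worker process.
    return hashlib.sha256(content).hexdigest()

def parallel_hashing(files_map):
    # Fan the payloads out to worker processes; executor.map preserves input
    # order, so digests can be zipped back to their URLs afterwards.
    urls = list(files_map)
    with ProcessPoolExecutor() as pool:
        digests = pool.map(_sha256, (files_map[u] for u in urls))
    return dict(zip(urls, digests))

if __name__ == "__main__":
    logs_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "logs")
    os.makedirs(logs_dir, exist_ok=True)

    handler = RotatingFileHandler(
        os.path.join(logs_dir, "excel_fetch.log"),
        maxBytes=5_242_880,   # rotate after ~5 MB
        backupCount=2,        # keep two rolled-over log files
    )
    handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
    # Call basicConfig once, handing it the rotating handler.
    logging.basicConfig(level=logging.INFO, handlers=[handler])

    # Placeholder input standing in for the real url -> bytes map.
    results = parallel_hashing({"https://example.com/a.xlsx": b"example payload"})
    logging.info("Hashed %d file(s)", len(results))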
