-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtext-reader.py
30 lines (21 loc) · 889 Bytes
/
text-reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import re

import pytesseract
# Tell pytesseract where the Tesseract executable lives; this is a hard-coded
# Windows install path, so adjust it when running on another machine.
pytesseract.pytesseract.tesseract_cmd = r'C:\Bin\Tesseract-OCR\tesseract.exe'
def read_image(path: str) -> list:
    """Run OCR on an image and return its text split into sentence fragments.

    Args:
        path: Image file path, handed straight to
            ``pytesseract.image_to_string``.

    Returns:
        The recognised text split on ``'.'``. A fragment keeps the single
        leading space that followed the previous period, and text ending in
        a period yields a trailing empty string.
    """
    raw = pytesseract.image_to_string(path)
    # Deleting the newlines outright would fuse the last word of one line
    # with the first word of the next, so collapse every whitespace run
    # (newlines included) into a single space instead. This also trims
    # leading/trailing whitespace from the recognised text.
    flattened = ' '.join(raw.split())
    return flattened.split('.')
def highlight_in_list(text: list, to_highlight: str) -> list:
    """Return a copy of *text* with occurrences of *to_highlight* marked.

    Both the exact spelling of *to_highlight* and its ``str.title()`` form
    are replaced by ``>!>`` + the upper-cased term + ``<!<``. Every input
    string produces exactly one output string, in the original order;
    strings without a match are passed through unchanged.

    Args:
        text: Strings (e.g. sentences) to scan.
        to_highlight: Term to mark. An empty term marks nothing.

    Returns:
        A new list with the same length as *text*.
    """
    if not to_highlight:
        # An empty term would match between every character; treat it as
        # "nothing to highlight" and hand back an unchanged copy.
        return list(text)

    marker = f'>!>{to_highlight.upper()}<!<'
    # dict.fromkeys de-duplicates while keeping order, for terms whose
    # title-cased form equals the term itself (e.g. 'Word').
    variants = dict.fromkeys((to_highlight, to_highlight.title()))
    # One alternation pattern replaces both spellings in a single pass, so an
    # inserted marker is never re-scanned and wrapped a second time.
    pattern = re.compile('|'.join(re.escape(variant) for variant in variants))
    # A callable replacement keeps backslashes in the marker literal.
    return [pattern.sub(lambda _match: marker, sentence) for sentence in text]
def write_to_file(text: list) -> None:
    """Write each item of *text* to ``res.txt``, one item per line.

    The file is created in the current working directory and overwritten if
    it already exists.

    Args:
        text: Items to write; each is formatted with ``str()`` semantics and
            followed by a newline.
    """
    # The context manager guarantees the handle is flushed and closed even if
    # a write fails. UTF-8 is set explicitly because OCR output may contain
    # characters the platform's default encoding cannot represent.
    with open('res.txt', 'w', encoding='utf-8') as out:
        out.writelines(f'{line}\n' for line in text)
if __name__ == '__main__':
    # Script entry point: OCR the sample image, mark the search term in each
    # fragment, then save the result.
    sentences = read_image('test.jpg')
    highlighted = highlight_in_list(sentences, 'word')
    write_to_file(highlighted)