From e60a5d6a093789f9432aa6e4252a8d4e3ed5a506 Mon Sep 17 00:00:00 2001 From: Kian-Meng Ang Date: Wed, 26 Feb 2025 23:01:06 +0800 Subject: [PATCH] Fix typos Found via `codespell -L te,ser,parms` --- README.md | 4 ++-- docs/README.html | 4 ++-- docs/api.html | 4 ++-- docs/api.md | 4 ++-- docs/cli.html | 2 +- docs/cli.md | 2 +- docs/disassembler.html | 4 ++-- docs/disassembler.md | 4 ++-- docs/index.html | 2 +- docs/introduction_pdf_syntax.html | 16 ++++++++-------- pdfsyntax/api.py | 4 ++-- pdfsyntax/filters.py | 2 +- pdfsyntax/markdown.py | 4 ++-- 13 files changed, 28 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index f39e74e..a5413f9 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ PDFSyntax is mostly made of simple functions. Example: The Doc object is probably the only dedicated class you will need to handle. It is a black box that stores all the internal states of a document: - content that is cached/memoized from an original file, -- modifications that add/modifiy/delete content and that are tracked as incremental updates. +- modifications that add/modify/delete content and that are tracked as incremental updates. ```Python >>> doc @@ -97,6 +97,6 @@ You then can write the modified PDF to disk. Note that the resulting file contai ## Open-Source, not Open-Contribution yet PDFSyntax is [MIT licensed](https://github.com/desgeeko/pdfsyntax/blob/main/LICENCE) but is currently closed to contributions. -> Personal note: this is a pet projet of mine and my time is limited. First I need to focus on my roadmap (new features and refactoring) and then I will happily accept contributions when everything is a little more stabilised. +> Personal note: this is a pet project of mine and my time is limited. First I need to focus on my roadmap (new features and refactoring) and then I will happily accept contributions when everything is a little more stabilised. 
diff --git a/docs/README.html b/docs/README.html index b7d3887..921a7aa 100644 --- a/docs/README.html +++ b/docs/README.html @@ -50,7 +50,7 @@

API overview

The Doc object is probably the only dedicated class you will need to handle. It is a black box that stores all the internal states of a document:

>>> doc
 <PDF Doc in revision 1 with 0 modified object(s)>
@@ -72,7 +72,7 @@ 

API overview

Open-Source, not Open-Contribution yet

PDFSyntax is MIT licensed but is currently closed to contributions.

-

Personal note: this is a pet projet of mine and my time is limited. First I need to focus on my roadmap (new features and refactoring) and then I will happily accept contributions when everything is a little more stabilised.

+

Personal note: this is a pet project of mine and my time is limited. First I need to focus on my roadmap (new features and refactoring) and then I will happily accept contributions when everything is a little more stabilised.

diff --git a/docs/api.html b/docs/api.html index 089e657..aa3e424 100644 --- a/docs/api.html +++ b/docs/api.html @@ -48,7 +48,7 @@

Low-level access to object tree

1j is a complex number (!) representing indirect reference 1 0 R. Why? Because the approach is to map PDF object types to Python basic built-in types as much as possible, and it is a concise way to show both the object number (as the imaginary part) and the generation number (as the real part). Moreover the generation is very often equal to zero, so the real part is not shown. You may think of the j as a "jump" to another object :)

get_object gives direct access to indirect objects.

>>> #Access to document catalog, given that the trailer redirects to 1j for root
->>> #(equivalent to catalog fonction)
+>>> #(equivalent to catalog function)
 >>> doc.get_object(1j)
 {'/Pages': 3j, '/Outlines': 2j, '/Type': '/Catalog'}
 
@@ -62,7 +62,7 @@

Pages

>>> #(In this example, nothing is inherited from upper nodes)

The page function goes further by merging inherited attributes with local attributes of each page and giving the result in a list.

-
>>> #Equivalent list with computed page attribues
+
>>> #Equivalent list with computed page attributes
 >>> pdf.pages(doc)
 [{'/Resources': {'/Font': {'/F1': 7j}, '/ProcSet': 6j},
   '/Contents': 5j,
diff --git a/docs/api.md b/docs/api.md
index 7ea07f2..d0ab3f0 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -53,7 +53,7 @@ You may think of the `j` as a "jump" to another object :)
 
 ```Python
 >>> #Access to document catalog, given that the trailer redirects to 1j for root
->>> #(equivalent to catalog fonction)
+>>> #(equivalent to catalog function)
 >>> doc.get_object(1j)
 {'/Pages': 3j, '/Outlines': 2j, '/Type': '/Catalog'}
 ```
@@ -73,7 +73,7 @@ Page index is a tree structure where attributes can be inherited from parent nod
 The `page` function goes further by merging inherited attributes with local attributes of each page and giving the result in a list.
 
 ```Python
->>> #Equivalent list with computed page attribues
+>>> #Equivalent list with computed page attributes
 >>> pdf.pages(doc)
 [{'/Resources': {'/Font': {'/F1': 7j}, '/ProcSet': 6j},
   '/Contents': 5j,
diff --git a/docs/cli.html b/docs/cli.html
index 86aed60..80386de 100644
--- a/docs/cli.html
+++ b/docs/cli.html
@@ -38,7 +38,7 @@ 

fonts

  • Number of pages where it occurs
  • browse

    -

    This command generates HTML output that looks like the raw PDF file with additionnal hyperlinks and information that expose its internal structure and relations between its objects. Redirect the standard output to a file that you can open in your browser:

    +

    This command generates HTML output that looks like the raw PDF file with additional hyperlinks and information that expose its internal structure and relations between its objects. Redirect the standard output to a file that you can open in your browser:

        python3 -m pdfsyntax browse file.pdf > inspection_file.html
     

    Please refer to the Browse article for details.

    diff --git a/docs/cli.md b/docs/cli.md index e0311b1..0d97c31 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -32,7 +32,7 @@ The output shows a list of fonts used in the file, with the following tabular da - Number of pages where it occurs ### `browse` -This command generates HTML output that looks like the raw PDF file with additionnal hyperlinks and information that expose its internal structure and relations between its objects. +This command generates HTML output that looks like the raw PDF file with additional hyperlinks and information that expose its internal structure and relations between its objects. Redirect the standard output to a file that you can open in your browser: python3 -m pdfsyntax browse file.pdf > inspection_file.html diff --git a/docs/disassembler.html b/docs/disassembler.html index eac85f3..606722b 100644 --- a/docs/disassembler.html +++ b/docs/disassembler.html @@ -76,13 +76,13 @@

    Grep examples

    Columns

    -

    Most of the columns are collapsable in order to save horizontal space. For example the position (#2) may have a maximum width of 10 digits but for small files the unecessary leading zeros are removed.

    +

    Most of the columns are collapsible in order to save horizontal space. For example the position (#2) may have a maximum width of 10 digits but for small files the unnecessary leading zeros are removed.

    - + diff --git a/docs/disassembler.md b/docs/disassembler.md index 91f7eeb..85d7b12 100644 --- a/docs/disassembler.md +++ b/docs/disassembler.md @@ -82,11 +82,11 @@ Search all mentions of an indirect object, both in itself and in xref: ### Columns -Most of the columns are collapsable in order to save horizontal space. For example the position (`#2`) may have a maximum width of 10 digits but for small files the unecessary leading zeros are removed. +Most of the columns are collapsible in order to save horizontal space. For example the position (`#2`) may have a maximum width of 10 digits but for small files the unnecessary leading zeros are removed. | Column | Description | |--------|-------------| -| `1` | `+` for a region with absolute positionning, `-` for a detail line (xref, /XRef, /ObjStm)| +| `1` | `+` for a region with absolute positioning, `-` for a detail line (xref, /XRef, /ObjStm)| | `2` | Position, absolute (`+`) or relative (`-`)| | `3` | Size in bytes| | `4` | Percentage compressed size / plain size| diff --git a/docs/index.html b/docs/index.html index d5bb908..83dcee6 100644 --- a/docs/index.html +++ b/docs/index.html @@ -26,7 +26,7 @@

    API

    CLI

    -

    Get quicky some insights about a PDF file from the command line, like the metadata, the fonts used, the text of the document,...

    +

    Get quickly some insights about a PDF file from the command line, like the metadata, the fonts used, the text of the document,...

    diff --git a/docs/introduction_pdf_syntax.html b/docs/introduction_pdf_syntax.html index 7f42b15..6798a59 100644 --- a/docs/introduction_pdf_syntax.html +++ b/docs/introduction_pdf_syntax.html @@ -261,7 +261,7 @@

    Header

    The first line of a PDF file is a %PDF-X.Y header. These numbers indicate the version of the specification the file complies to. - When the numbers are high, "modern" features may be used, but this is not an obligation because of PDF backward compatibily. + When the numbers are high, "modern" features may be used, but this is not an obligation because of PDF backward compatibility. For example, a PDF 1.2 document is also a valid PDF 1.7 document.

    @@ -340,7 +340,7 @@

    Indirect Objects

    - In the previous example, indirect object #7 contains a payload that is a dictionnary defining 5 key-value pairs. + In the previous example, indirect object #7 contains a payload that is a dictionary defining 5 key-value pairs. The following section is here to describe most of the object types.

    @@ -361,17 +361,17 @@

    Object Types

    And there are collection types :

      -
    • Array : an ordered list of atomic objects written bewteen brackets, like [true 800 (ABC) /Something],
    • +
    • Array : an ordered list of atomic objects written between brackets, like [true 800 (ABC) /Something],
    • Dictionary : a map / associative array of unordered key-value pairs; all keys must be names, and the object is enclosed in double angle brackets like << /Key1 (Value1) /Key2 (Value2) >>; - Note that the same separator (for example space or carriage return) may occur bewteen a key and a value and bewteen distinct pairs: + Note that the same separator (for example space or carriage return) may occur between a key and a value and between distinct pairs: a parser needs to keep a context in order to determine if the next token is a key or a value.

    And there is a composite type for content :

      -
    • Stream : a dictionnary immediately followed by a sequence of bytes enclosed bewteen the stream and endstream keywords; +
    • Stream : a dictionary immediately followed by a sequence of bytes enclosed between the stream and endstream keywords; It typically conveys either a sequence of commands that write content on a page or a blob used in a sequence of commands (font file, image).

    @@ -398,7 +398,7 @@

    Filters

    But very often some filter modifies the bytes sequence. A filter may compress the data or encode it, and several may be chained to form a pipeline. - For example a stream dictionnary containing /Filter [/ASCII85Decode /FlateDecode] (besides the mandatory /Length attribute) + For example a stream dictionary containing /Filter [/ASCII85Decode /FlateDecode] (besides the mandatory /Length attribute) should be decoded from ASCII Base85 into binary and then decompressed with the deflate algorithm.

    @@ -413,7 +413,7 @@

    Cross-Reference Stream

  • and the stream content contains a structure specifying the location of indirect objects
  • - This mecanism adds a feature that was not possible with Cross-Reference tables where all objects are accessed with file offset in bytes: + This mechanism adds a feature that was not possible with Cross-Reference tables where all objects are accessed with file offset in bytes: an indirect object may be located inside another indirect object. In that case the terminology says that the container is an Object Stream that contains compressed objects.

    @@ -456,7 +456,7 @@

    Document Structure

    Incremental Updates

    It is possible to build a new revision of a document without writing a whole new file: changes are appended to the original file. Changes consist in new or modified objects, a Cross-reference, and a startxref that points to it. - The Cross-Reference (either its trailer or its stream dictionary) contains a /Prev attribute thats links the new revision to the original Cross-Reference. + The Cross-Reference (either its trailer or its stream dictionary) contains a /Prev attribute that links the new revision to the original Cross-Reference.

    diff --git a/pdfsyntax/api.py b/pdfsyntax/api.py index 0a879f7..9d9566a 100644 --- a/pdfsyntax/api.py +++ b/pdfsyntax/api.py @@ -273,9 +273,9 @@ def build_text_fragments(page_contents: list, f: list): """List all text fragmemts that are part of a page, with their coordinates. Each list item is another list made of: - - the intial transformation matrix + - the initial transformation matrix - the text - - the final tranformation matrix + - the final transformation matrix """ tfs = [] gs = [] diff --git a/pdfsyntax/filters.py b/pdfsyntax/filters.py index b7fa72b..14f598f 100644 --- a/pdfsyntax/filters.py +++ b/pdfsyntax/filters.py @@ -117,7 +117,7 @@ def encode_stream(stream, stream_def): def asciihex(stream, columns = None): - """ASCIIHex encoder augmented with a beautifier (colums and newlines) for DEBUG ONLY.""" + """ASCIIHex encoder augmented with a beautifier (columns and newlines) for DEBUG ONLY.""" if columns is None: return (binascii.hexlify(stream)).upper() else: diff --git a/pdfsyntax/markdown.py b/pdfsyntax/markdown.py index 07a950a..d16c3e5 100644 --- a/pdfsyntax/markdown.py +++ b/pdfsyntax/markdown.py @@ -257,7 +257,7 @@ def parse_markdown(lines: list, start_pos = 0, start_indent = 0) -> tuple: def tags(string: str) -> str: - """Tranform both style & links.""" + """Transform both style & links.""" return style(link(string)) @@ -269,7 +269,7 @@ def entities(string: str) -> str: def assemble_html(blocks: list, html = '') -> str: - """Recusively build HTML string for parsed markdown.""" + """Recursively build HTML string for parsed markdown.""" for typ, items in blocks: html += f"<{typ.lower()}>" for x in items:
    Column Description
    1 + for a region with absolute positionning, - for a detail line (xref, /XRef, /ObjStm) + for a region with absolute positioning, - for a detail line (xref, /XRef, /ObjStm)
    2 Position, absolute (+) or relative (-)