Skip to content

Commit ecffddc

Browse files
committed
fix(cli): preserve spaces and newlines in treesitter chunking.
1 parent: 7bfcedc · commit: ecffddc

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

src/vectorcode/chunking.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -150,7 +150,7 @@ def __chunk_node(
150150
f"Traversing at node {node.text.decode()} at position {node.byte_range}"
151151
)
152152
current_chunk: str = ""
153-
153+
prev_node = None
154154
current_start = None
155155

156156
for child in node.children:
@@ -184,10 +184,17 @@ def __chunk_node(
184184
current_start = Point(
185185
row=child.start_point.row + 1, column=child.start_point.column
186186
)
187+
prev_node = child
187188

188189
elif len(current_chunk) + child_length <= self.config.chunk_size:
189190
# Add to current chunk
191+
if prev_node:
192+
if prev_node.end_point.row != child.start_point.row:
193+
current_chunk += "\n"
194+
else:
195+
current_chunk += " "
190196
current_chunk += child_bytes.decode()
197+
prev_node = child
191198

192199
else:
193200
# Yield current chunk and start new one

0 commit comments

Comments
 (0)