from tiktoken import encoding_for_model
# Tokenizer used to measure text length in tokens.
encoding = encoding_for_model("gpt-4o")
# Upper bound on tokens per chunk, as measured by `encoding`.
chunk_size = 200
chunks = []
current_chunk = ""
for component in components:
    # Skip components that have no `text` attribute or whose text is empty.
    component_text = getattr(component, "text", None)
    if not component_text:
        continue

    # Check if adding this text would exceed chunk_size. The combined string
    # is tokenized as a whole because token counts are not additive across
    # a concatenation boundary.
    token_count = len(encoding.encode(current_chunk + component_text))
    if token_count <= chunk_size:
        current_chunk += component_text
        continue

    # Save current chunk and start a new one.
    if current_chunk:
        chunks.append(current_chunk)

    component_tokens = encoding.encode(component_text)
    if len(component_tokens) <= chunk_size:
        # Common case: the component fits on its own; keep its text verbatim.
        current_chunk = component_text
        continue

    # A single component larger than chunk_size would otherwise be emitted
    # as one over-budget chunk, so cut it into windows of at most chunk_size
    # tokens. The final window (never empty) stays open in current_chunk so
    # later components can still be packed behind it.
    # NOTE(review): a window boundary can fall inside a multi-byte character,
    # in which case decode() may substitute a replacement character --
    # confirm this is acceptable for the input data.
    while len(component_tokens) > chunk_size:
        chunks.append(encoding.decode(component_tokens[:chunk_size]))
        component_tokens = component_tokens[chunk_size:]
    current_chunk = encoding.decode(component_tokens)

# Flush whatever is still being accumulated after the last component.
if current_chunk:
    chunks.append(current_chunk)
print(f"Created {len(chunks)} chunks")