Skip to content

[FIX] Fixed issue where summary context was not being used properly #1260

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 23, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions backend/sample.env
Original file line number Diff line number Diff line change
@@ -75,9 +75,9 @@ PROMPT_STUDIO_FILE_PATH=/app/prompt-studio-data

# Structure Tool Image (Runs prompt studio exported tools)
# https://hub.docker.com/r/unstract/tool-structure
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.75"
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.76"
STRUCTURE_TOOL_IMAGE_NAME="unstract/tool-structure"
STRUCTURE_TOOL_IMAGE_TAG="0.0.75"
STRUCTURE_TOOL_IMAGE_TAG="0.0.76"

# Feature Flags
EVALUATION_SERVER_IP=unstract-flipt
Original file line number Diff line number Diff line change
@@ -86,7 +86,7 @@ def update_exec_metadata(
metadata = {X2TextConstants.WHISPER_HASH: whisper_hash_value}
for key, value in metadata.items():
tool_exec_metadata[key] = value
metadata_path = str(Path(execution_run_data_folder / IKeys.METADATA_FILE))
metadata_path = str(Path(execution_run_data_folder) / IKeys.METADATA_FILE)
ToolUtils.dump_json(
file_to_dump=metadata_path,
json_to_dump=metadata,
2 changes: 1 addition & 1 deletion tools/structure/src/config/properties.json
Original file line number Diff line number Diff line change
@@ -2,7 +2,7 @@
"schemaVersion": "0.0.1",
"displayName": "Structure Tool",
"functionName": "structure_tool",
"toolVersion": "0.0.75",
"toolVersion": "0.0.76",
"description": "This is a template tool which can answer set of input prompts designed in the Prompt Studio",
"input": {
"description": "File that needs to be indexed and parsed for answers"
23 changes: 13 additions & 10 deletions tools/structure/src/main.py
Original file line number Diff line number Diff line change
@@ -131,14 +131,15 @@ def run(

if tool_settings[SettingsKeys.ENABLE_SINGLE_PASS_EXTRACTION]:
if summarize_as_source:
summarize_file_hash = self._summarize_and_index(
summarize_file_path, summarize_file_hash = self._summarize_and_index(
tool_settings=tool_settings,
tool_data_dir=tool_data_dir,
responder=responder,
outputs=outputs,
usage_kwargs=usage_kwargs,
)
payload[SettingsKeys.FILE_HASH] = summarize_file_hash
payload[SettingsKeys.FILE_PATH] = summarize_file_path
self.stream_log("Fetching response for single pass extraction")
# Since indexing is not involved for single pass
index_metrics = {"time_taken(s)": 0}
@@ -152,15 +153,18 @@ def run(
reindex = True
for output in outputs:
if summarize_as_source:
summarize_file_hash = self._summarize_and_index(
tool_settings=tool_settings,
tool_data_dir=tool_data_dir,
responder=responder,
outputs=outputs,
usage_kwargs=usage_kwargs,
summarize_file_path, summarize_file_hash = (
self._summarize_and_index(
tool_settings=tool_settings,
tool_data_dir=tool_data_dir,
responder=responder,
outputs=outputs,
usage_kwargs=usage_kwargs,
)
)
payload[SettingsKeys.OUTPUTS] = outputs
payload[SettingsKeys.FILE_HASH] = summarize_file_hash
payload[SettingsKeys.FILE_PATH] = summarize_file_path
# Since indexing is not involved for summary
index_metrics[output[SettingsKeys.NAME]] = {"time_taken(s)": 0}
break
@@ -282,7 +286,7 @@ def _summarize_and_index(
responder: PromptTool,
outputs: dict[str, Any],
usage_kwargs: dict[Any, Any] = {},
) -> str:
) -> tuple[str, str]:
"""Summarizes the context of the file and indexes the summarized
content.

@@ -345,11 +349,10 @@ def _summarize_and_index(
path=summarize_file_path, mode="w", data=summarized_context
)

self.stream_log("Indexing summarized context")
summarize_file_hash: str = self.workflow_filestorage.get_hash_from_file(
path=summarize_file_path
)
return summarize_file_hash
return str(summarize_file_path), summarize_file_hash


if __name__ == "__main__":