From e389d820cd0e71bbe95b0844dc9f91a5fc3273ac Mon Sep 17 00:00:00 2001
From: "patched.codes[bot]" <298395+patched.codes[bot]@users.noreply.github.com>
Date: Mon, 14 Apr 2025 09:15:01 +0000
Subject: [PATCH 1/5] Patched patchwork/common/client/llm/openai_.py

---
 patchwork/common/client/llm/openai_.py | 52 +++++++-------------------
 1 file changed, 14 insertions(+), 38 deletions(-)

diff --git a/patchwork/common/client/llm/openai_.py b/patchwork/common/client/llm/openai_.py
index 0d4aa725a..a232baaf6 100644
--- a/patchwork/common/client/llm/openai_.py
+++ b/patchwork/common/client/llm/openai_.py
@@ -110,46 +110,22 @@ def is_model_supported(self, model: str) -> bool:
         return model in _cached_list_models_from_openai(self.__api_key)
 
     def __get_model_limits(self, model: str) -> int:
+        """Return the token limit for a given model."""
         return self.__MODEL_LIMITS.get(model, 128_000)
+
+    def get_model_limit(self, model: str) -> int:
+        """
+        Public method to get the model's context length limit.
+
+        Args:
+            model: The model name
+
+        Returns:
+            The maximum context length in tokens
+        """
+        return self.__get_model_limits(model)
+
 
-    def is_prompt_supported(
-        self,
-        messages: Iterable[ChatCompletionMessageParam],
-        model: str,
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        response_format: dict | completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
-        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
-        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        file: Path | NotGiven = NOT_GIVEN,
-    ) -> int:
-        # might not implement model endpoint
-        if self.__is_not_openai_url():
-            return 1
-
-        model_limit = self.__get_model_limits(model)
-        token_count = 0
-        encoding = None
-        try:
-            encoding = tiktoken.encoding_for_model(model)
-        except Exception as e:
-            logger.error(f"Error getting encoding for model {model}: {e}, using gpt-4o as fallback")
-            encoding = tiktoken.encoding_for_model("gpt-4o")
-        for message in messages:
-            message_token_count = len(encoding.encode(message.get("content")))
-            token_count = token_count + message_token_count
-            if token_count > model_limit:
-                return -1
-
-        return model_limit - token_count
 
     def truncate_messages(
         self, messages: Iterable[ChatCompletionMessageParam], model: str

From 5b501d56997a090ffb969a05758858c4b5e5bd81 Mon Sep 17 00:00:00 2001
From: "patched.codes[bot]" <298395+patched.codes[bot]@users.noreply.github.com>
Date: Mon, 14 Apr 2025 09:15:01 +0000
Subject: [PATCH 2/5] Patched patchwork/common/client/llm/aio.py

---
 patchwork/common/client/llm/aio.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/patchwork/common/client/llm/aio.py b/patchwork/common/client/llm/aio.py
index c25ae7414..6a0f70335 100644
--- a/patchwork/common/client/llm/aio.py
+++ b/patchwork/common/client/llm/aio.py
@@ -99,6 +99,23 @@ def system(self) -> str:
 
     def is_model_supported(self, model: str) -> bool:
         return any(client.is_model_supported(model) for client in self.__clients)
+
+    def get_model_limit(self, model: str) -> int:
+        """
+        Get the model's context length limit from the appropriate client.
+
+        Args:
+            model: The model name
+
+        Returns:
+            The maximum context length in tokens, or a default value if not found
+        """
+        for client in self.__clients:
+            if client.is_model_supported(model) and hasattr(client, 'get_model_limit'):
+                return client.get_model_limit(model)
+
+        # Default value if no client found or client doesn't have the method
+        return 128_000
 
     def is_prompt_supported(
         self,
@@ -119,6 +136,13 @@
         top_p: Optional[float] | NotGiven = NOT_GIVEN,
         file: Path | NotGiven = NOT_GIVEN,
     ) -> int:
+        """
+        Check if the prompt is supported by the model and return available tokens.
+
+        Returns:
+            int: If > 0, represents available tokens remaining after prompt.
+                 If <= 0, indicates that prompt is too large.
+        """
         for client in self.__clients:
             if client.is_model_supported(model):
                 inputs = dict(
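
Note (illustrative sketch, not part of the patch series): patches 1 and 2 expose a public get_model_limit() on both the OpenAI client and the aggregating AioLlmClient. The snippet below is one way calling code might combine it with is_prompt_supported() to budget completion tokens; budget_completion_tokens() and the message shape are assumptions, only the two client methods come from the patches.

    # Sketch only: budget_completion_tokens() is a hypothetical helper, not code from
    # this series. It assumes an already-constructed AioLlmClient (`client`).
    from patchwork.common.client.llm.aio import AioLlmClient

    def budget_completion_tokens(client: AioLlmClient, messages: list[dict], model: str) -> int:
        """Return how many tokens are left for the completion after the prompt."""
        remaining = client.is_prompt_supported(messages=messages, model=model)
        if remaining <= 0:
            # Prompt alone fills the context window; caller should truncate first.
            return 0
        # Never ask for more than the model's total context length.
        return min(remaining, client.get_model_limit(model))
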
From a291de2f456e59b83f4138d63247b1b06e197678 Mon Sep 17 00:00:00 2001
From: "patched.codes[bot]" <298395+patched.codes[bot]@users.noreply.github.com>
Date: Mon, 14 Apr 2025 09:15:01 +0000
Subject: [PATCH 3/5] Patched patchwork/steps/CallLLM/CallLLM.py

---
 patchwork/steps/CallLLM/CallLLM.py | 61 +++++++++++-------------------
 1 file changed, 23 insertions(+), 38 deletions(-)

diff --git a/patchwork/steps/CallLLM/CallLLM.py b/patchwork/steps/CallLLM/CallLLM.py
index def62b780..129c9f34d 100644
--- a/patchwork/steps/CallLLM/CallLLM.py
+++ b/patchwork/steps/CallLLM/CallLLM.py
@@ -26,43 +26,7 @@ class _InnerCallLLMResponse:
 
 
 class CallLLM(Step, input_class=CallLLMInputs, output_class=CallLLMOutputs):
-    def __init__(self, inputs: dict):
-        super().__init__(inputs)
-        # Set 'openai_key' from inputs or environment if not already set
-        inputs.setdefault("openai_api_key", os.environ.get("OPENAI_API_KEY"))
-
-        prompt_file = inputs.get("prompt_file")
-        if prompt_file is not None:
-            prompt_file_path = Path(prompt_file)
-            if not prompt_file_path.is_file():
-                raise ValueError(f'Unable to find Prompt file: "{prompt_file}"')
-            try:
-                with open(prompt_file_path, "r") as fp:
-                    self.prompts = json.load(fp)
-            except json.JSONDecodeError as e:
-                raise ValueError(f'Invalid Json Prompt file "{prompt_file}": {e}')
-        elif "prompts" in inputs.keys():
-            self.prompts = inputs["prompts"]
-        else:
-            raise ValueError('Missing required data: "prompt_file" or "prompts"')
-
-        self.call_limit = int(inputs.get("max_llm_calls", -1))
-        self.model_args = {key[len("model_") :]: value for key, value in inputs.items() if key.startswith("model_")}
-        self.save_responses_to_file = inputs.get("save_responses_to_file", None)
-        self.model = inputs.get("model", "gpt-4o-mini")
-        self.allow_truncated = inputs.get("allow_truncated", False)
-        self.file = inputs.get("file", None)
-        self.client = AioLlmClient.create_aio_client(inputs)
-        if self.client is None:
-            raise ValueError(
-                f"Model API key not found.\n"
-                f'Please login at: "{TOKEN_URL}",\n'
-                "Please go to the Integration's tab and generate an API key.\n"
-                "Please copy the access token that is generated, "
-                "and add `--patched_api_key=` to the command line.\n"
-                "\n"
-                "If you are using an OpenAI API Key, please set `--openai_api_key=`.\n"
-            )
+
 
     def __persist_to_file(self, contents):
         # Convert relative path to absolute path
@@ -121,10 +85,22 @@ def __call(self, prompts: list[list[dict]]) -> list[_InnerCallLLMResponse]:
             kwargs["file"] = Path(self.file)
 
         for prompt in prompts:
-            is_input_accepted = self.client.is_prompt_supported(model=self.model, messages=prompt, **kwargs) > 0
+            available_tokens = self.client.is_prompt_supported(model=self.model, messages=prompt, **kwargs)
+            is_input_accepted = available_tokens > 0
+
            if not is_input_accepted:
                 self.set_status(StepStatus.WARNING, "Input token limit exceeded.")
                 prompt = self.client.truncate_messages(prompt, self.model)
+
+            # Handle the case where model_max_tokens was set to -1
+            # Calculate max_tokens based on available tokens from the model after prompt
+            if hasattr(self, '_use_max_tokens') and self._use_max_tokens:
+                if available_tokens > 0:
+                    kwargs['max_tokens'] = available_tokens
+                    logger.info(f"Setting max_tokens to {available_tokens} based on available model context")
+                else:
+                    # If we can't determine available tokens, set a reasonable default
+                    logger.warning("Could not determine available tokens. Using model default.")
 
             logger.trace(f"Message sent: \n{escape(indent(pformat(prompt), ' '))}")
             try:
@@ -184,4 +160,13 @@ def __parse_model_args(self) -> dict:
             else:
                 new_model_args[key] = arg
 
+        # Handle special case for max_tokens = -1 (use maximum available tokens)
+        if 'max_tokens' in new_model_args and new_model_args['max_tokens'] == -1:
+            # Will be handled during the chat completion call
+            logger.info("Using maximum available tokens for the model")
+            del new_model_args['max_tokens']  # Remove it for now, we'll calculate it later
+            self._use_max_tokens = True
+        else:
+            self._use_max_tokens = False
+
         return new_model_args
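
Note (illustrative sketch, not part of the patch series): patch 3 treats max_tokens == -1 as "fill whatever context remains after the prompt", tracked through the _use_max_tokens flag set in __parse_model_args and applied in __call. The standalone helper below mirrors that resolution logic so the control flow can be read outside the step; resolve_max_tokens() is a hypothetical name.

    # Sketch only: mirrors the -1 handling introduced in the patch above.
    import logging

    logger = logging.getLogger(__name__)

    def resolve_max_tokens(model_args: dict, available_tokens: int) -> dict:
        """-1 means: replace max_tokens with the tokens left after the prompt."""
        args = dict(model_args)
        if args.get("max_tokens") == -1:
            if available_tokens > 0:
                args["max_tokens"] = available_tokens
            else:
                # Same fallback as the patch: drop the key and let the provider default apply.
                args.pop("max_tokens")
                logger.warning("Could not determine available tokens. Using model default.")
        return args
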
From 48e8f73299f2e8c7a32bc0bb2cfb5f84934a7fc1 Mon Sep 17 00:00:00 2001
From: "patched.codes[bot]" <298395+patched.codes[bot]@users.noreply.github.com>
Date: Mon, 14 Apr 2025 09:15:01 +0000
Subject: [PATCH 4/5] Patched patchwork/steps/CallLLM/typed.py

---
 patchwork/steps/CallLLM/typed.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/patchwork/steps/CallLLM/typed.py b/patchwork/steps/CallLLM/typed.py
index f3a051f94..c9961f41f 100644
--- a/patchwork/steps/CallLLM/typed.py
+++ b/patchwork/steps/CallLLM/typed.py
@@ -1,4 +1,4 @@
-from typing_extensions import Annotated, Dict, List, TypedDict
+from typing_extensions import Annotated, Dict, List, TypedDict, Literal
 
 from patchwork.common.constants import TOKEN_URL
 from patchwork.common.utils.step_typing import StepTypeConfig
@@ -12,6 +12,13 @@ class CallLLMInputs(TypedDict, total=False):
     allow_truncated: Annotated[bool, StepTypeConfig(is_config=True)]
     model_args: Annotated[str, StepTypeConfig(is_config=True)]
     client_args: Annotated[str, StepTypeConfig(is_config=True)]
+    model_max_tokens: Annotated[
+        int | Literal[-1],
+        StepTypeConfig(
+            is_config=True,
+            description="Maximum number of tokens to generate. Use -1 to use maximum available tokens for the model."
+        )
+    ]
     openai_api_key: Annotated[
         str,
         StepTypeConfig(
From 9e63279161173f467d8017f566e6c644e5a838bb Mon Sep 17 00:00:00 2001
From: "patched.codes[bot]" <298395+patched.codes[bot]@users.noreply.github.com>
Date: Mon, 14 Apr 2025 09:15:01 +0000
Subject: [PATCH 5/5] Patched patchwork/patchflows/GenerateREADME/defaults.yml

---
 patchwork/patchflows/GenerateREADME/defaults.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/patchwork/patchflows/GenerateREADME/defaults.yml b/patchwork/patchflows/GenerateREADME/defaults.yml
index 4facc575b..0a10f55d6 100644
--- a/patchwork/patchflows/GenerateREADME/defaults.yml
+++ b/patchwork/patchflows/GenerateREADME/defaults.yml
@@ -11,7 +11,7 @@
 # model: codellama/CodeLlama-70b-Instruct-hf
 # model_temperature: 0.2
 # model_top_p: 0.95
-# model_max_tokens: 2000
+# model_max_tokens: 2000  # Use -1 to automatically use the maximum tokens available for the model
 
 # CommitChanges Inputs
 disable_branch: false
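
Note (illustrative sketch, not part of the patch series): patches 4 and 5 surface the new behaviour as a model_max_tokens config input. Below is a hypothetical inputs dict for a CallLLM-style step exercising it; the prompt content and temperature are placeholders, only the key names, the "model_" prefix convention, and the -1 sentinel come from this series.

    # Sketch only: "model_"-prefixed keys become model arguments for the LLM call.
    inputs = {
        "prompts": [[{"role": "user", "content": "Generate a README for this repository."}]],
        "model": "gpt-4o-mini",
        "model_temperature": 0.2,
        "model_max_tokens": -1,  # -1: use the maximum tokens available after the prompt
    }
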