diff --git a/.github/workflows/comment_resolver.yml b/.github/workflows/comment_resolver.yml new file mode 100644 index 0000000000..24ea8d3c0a --- /dev/null +++ b/.github/workflows/comment_resolver.yml @@ -0,0 +1,38 @@ +name: Deploy Code Generation Function + +on: workflow_dispatch + +jobs: + deploy: + runs-on: ubuntu-latest + + permissions: + contents: read + id-token: write + + steps: + - uses: actions/checkout@v4 + + - name: Google Cloud Auth + id: auth + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.GCP_SA_CREDENTIALS }} + + - name: Set up gcloud + uses: google-github-actions/setup-gcloud@v2 + + - name: Deploy to Cloud Functions + working-directory: functions/code_generation + run: | + gcloud functions deploy code-generation \ + --gen2 \ + --trigger-http \ + --allow-unauthenticated \ + --region=us-central1 \ + --timeout=300 \ + --memory=2Gi \ + --runtime=python311 \ + --entry-point=handle_phabricator_webhook \ + --service-account=review-helper@moz-bugbug.iam.gserviceaccount.com \ + --set-secrets=OPENAI_API_KEY=openai-api-key:latest diff --git a/bugbug/tools/comment_resolver.py b/bugbug/tools/comment_resolver.py index 35aef7de6f..31d15ea8b6 100644 --- a/bugbug/tools/comment_resolver.py +++ b/bugbug/tools/comment_resolver.py @@ -1,676 +1,382 @@ -import csv -import json import logging -import re -from types import SimpleNamespace -from langchain_openai import OpenAIEmbeddings -from libmozdata.phabricator import PhabricatorAPI -from qdrant_client import QdrantClient +import requests +from langchain.chains import LLMChain +from langchain.prompts import ( + PromptTemplate, +) -from bugbug.generative_model_tool import GenerativeModelTool -from bugbug.phabricator import fetch_diff_from_url +from bugbug.phabricator import get, set_api_key from bugbug.tools.code_review import PhabricatorReviewData from bugbug.utils import get_secret -from bugbug.vectordb import QdrantVectorDB, VectorPoint review_data = PhabricatorReviewData() 
logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -api = PhabricatorAPI(get_secret("PHABRICATOR_TOKEN")) +PHABRICATOR_API_URL = "https://phabricator.services.mozilla.com/api/" +PHABRICATOR_API_TOKEN = get_secret("PHABRICATOR_TOKEN") -class LocalQdrantVectorDB(QdrantVectorDB): - def __init__(self, collection_name: str, location: str = "http://localhost:6333"): - self.collection_name = collection_name - self.client = QdrantClient(location=location) +class CodeGeneratorTool: + def __init__( + self, + client, + model, + hunk_size, + llm, + ) -> None: + self.client = client + self.model = model + self.hunk_size = hunk_size + self.default_hunk_size = hunk_size + self.llm = llm + self.actionability_prompt_template = PromptTemplate( + input_variables=["comment", "code"], + template="""Given the following code and a reviewer comment, determine if the comment is actionable. +An actionable comment is one that: +- Clearly requests a change in the code. +- Does not require external actions (e.g. filing a bug). +- Is not just pointing something out without asking for changes. +- Is not too vague or unclear to act on. +Respond with only YES or NO. +Comment: +{comment} +Code: +{code} +""", + ) + + self.actionability_chain = LLMChain( + llm=self.llm, + prompt=self.actionability_prompt_template, + ) - def setup(self): - super().setup() + self.generate_fix_prompt_template = PromptTemplate( + input_variables=[ + "comment_start_line", + "comment_end_line", + "filepath", + "comment_content", + "numbered_snippet", + ], + template="""You are an expert Firefox software engineer who must modify a Code Snippet based on a given Code Review Comment. The section of the code that the comment refers to is explicitly marked with `>>> START COMMENT <<<` and `>>> END COMMENT <<<` within the snippet. +Instructions: +- The new code changes must be presented in valid Git diff format. +- Lines added should have a `+` prefix. +- Lines removed should have a `-` prefix. 
+- Lines that are modified should have two lines, one with `-` and one with `+` prefix. +- Remove the line number prefix and the comment markers in your final diff output. They are only there for your reference. +- You are not limited to modifying only the marked section; make any necessary changes to improve the code according to the review comment. +- If the comment is suggesting to either delete or modify a code comment, settle with deleting it unless more context suggests modification. +- Your response must contain changes—do not return an empty diff. +- If the comment spans a singular line, it is most likely referring to the first line (e.g. line 10 to 11, it is most likely referring to line 10). +- Do NOT repeat the prompt or add any extra text. +- Do NOT call functions that don't exist. +Input Details: +Comment Start Line: {comment_start_line} +Comment End Line: {comment_end_line} +Comment File: {filepath} +Code Review Comment: {comment_content} +Code Snippet (with Inline Comment Markers): +{numbered_snippet} +Example Output Format: +--- a/File.cpp ++++ b/File.cpp +@@ -10,7 +10,7 @@ +- old line ++ new line +Expected Output Format: +Your response must only contain the following, with no extra text: +(diff output here) +""", + ) + self.generate_fix_chain = LLMChain( + llm=self.llm, + prompt=self.generate_fix_prompt_template, + ) - def delete_collection(self): - self.client.delete_collection(self.collection_name) + self.more_context_prompt_template = PromptTemplate( + input_variables=["comment_content", "snippet_preview"], + template="""We have the following Code Review Comment: +{comment_content} +Below is a snippet of code we believe might need changes (short hunk): +{snippet_preview} +Question: With this snippet, can you confidently fix the code review comment, +or do you need a larger snippet for more context? You need to be 100% sure you +have ALL the code necessary to fix the comment. 
+Answer with strictly either YES I CAN FIX or NO I NEED MORE CONTEXT +""", + ) + self.more_context_chain = LLMChain( + llm=self.llm, + prompt=self.more_context_prompt_template, + ) + self.clarify_comment_prompt_template = PromptTemplate( + input_variables=["raw_comment", "code_snippet"], + template="""You are helping a tool understand a code review comment more precisely. +Here is the raw reviewer comment: +{raw_comment} +Here is the code being reviewed: +{code_snippet} +Rephrase the comment so it can be clearly understood and acted upon by an LLM. +Be specific about what to do in the code (e.g. "change this to that" or "add this here"). Rephrase the reviewer comment so that it's precise and does not overgeneralize. +Output only the rephrased, actionable version of the comment, without any explanation. +""", + ) -class FixCommentDB: - def __init__(self, db: LocalQdrantVectorDB): - self.db = db - self.embeddings = OpenAIEmbeddings( - model="text-embedding-3-large", api_key=get_secret("OPENAI_API_KEY") + self.clarify_comment_chain = LLMChain( + llm=self.llm, + prompt=self.clarify_comment_prompt_template, ) - def line_to_vector_point(self, line: str): - data = json.loads(line) - comment_content = data["comment"]["content"] + def get_comment_transaction_from_revision(self, revision_id, comment_id): + set_api_key(PHABRICATOR_API_URL, PHABRICATOR_API_TOKEN) - embedding = self.embeddings.embed_query(comment_content) + revisions = get(rev_ids=[revision_id]) - vector_point = VectorPoint( - id=data["comment"]["id"], - vector=embedding, - payload={"comment": comment_content, "fix_info": data}, - ) - return vector_point - - def upload_dataset(self, dataset_file: str): - with open(dataset_file, "r") as f: - points = [] - for line in f: - vector_point = self.line_to_vector_point(line) - points.append(vector_point) - self.db.insert(points) - - def search_similar_comments( - self, comment_content: str, revision_id: int, diff_length_limit: int, top_n: int - ): - query_embedding = 
self.embeddings.embed_query(comment_content) - results = self.db.search(query_embedding) - similar_comments = [] + if not revisions: + return None - for result in results: - if ( - result.payload["fix_info"]["revision_id"] != revision_id - and len(result.payload["fix_info"]["fix_patch_diff"]) - < diff_length_limit - ): - similar_comments.append( - (result.payload["comment"], result.payload["fix_info"]) - ) + for revision in revisions: + for transaction in revision.get("transactions", []): + if transaction["type"] == "inline": + for comment in transaction.get("comments", []): + if comment["id"] == comment_id: + return transaction - if len(similar_comments) >= top_n: - break + def get_changeset_id_for_file(self, diff_id, file_path): + url = f"{PHABRICATOR_API_URL}differential.diff.search" + payload = {"api.token": PHABRICATOR_API_TOKEN, "constraints[ids][0]": diff_id} - return similar_comments if similar_comments else None + response = requests.post(url, data=payload) + data = response.json() + if data.get("error_info"): + raise Exception(f"Error retrieving diff PHID: {data['error_info']}") -class CodeGeneratorTool(GenerativeModelTool): - version = "0.0.1" + results = data.get("result", {}).get("data", []) + if not results: + raise Exception(f"No results found for Diff ID {diff_id}") - def __init__( - self, - llm, - db: FixCommentDB, - ) -> None: - self.db = db - self.llm = llm + diff_phid = results[0]["phid"] - def run(self, prompt: str): - messages = [("system", "You are a code review bot."), ("user", prompt)] - response = self.llm.invoke(messages) - return response.content + url = f"{PHABRICATOR_API_URL}differential.changeset.search" + changesets = [] + after_cursor = None - def generate_fix( + while True: + payload = { + "api.token": PHABRICATOR_API_TOKEN, + "constraints[diffPHIDs][0]": diff_phid, + } + if after_cursor: + payload["after"] = after_cursor + + response = requests.post(url, data=payload) + data = response.json() + + if data.get("error_info"): + raise 
Exception(f"Error retrieving changesets: {data['error_info']}") + + results = data.get("result", {}).get("data", []) + changesets.extend(results) + + after_cursor = data.get("result", {}).get("cursor", {}).get("after") + if not after_cursor: + break + + for changeset in changesets: + if changeset["fields"]["path"]["displayPath"] == file_path: + return changeset["id"] + + raise Exception(f"File '{file_path}' not found in Diff {diff_id}") + + def fetch_file_content_from_url(self, changeset_id): + url = f"https://phabricator.services.mozilla.com/differential/changeset/?view=new&ref={changeset_id}" + response = requests.get(url) + response.raise_for_status() + return response.text + + def create_numbered_snippet( self, - comment, - relevant_diff, - prompt_type, + comment_start_line, + comment_end_line, + raw_file_content, hunk_size, - similar_comments_and_fix_infos, - evaluation, - generated_fix, ): - if not evaluation: - prompt = generate_prompt( - comment.content, - relevant_diff, - comment.start_line, - comment.end_line, - similar_comments_and_fix_infos, - prompt_type, - hunk_size, - ) - else: - prompt = f""" - Comment: {comment.content} - Diff (before fix): {relevant_diff} - Generated Fix: {generated_fix} + lines = raw_file_content.splitlines() + total_lines = len(lines) - Does the generated fix address the comment correctly? Answer YES or NO, followed by a very short and succinct explanation. It is considered a valid fix if the generated fix CONTAINS a fix for the comment despite having extra unnecessary fluff addressing other stuff. 
- """ + start_line = max(comment_start_line - hunk_size, 1) + end_line = min(comment_end_line + hunk_size, total_lines) - generated_fix = self.run(prompt=prompt) - return generated_fix + snippet_lines = [] + for i in range(start_line, end_line + 1): + prefix = "" + if i == comment_start_line: + prefix = ">>> START COMMENT <<<\n" + if i == comment_end_line: + snippet_lines.append(f"{prefix}{i} {lines[i - 1]}\n>>> END COMMENT <<<") + continue -class CodeGeneratorEvaluatorTool(GenerativeModelTool): - version = "0.0.1" + snippet_lines.append(f"{prefix}{i} {lines[i - 1]}") - def __init__(self, llm, db) -> None: - self.db = db - self.llm = llm + numbered_snippet = "\n".join(snippet_lines) + return numbered_snippet - def run(self, prompt: str): - messages = [ - ( - "system", - "You are an evaluator of code generation to address review comments.", - ), - ("user", prompt), - ] - response = self.llm.invoke(messages) - return response.content - - def generate_fix(self, comment, relevant_diff, generated_fix): - prompt = f""" - Comment: {comment} - Diff (before fix): {relevant_diff} - Generated Fix: {generated_fix} - - Does the generated fix address the comment correctly? Answer YES or NO, followed by a very short and succinct explanation. It is considered a valid fix if the generated fix CONTAINS a fix for the comment despite having extra unnecessary fluff addressing other stuff. 
- """ - qualitative_feedback = self.run(prompt=prompt) - return qualitative_feedback - - -def fetch_patch_diff(patch_id): - diffs = api.search_diffs(diff_id=patch_id) - if diffs: - return diffs - else: - logger.error(f"No diffs found for patch ID: {patch_id}") - return None - - -def extract_relevant_diff(patch_diff, filename, start_line, end_line, hunk_size): - file_diff_pattern = rf"diff --git a/{re.escape(filename)} b/{re.escape(filename)}\n.*?(?=\ndiff --git|$)" - match = re.search(file_diff_pattern, patch_diff, re.DOTALL) - - if match: - hunk_header_pattern = r"@@ -(\d+),(\d+) \+(\d+),(\d+) @@" - match2 = re.finditer(hunk_header_pattern, match.group(0)) - first_index = None - last_index = None - - for m in match2: - diff_lines = match.group(0).split("\n") - - deletion_start_line = int(m.group(1)) - deletion_num_lines = int(m.group(2)) - addition_start_line = int(m.group(3)) - addition_num_lines = int(m.group(4)) - - if ( - start_line < deletion_start_line and start_line < addition_start_line - ) or ( - start_line > (deletion_start_line + deletion_num_lines) - and start_line > (addition_start_line + addition_num_lines) - ): - continue + def ask_llm_if_needs_more_context( + self, + comment_content, + snippet_preview, + ): + answer = self.more_context_chain.run( + {"comment_content": comment_content, "snippet_preview": snippet_preview} + ) + return answer + + def clarify_comment(self, raw_comment, snippet_preview): + return self.clarify_comment_chain.run( + { + "raw_comment": raw_comment, + "code_snippet": snippet_preview, + } + ) - added_lines = [] - deleted_lines = [] + def generate_fix( + self, + revision_id, + diff_id, + comment_id, + ): + self.hunk_size = self.default_hunk_size + transaction = self.get_comment_transaction_from_revision( + revision_id, comment_id + ) - for line in diff_lines[diff_lines.index(m.group()) + 1 :]: - if line.startswith("-"): - deleted_lines.append(line) - elif line.startswith("+"): - added_lines.append(line) + filepath = 
transaction["fields"]["path"] + comment_start_line = transaction["fields"]["line"] + comment_end_line = comment_start_line + transaction["fields"]["length"] - if not deleted_lines or not added_lines: - logger.error(f"No deleted or added lines found for file: {filename}") - return None + for comment in transaction["comments"]: + if comment["id"] == comment_id: + comment_content = comment["content"]["raw"] + break - deletion_start_diff_line = deleted_lines[ - min( - len(deleted_lines) - 1, - max(0, start_line - deletion_start_line - hunk_size), - ) - ] - deletion_end_diff_line = deleted_lines[ - max( - 0, - min( - len(deleted_lines) - 1, - end_line - deletion_start_line + hunk_size, - ), - ) - ] + changeset_id = self.get_changeset_id_for_file(diff_id, filepath) + raw_file_content = self.fetch_file_content_from_url(changeset_id) + step_size = 10 + max_hunk_size = 30 - addition_start_diff_line = added_lines[ - min( - len(added_lines) - 1, - max(0, start_line - addition_start_line - hunk_size), - ) + initial_snippet = "\n".join( + raw_file_content.splitlines()[ + max(0, comment_start_line - 5) : comment_end_line + 5 ] - addition_end_diff_line = added_lines[ - max( - 0, - min( - len(added_lines) - 1, end_line - addition_start_line + hunk_size - ), - ) - ] - - first_index = None - last_index = None - - diff_lines = match.group(0).split("\n") - - for i, line in enumerate(diff_lines): - if line in [ - deletion_start_diff_line, - deletion_end_diff_line, - addition_start_diff_line, - addition_end_diff_line, - ]: - if first_index is None: - first_index = i - last_index = i - - if first_index is not None and last_index is not None: - relevant_diff = "\n".join(diff_lines[first_index : last_index + 1]) - return relevant_diff - else: - logger.error(f"No relevant diff found for lines: {start_line}-{end_line}") - return None - else: - logger.error(f"No diff found for file: {filename}") - return None - - -def get_revision_id_from_patch(patch_id): - diffs = 
api.search_diffs(diff_id=patch_id) - - if diffs: - revision_phid = diffs[0]["revisionPHID"] - - revision = api.load_revision(rev_phid=revision_phid) - - return revision["id"] - else: - logger.error(f"No diffs found for patch ID: {patch_id}") - return None - - -def generate_prompt( - comment_content, - relevant_diff, - start_line, - end_line, - similar_comments_and_fix_infos, - prompt_type, - hunk_size, -): - if prompt_type == "zero-shot": - prompt = f""" - CONTEXT: - You are a code review bot that generates fixes in code given an inline review comment. - You will be provided with the COMMENT, the LINE NUMBERS the comment is referring to, - and the relevant DIFF for the file affected. Your goal is to generate a code fix based - on the COMMENT, LINE NUMBERS, and DIFF provided, and nothing more. Generate ONLY the - lines you are adding/deleting, indicated by + and -. For example, if you are modifying - a single line, show that you are deleting (-) the line from the original diff and adding - (+) the fixed line. The line numbers help to contextualize the changes within the diff. - ONLY address the comment. Do not make any other changes. 
- - COMMENT: - "{comment_content}" - - LINE NUMBERS: - {start_line}-{end_line} - - DIFF: - ``` - {relevant_diff} - ``` - - FIX: - """ - if prompt_type == "single-shot": - similar_comment, fix_info = similar_comments_and_fix_infos[0] - - example_initial_diff = fetch_diff_from_url( - fix_info["revision_id"], fix_info["initial_patch_id"], single_patch=True ) - example_relevant_initial_diff = extract_relevant_diff( - example_initial_diff, - fix_info["comment"]["filename"], - fix_info["comment"]["start_line"], - fix_info["comment"]["end_line"], - hunk_size, + actionability = ( + self.actionability_chain.run( + { + "comment": comment_content, + "code": initial_snippet, + } + ) + .strip() + .upper() ) - example_relevant_fix_diff = extract_relevant_diff( - fix_info["fix_patch_diff"], - fix_info["comment"]["filename"], - fix_info["comment"]["start_line"], - fix_info["comment"]["end_line"], - hunk_size, + if actionability != "YES": + logger.info("Comment is not actionable. Skipping.") + return "Not Actionable" + + while self.hunk_size <= max_hunk_size: + lines = raw_file_content.splitlines() + total_lines = len(lines) + snippet_start = max(comment_start_line - self.hunk_size, 1) + snippet_end = min(comment_end_line + self.hunk_size, total_lines) + snippet_preview_lines = lines[snippet_start - 1 : snippet_end] + snippet_preview = "\n".join(snippet_preview_lines) + + answer = self.ask_llm_if_needs_more_context( + comment_content=comment_content, + snippet_preview=snippet_preview, + ).lower() + + if answer == "yes i can fix": + break + elif answer == "no i need more context": + self.hunk_size += step_size + else: + break + + clarified_comment = self.clarify_comment( + raw_comment=comment_content, snippet_preview=snippet_preview ) - prompt = f""" - CONTEXT: - You are a code review bot that generates fixes in code given an inline review comment. - You will be provided with the COMMENT, the LINE NUMBERS the comment is referring to, - and the relevant DIFF for the file affected. 
Your goal is to generate a code fix based - on the COMMENT, LINE NUMBERS, and DIFF provided, and nothing more. Generate ONLY the - lines you are adding/deleting, indicated by + and -. For example, if you are modifying - a single line, show that you are deleting (-) the line from the original diff and adding - (+) the fixed line. The line numbers help to contextualize the changes within the diff. - An EXAMPLE has been provided for your reference. ONLY address the comment. Do not make - any other changes. - - EXAMPLE: - COMMENT: - "{similar_comment}" - - LINE NUMBERS: - {fix_info["comment"]["start_line"]}-{fix_info["comment"]["end_line"]} - - DIFF: - ``` - {example_relevant_initial_diff} - ``` - - FIX: - ``` - {example_relevant_fix_diff} - ``` - - YOUR TURN: - COMMENT: - "{comment_content}" - - LINE NUMBERS: - {start_line}-{end_line} - - DIFF: - ``` - {relevant_diff} - ``` - - FIX: - """ - if prompt_type == "chain-of-thought": - prompt = f""" - CONTEXT: - You are a code review bot that generates fixes in code based on an inline review comment. - You will be provided with the COMMENT, the LINE NUMBERS the comment is referring to, - and the relevant DIFF for the affected file. Your goal is to carefully analyze the COMMENT, - LINE NUMBERS, and DIFF provided, and generate a code fix accordingly. Only make changes - directly relevant to the feedback. - - THINKING PROCESS: - 1. **Understand the COMMENT**: Carefully read the comment to grasp the reviewer’s intention. - 2. **Locate the Relevant Lines**: Use the provided LINE NUMBERS to pinpoint the exact lines - in the DIFF that need modification. - 3. **Analyze the DIFF**: Review the current state of the code in the DIFF to understand - what is currently implemented. - 4. **Determine Necessary Changes**: Based on the COMMENT, decide what needs to be added, - modified, or removed in the code. Focus on addressing the feedback without introducing - unnecessary changes. - 5. 
**Generate the FIX**: Output the exact lines you are adding or deleting, using + and - - symbols to indicate modifications. For example, if a line is being modified, show it as - being removed (-) and then the corrected line as being added (+). ONLY address the comment. - Do not make any other changes. - - COMMENT: - "{comment_content}" - - LINE NUMBERS: - {start_line}-{end_line} - - DIFF: - ``` - {relevant_diff} - ``` - - FIX: - """ - - if prompt_type == "multi-shot": - examples = "" - for similar_comment, fix_info in similar_comments_and_fix_infos: - example_initial_diff = fetch_diff_from_url( - fix_info["revision_id"], fix_info["initial_patch_id"], single_patch=True - ) - example_relevant_initial_diff = extract_relevant_diff( - example_initial_diff, - fix_info["comment"]["filename"], - fix_info["comment"]["start_line"], - fix_info["comment"]["end_line"], - hunk_size, - ) - example_relevant_fix_diff = extract_relevant_diff( - fix_info["fix_patch_diff"], - fix_info["comment"]["filename"], - fix_info["comment"]["start_line"], - fix_info["comment"]["end_line"], - hunk_size, - ) - examples += f""" - EXAMPLE: - COMMENT: - "{similar_comment}" - - LINE NUMBERS: - {fix_info["comment"]["start_line"]}-{fix_info["comment"]["end_line"]} - - DIFF: - ``` - {example_relevant_initial_diff} - ``` - - - FIX: - {example_relevant_fix_diff} - """ - - prompt = f""" - CONTEXT: - You are a code review bot that generates fixes in code given an inline review comment. - You will be provided with the COMMENT, the LINE NUMBERS the comment is referring to, - and the relevant DIFF for the file affected. Your goal is to generate a code fix based - on the COMMENT, LINE NUMBERS, and DIFF provided, and nothing more. Generate ONLY the - lines you are adding/deleting, indicated by + and -. For example, if you are modifying - a single line, show that you are deleting (-) the line from the original diff and adding - (+) the fixed line. 
The line numbers help to contextualize the changes within the diff. - Two EXAMPLES has been provided for your reference. ONLY address the comment. Do not make - any other changes. - - EXAMPLES: - {examples} - - YOUR TURN: - COMMENT: - "{comment_content}" - - LINE NUMBERS: - {start_line}-{end_line} - - DIFF: - ``` - {relevant_diff} - ``` - - FIX: - """ - - return prompt - - -def generate_fixes( - llm_tool, - db, - generation_limit, - diff_length_limits, - prompt_types, - hunk_sizes, - output_csv, -): - counter = 0 - revision_ids = extract_revision_id_list_from_dataset("data/fixed_comments.json") - - with open(output_csv, mode="w", newline="") as file: - writer = csv.writer(file) - writer.writerow( - [ - "Revision ID", - "Patch ID", - "Prompt Type", - "Length Limit", - "Hunk Size", - "Comment Length", - "Generated Code Length", - "File Path", - "Comment", - "Start Line", - "End Line", - "Relevant Diff", - "Generated Fix", - ] + numbered_snippet = self.create_numbered_snippet( + comment_start_line=comment_start_line, + comment_end_line=comment_end_line, + raw_file_content=raw_file_content, + hunk_size=self.hunk_size, ) - for i, (patch_id, comments) in enumerate( - review_data.get_all_inline_comments(lambda c: True) - ): - revision_id = get_revision_id_from_patch(patch_id) + generated_fix = self.generate_fix_chain.run( + { + "comment_start_line": comment_start_line, + "comment_end_line": comment_end_line, + "filepath": filepath, + "comment_content": clarified_comment, + "numbered_snippet": numbered_snippet, + } + ) + return generated_fix - if not revision_id: - logger.error(f"Skipping Patch ID {patch_id} as no revision ID found.") - continue + def generate_fixes_for_all_comments(self, revision_id): + set_api_key(PHABRICATOR_API_URL, PHABRICATOR_API_TOKEN) - if revision_id not in revision_ids: - logger.error( - f"Skipping Patch ID {patch_id} as revision ID {revision_id} not in dataset." 
- ) - continue + revisions = get(rev_ids=[int(revision_id)]) + if not revisions: + raise Exception(f"No revision found for ID {revision_id}") - diff = fetch_diff_from_url(revision_id, patch_id, single_patch=True) + revision = revisions[0] + latest_diff_id = int(revision["fields"]["diffID"]) + comment_map = {} - if not diff: - logger.error(f"Skipping Patch ID {patch_id} as no diff found.") - continue + reviewer_phids = { + reviewer["reviewerPHID"] + for reviewer in revision.get("attachments", {}) + .get("reviewers", {}) + .get("reviewers", []) + } - for comment in comments: - if counter >= generation_limit: - return + for transaction in revision.get("transactions", []): + if transaction["type"] != "inline": + continue - for hunk_size in hunk_sizes: - if counter >= generation_limit: - break + author_phid = transaction["authorPHID"] + if author_phid not in reviewer_phids: + continue - filename = comment.filename - relevant_diff = extract_relevant_diff( - diff, filename, comment.start_line, comment.end_line, hunk_size + for comment in transaction.get("comments", []): + comment_id = comment["id"] + try: + fix = self.generate_fix( + revision_id=revision_id, + diff_id=latest_diff_id, + comment_id=comment_id, ) - - if relevant_diff: - for prompt_type in prompt_types: - if counter >= generation_limit: - break - - for diff_length_limit in diff_length_limits: - if counter >= generation_limit: - break - - similar_comments_and_fix_infos = ( - db.search_similar_comments( - comment.content, - revision_id, - diff_length_limit, - 2, - ) - ) - - if similar_comments_and_fix_infos is None: - logger.info( - f"No similar comment found for comment: {comment.content}" - ) - continue - - generated_fix = llm_tool.generate_fix( - comment, - relevant_diff, - prompt_type, - hunk_size, - similar_comments_and_fix_infos, - False, - None, - ) - - comment_length = len(comment.content) - generated_code_length = len(generated_fix) - file_path = filename - - writer.writerow( - [ - revision_id, - 
patch_id, - prompt_type, - diff_length_limit, - hunk_size, - comment_length, - generated_code_length, - file_path, - comment.content, - comment.start_line, - comment.end_line, - relevant_diff, - generated_fix, - ] - ) - - counter += 1 - - else: - print(f"No relevant diff found for Comment ID {comment.id}.\n") - - -def extract_revision_id_list_from_dataset(dataset_file): - revision_ids = [] - - with open(dataset_file, "r") as f: - for line in f: - data = json.loads(line) - revision_ids.append(data["revision_id"]) - - return revision_ids - - -def generate_individual_fix(llm_tool, db, revision_id, diff_id, comment_id): - revision_details = api.load_revision(rev_id=revision_id) - revision_phid = revision_details["phid"] - transactions = api.request("transaction.search", objectIdentifier=revision_phid) - - target_comment = {} - - found = False - - for transaction in transactions["data"]: - if transaction["type"] == "inline": - for comment in transaction["comments"]: - if comment["id"] == comment_id: - target_comment["filepath"] = transaction["fields"]["path"] - target_comment["content"] = comment["content"] - target_comment["start_line"] = transaction["fields"]["line"] - target_comment["end_line"] = ( - transaction["fields"]["line"] + transaction["fields"]["length"] + is_actionable = fix != "Not Actionable" + comment_map[comment_id] = { + "fix": fix, + "is_actionable": is_actionable, + } + except Exception as e: + logger.warning( + f"Error generating fix for comment {comment_id}: {e}" ) - found = True - break - if found: - break - - target_comment = SimpleNamespace(**target_comment) - - diff = fetch_diff_from_url(revision_id, diff_id, single_patch=True) - relevant_diff = extract_relevant_diff( - diff, - target_comment.filepath, - target_comment.start_line, - target_comment.end_line, - hunk_size=100, - ) - - generated_fix = llm_tool.generate_fix( - target_comment, - relevant_diff, - prompt_type="zero-shot", - hunk_size=100, - similar_comments_and_fix_infos=None, - 
evaluation=False, - generated_fix=None, - ) - - print(generated_fix) + comment_map[comment_id] = { + "fix": f"Error: {e}", + "is_actionable": False, + } + + return comment_map diff --git a/functions/comment_resolver/main.py b/functions/comment_resolver/main.py new file mode 100644 index 0000000000..05c27164d9 --- /dev/null +++ b/functions/comment_resolver/main.py @@ -0,0 +1,43 @@ +import logging + +import flask +import functions_framework + +from bugbug.generative_model_tool import create_openai_llm +from bugbug.tools.comment_resolver import CodeGeneratorTool + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +@functions_framework.http +def handle_code_generation(request: flask.Request): + revision_id = None + + if request.method == "GET": + revision_id = request.args.get("revisionID") + elif request.method == "POST": + data = request.get_json() + if not data: + return "Invalid JSON payload", 400 + revision_id = data.get("revisionID") + else: + return "Only GET and POST requests are allowed", 405 + + if not revision_id: + return "Missing revisionID", 400 + + try: + llm = create_openai_llm() + codegen = CodeGeneratorTool( + client=None, + model="gpt-4", + hunk_size=10, + llm=llm, + ) + result = codegen.generate_fixes_for_all_comments(int(revision_id)) + return result, 200 + + except Exception as e: + logger.exception("Error processing request") + return {"error": str(e)}, 500 diff --git a/functions/comment_resolver/requirements.txt b/functions/comment_resolver/requirements.txt new file mode 100644 index 0000000000..cbc45fa128 --- /dev/null +++ b/functions/comment_resolver/requirements.txt @@ -0,0 +1,3 @@ +bugbug==0.0.573 +Flask==2.2.5 +functions-framework==3.5.0 diff --git a/scripts/comment_resolver_evaluator.py b/scripts/comment_resolver_evaluator.py deleted file mode 100644 index a47109c51f..0000000000 --- a/scripts/comment_resolver_evaluator.py +++ /dev/null @@ -1,139 +0,0 @@ -import argparse -import csv -import json -import 
logging -import sys - -from dotenv import load_dotenv - -from bugbug.generative_model_tool import create_llm_from_args -from bugbug.tools.comment_resolver import ( - CodeGeneratorEvaluatorTool, - FixCommentDB, - LocalQdrantVectorDB, -) - - -def find_fix_in_dataset(revision_id, initial_patch_id, dataset_file): - with open(dataset_file, "r") as f: - for line in f: - data = json.loads(line) - if data["revision_id"] == int(revision_id) and data[ - "initial_patch_id" - ] == int(initial_patch_id): - return data["fix_patch_diff"] - return None - - -def calculate_metrics(reference_fix, generated_fix): - reference_tokens = reference_fix.split() - generated_tokens = generated_fix.split() - - common_tokens = set(reference_tokens) & set(generated_tokens) - precision = len(common_tokens) / len(generated_tokens) if generated_tokens else 0 - recall = len(common_tokens) / len(reference_tokens) if reference_tokens else 0 - f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) else 0 - - return {"precision": precision, "recall": recall, "f1": f1} - - -def compare_fixes(revision_id, initial_patch_id, generated_fix, reference_fix): - if reference_fix: - metrics = calculate_metrics(reference_fix, generated_fix) - return metrics - else: - logging.info( - f"No matching fix found in the dataset for Revision {revision_id} and Patch {initial_patch_id}." 
- ) - return None - - -def conduct_evaluation(input_csv, output_csv, llm_tool): - with ( - open(input_csv, "r") as infile, - open(output_csv, mode="w", newline="") as outfile, - ): - reader = csv.DictReader(infile) - - fieldnames = reader.fieldnames + [ - "Reference Fix", - "Precision", - "Recall", - "F1", - "Qualitative Feedback", - ] - writer = csv.DictWriter(outfile, fieldnames=fieldnames) - writer.writeheader() - - for row in reader: - revision_id = row["Revision ID"] - initial_patch_id = row["Patch ID"] - generated_fix = row["Generated Fix"] - comment = row["Comment"] - relevant_diff = row["Relevant Diff"] - - reference_fix = find_fix_in_dataset( - revision_id=revision_id, - initial_patch_id=initial_patch_id, - dataset_file="data/fixed_comments.json", - ) - - metrics = compare_fixes( - revision_id=revision_id, - initial_patch_id=initial_patch_id, - generated_fix=generated_fix, - reference_fix=reference_fix, - ) - - qualitative_feedback = llm_tool.generate_fix( - comment, relevant_diff, generated_fix - ) - - if metrics is not None: - writer.writerow( - { - **row, - "Reference Fix": reference_fix, - "Precision": metrics["precision"], - "Recall": metrics["recall"], - "F1": metrics["f1"], - "Qualitative Feedback": qualitative_feedback, - } - ) - - -def run(args) -> None: - load_dotenv() - logging.basicConfig(level=logging.INFO) - - db = FixCommentDB(LocalQdrantVectorDB(collection_name="fix_comments")) - llm = create_llm_from_args(args) - llm_tool = CodeGeneratorEvaluatorTool(llm=llm, db=db) - - input_csv = args.input_csv - output_csv = args.output_csv - - conduct_evaluation(input_csv, output_csv, llm_tool) - - -def parse_args(args): - parser = argparse.ArgumentParser() - parser.add_argument("--llm", help="LLM", choices=["openai"], default="openai") - parser.add_argument( - "--input-csv", - type=str, - default="code_generations.csv", - help="Input CSV file from the generation script.", - ) - parser.add_argument( - "--output-csv", - type=str, - 
default="evaluated_code_generations.csv", - help="Output CSV file for results.", - ) - return parser.parse_args(args) - - -if __name__ == "__main__": - args = parse_args(sys.argv[1:]) - run(args) diff --git a/scripts/comment_resolver_runner.py b/scripts/comment_resolver_runner.py index 0639377fc9..033c0c0324 100644 --- a/scripts/comment_resolver_runner.py +++ b/scripts/comment_resolver_runner.py @@ -1,129 +1,37 @@ import argparse -import logging import sys -from dotenv import load_dotenv - -import bugbug.db as db -import bugbug.phabricator as phabricator -from bugbug.generative_model_tool import create_llm_from_args -from bugbug.tools.comment_resolver import ( - CodeGeneratorTool, - FixCommentDB, - LocalQdrantVectorDB, - generate_fixes, - generate_individual_fix, -) +from bugbug.generative_model_tool import create_openai_llm +from bugbug.tools.comment_resolver import CodeGeneratorTool def run(args) -> None: - load_dotenv() - - logging.basicConfig(level=logging.INFO) - - db = FixCommentDB(LocalQdrantVectorDB(collection_name="fix_comments")) - - if args.create_db: - db.db.delete_collection() - db.db.setup() - db.upload_dataset(args.dataset_file) - - llm = create_llm_from_args(args) - llm_tool = CodeGeneratorTool(llm=llm, db=db) - - if args.revision_id and args.diff_id and args.comment_id: - pass - # TODO: Create this function - generate_individual_fix( - llm_tool=llm_tool, - db=db, - revision_id=args.revision_id, - diff_id=args.diff_id, - comment_id=args.comment_id, - ) - else: - generate_fixes( - llm_tool=llm_tool, - db=db, - generation_limit=args.generation_limit, - prompt_types=args.prompt_types, - hunk_sizes=args.hunk_sizes, - diff_length_limits=args.diff_length_limits, - output_csv=args.output_csv, - ) + """Generate fixes for every comment on ``args.revision_id`` and print them.""" + llm = create_openai_llm() + codegen = CodeGeneratorTool( + client=None, + model="gpt-4", + hunk_size=10, + llm=llm, + ) + result = codegen.generate_fixes_for_all_comments(args.revision_id) + print(result) def parse_args(args): + """Parse command-line arguments; ``args`` excludes the program name.""" parser = 
argparse.ArgumentParser() parser.add_argument( - "--llm", - help="LLM", - choices=["openai"], - default="openai", - ) - parser.add_argument( - "--create-db", - action="store_true", - help="If set, the local Qdrant database will be created and populated.", - ) - parser.add_argument( - "--dataset-file", - type=str, - default="data/fixed_comments.json", - help="Dataset file to upload as Qdrant database.", - ) - parser.add_argument( - "--output-csv", - type=str, - default="metrics_results.csv", - help="Output CSV file for results.", - ) - parser.add_argument( - "--prompt-types", - nargs="+", - default=["zero-shot"], - help="Types of prompts to use.", - ) - parser.add_argument( - "--diff-length-limits", - nargs="+", + # Hyphenated spelling first: argparse derives dest "revision_id" from it. + "--revision-id", + "--revision_id", type=int, - default=[1000], - help="Diff length limits to enforce when searching for examples.", + required=True, + help="The revision ID to process.", ) - parser.add_argument( - "--hunk-sizes", - nargs="+", - type=int, - default=[20], - help="Hunk sizes to enforce when searching for examples.", - ) - parser.add_argument( - "--generation-limit", - type=int, - default=100, - help="Maximum number of generations.", - ) - parser.add_argument( - "--revision-id", - type=int, - help="Revision ID for individual fix generation.", - ) - parser.add_argument( - "--diff-id", - type=int, - help="Diff ID for individual fix generation.", - ) - parser.add_argument( - "--comment-id", - type=int, - help="Comment ID for individual fix generation.", - ) - return parser.parse_args(args) if __name__ == "__main__": - db.download(phabricator.FIXED_COMMENTS_DB) args = parse_args(sys.argv[1:]) run(args)