From fafe6c03ce91b13833b6950800b123252636dfe4 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Wed, 30 Apr 2025 08:05:22 +0000 Subject: [PATCH 1/7] Fix ingestion pagination for jira / zendesk --- .../backend/core/requester.py | 53 ++++++++++++------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/src/gurubase-backend/backend/core/requester.py b/src/gurubase-backend/backend/core/requester.py index 2f8097ca..5015a3a4 100644 --- a/src/gurubase-backend/backend/core/requester.py +++ b/src/gurubase-backend/backend/core/requester.py @@ -947,7 +947,7 @@ def list_issues(self, jql_query, start=0, max_results=50): List Jira issues using JQL query with pagination Args: jql_query (str): JQL query string to filter issues - start (int): Starting index for pagination + start (int): Starting index for pagination (unused, kept for compatibility) max_results (int): Maximum number of results to fetch per request Returns: list: List of Jira issues matching the query @@ -955,24 +955,39 @@ def list_issues(self, jql_query, start=0, max_results=50): ValueError: If API request fails """ try: - # Get issues using JQL - issues_data = self.jira.jql(jql_query, start=start, limit=max_results) - issues = [] + all_issues = [] + current_start = 0 + page_size = max_results - for issue in issues_data.get('issues', []): - formatted_issue = { - 'id': issue.get('id'), - # 'key': issue.get('key'), - # 'summary': issue.get('fields', {}).get('summary'), - # 'issue_type': issue.get('fields', {}).get('issuetype', {}).get('name'), - # 'status': issue.get('fields', {}).get('status', {}).get('name'), - # 'priority': issue.get('fields', {}).get('priority', {}).get('name'), - # 'assignee': issue.get('fields', {}).get('assignee', {}).get('displayName'), - 'link': f"{self.url}/browse/{issue.get('key')}" - } - issues.append(formatted_issue) + while True: + # Get issues using JQL + issues_data = self.jira.jql(jql_query, start=current_start, limit=page_size) + issues = issues_data.get('issues', []) + + 
if not issues: + break + + for issue in issues: + formatted_issue = { + 'id': issue.get('id'), + # 'key': issue.get('key'), + # 'summary': issue.get('fields', {}).get('summary'), + # 'issue_type': issue.get('fields', {}).get('issuetype', {}).get('name'), + # 'status': issue.get('fields', {}).get('status', {}).get('name'), + # 'priority': issue.get('fields', {}).get('priority', {}).get('name'), + # 'assignee': issue.get('fields', {}).get('assignee', {}).get('displayName'), + 'link': f"{self.url}/browse/{issue.get('key')}" + } + all_issues.append(formatted_issue) + + # If we got fewer issues than requested, we've reached the end + if len(issues) < page_size: + break + + # Move to the next page + current_start += page_size - return issues + return all_issues except Exception as e: logger.error(f"Error listing Jira issues: {str(e)}", exc_info=True) if "401" in str(e): @@ -1296,7 +1311,7 @@ def list_articles(self, batch_size=100): if article.get('draft') is False: all_articles.append(self._format_article(article)) - url = data.get('next_page') + url = data.get('links', {}).get('next') return all_articles except requests.exceptions.RequestException as e: status_code = e.response.status_code if e.response is not None else None @@ -1384,7 +1399,7 @@ def _get_article_comments(self, article_id, batch_size=100): comments = data.get('comments', []) all_comments.extend([self._format_article_comment(comment) for comment in comments]) - url = data.get('next_page') + url = data.get('links', {}).get('next') all_comments.sort(key=lambda x: x['created_at']) return all_comments except requests.exceptions.RequestException as e: From b58103e314b96ed64dac43c5d862562cf13a339c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aral=20Yekta=20Yar=C4=B1mca?= <44121565+aralyekta@users.noreply.github.com> Date: Wed, 30 Apr 2025 01:15:18 -0700 Subject: [PATCH 2/7] Update requester.py Reduce confluence page size to 25 --- src/gurubase-backend/backend/core/requester.py | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/src/gurubase-backend/backend/core/requester.py b/src/gurubase-backend/backend/core/requester.py index 5015a3a4..0ab5d903 100644 --- a/src/gurubase-backend/backend/core/requester.py +++ b/src/gurubase-backend/backend/core/requester.py @@ -1975,7 +1975,7 @@ def list_pages(self, cql=None): try: # Use pagination to get all pages from the space page_start = 0 - page_limit = 100 # Fetch 100 pages at a time + page_limit = 25 # Fetch 25 pages at a time while True: pages_batch = self.confluence.get_all_pages_from_space( space_key, @@ -2007,7 +2007,7 @@ def list_pages(self, cql=None): # Implement pagination for the CQL query as well cql_page_ids = set() cql_start = 0 - cql_limit = 100 # Fetch 100 results at a time + cql_limit = 25 # Fetch 100 results at a time while True: cql_results = self.confluence.cql(cql, start=cql_start, limit=cql_limit) @@ -2104,7 +2104,7 @@ def get_page_comments(self, page_id, start=0, limit=50): # Use pagination to get all comments page_start = 0 - page_limit = 100 # Fetch 100 comments at a time + page_limit = 25 # Fetch 100 comments at a time while True: try: # The Confluence API doesn't support direct pagination for comments @@ -2178,7 +2178,7 @@ def list_spaces(self, start=0, limit=50): # Use pagination to get all spaces space_start = 0 - space_limit = 100 # Fetch 100 spaces at a time + space_limit = 25 # Fetch 100 spaces at a time while True: spaces_data = self.confluence.get_all_spaces(start=space_start, limit=space_limit) results = spaces_data.get('results', []) From 92c812c0d4e0ac94079b4afecb8b2ea264fb28fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aral=20Yekta=20Yar=C4=B1mca?= <44121565+aralyekta@users.noreply.github.com> Date: Wed, 30 Apr 2025 01:16:45 -0700 Subject: [PATCH 3/7] Update requester.py Update comments --- src/gurubase-backend/backend/core/requester.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gurubase-backend/backend/core/requester.py 
b/src/gurubase-backend/backend/core/requester.py index 0ab5d903..5bf16575 100644 --- a/src/gurubase-backend/backend/core/requester.py +++ b/src/gurubase-backend/backend/core/requester.py @@ -2007,7 +2007,7 @@ def list_pages(self, cql=None): # Implement pagination for the CQL query as well cql_page_ids = set() cql_start = 0 - cql_limit = 25 # Fetch 100 results at a time + cql_limit = 25 # Fetch 25 results at a time while True: cql_results = self.confluence.cql(cql, start=cql_start, limit=cql_limit) @@ -2104,7 +2104,7 @@ def get_page_comments(self, page_id, start=0, limit=50): # Use pagination to get all comments page_start = 0 - page_limit = 25 # Fetch 100 comments at a time + page_limit = 25 # Fetch 25 comments at a time while True: try: # The Confluence API doesn't support direct pagination for comments @@ -2178,7 +2178,7 @@ def list_spaces(self, start=0, limit=50): # Use pagination to get all spaces space_start = 0 - space_limit = 25 # Fetch 100 spaces at a time + space_limit = 25 # Fetch 25 spaces at a time while True: spaces_data = self.confluence.get_all_spaces(start=space_start, limit=space_limit) results = spaces_data.get('results', []) From 234a0799f31c585a0bef22a36d422f93695d08e2 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Wed, 30 Apr 2025 12:10:56 +0000 Subject: [PATCH 4/7] Add scripts to populate ingestion sources --- .../scripts/onetime/populate_confluence.py | 104 +++++++++ .../backend/scripts/onetime/populate_jira.py | 112 ++++++++++ .../scripts/onetime/populate_zendesk.py | 210 ++++++++++++++++++ 3 files changed, 426 insertions(+) create mode 100644 src/gurubase-backend/backend/scripts/onetime/populate_confluence.py create mode 100644 src/gurubase-backend/backend/scripts/onetime/populate_jira.py create mode 100644 src/gurubase-backend/backend/scripts/onetime/populate_zendesk.py diff --git a/src/gurubase-backend/backend/scripts/onetime/populate_confluence.py b/src/gurubase-backend/backend/scripts/onetime/populate_confluence.py new file mode 100644 
index 00000000..c824b28f --- /dev/null +++ b/src/gurubase-backend/backend/scripts/onetime/populate_confluence.py @@ -0,0 +1,104 @@ +# Create confluence spaces and pages +import os +import django +import random +import string +from datetime import datetime +import sys +import time # Used to throttle API calls +# Set up Django environment +sys.path.append('/workspaces/gurubase/src/gurubase-backend/backend') +sys.path.append('/workspaces/gurubase-backend/backend') +sys.path.append('/workspace/backend') +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "backend.settings") +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'backend.settings') +django.setup() + +from core.models import Integration, GuruType +from core.requester import ConfluenceRequester +from django.contrib.auth import get_user_model + +User = get_user_model() + +def generate_random_text(length=1000): + """Generate random text for page content""" + words = ['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'consectetur', 'adipiscing', 'elit'] + return ' '.join(random.choices(words, k=length)) + +def create_confluence_spaces_and_pages(): + # Get the existing Confluence integration for the 'gurubase' guru type (looked up, not created) + integration = Integration.objects.get( + type=Integration.Type.CONFLUENCE, + guru_type=GuruType.objects.get(slug='gurubase') + ) + + # Initialize Confluence requester + confluence = ConfluenceRequester(integration) + + # Create 5 spaces + space_names = [ + 'Engineering Documentation', + 'Product Requirements', + 'Design Guidelines', + 'Development Standards', + 'Project Management' + ] + + for space_name in space_names: + try: + # Generate space key + space_key = ''.join(random.choices(string.ascii_uppercase, k=3)) + + # Check if space exists + try: + existing_space = confluence.confluence.get_space(space_key) + print(f"Space {space_name} ({space_key}) already exists, skipping creation") + space = existing_space + except Exception: + # Space doesn't exist, create it + space = confluence.confluence.create_space( + space_key=space_key,
+ space_name=space_name, + ) + print(f"Created space: {space_name} ({space_key})") + + # Create 300 pages in each space (matches range(300); the progress message below still prints /500) + for i in range(300): + page_title = f"Page {i+1} - {space_name}" + page_content = f""" + # {page_title} + + Created on: {datetime.now().isoformat()} + + {generate_random_text()} + """ + + # Create page + page = confluence.confluence.create_page( + space=space_key, + title=page_title, + body=page_content, + type='page' + ) + + # Add throttling between page creation and comments + time.sleep(1) # Wait 1 second between page creation and comments + + # Add some comments to the page + for j in range(3): # Add 3 comments per page + comment_content = f"Comment {j+1} on {page_title}\n\n{generate_random_text(100)}" + confluence.confluence.add_comment( + page_id=page['id'], + text=comment_content + ) + time.sleep(0.5) # Wait 0.5 seconds between comment additions + + print(f"Created page {i+1}/500 in space {space_name}") + time.sleep(2) # Wait 2 seconds between page creations + + except Exception as e: + print(f"Error creating space {space_name}: {str(e)}") + continue + +if __name__ == '__main__': + create_confluence_spaces_and_pages() \ No newline at end of file diff --git a/src/gurubase-backend/backend/scripts/onetime/populate_jira.py b/src/gurubase-backend/backend/scripts/onetime/populate_jira.py new file mode 100644 index 00000000..05977c45 --- /dev/null +++ b/src/gurubase-backend/backend/scripts/onetime/populate_jira.py @@ -0,0 +1,112 @@ +# Create jira issues in Done status + +import os +import sys +import time +import django + +# Set up Django environment +sys.path.append('/workspaces/gurubase/src/gurubase-backend/backend') +sys.path.append('/workspace/backend') +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "backend.settings") +django.setup() + +from core.models import Integration, GuruType +from core.requester import JiraRequester + +def throttle_request(func): + """Decorator to throttle API requests""" + last_request_time = 0 + min_interval =
1.0 # Minimum seconds between requests + + def wrapper(*args, **kwargs): + nonlocal last_request_time + current_time = time.time() + time_since_last = current_time - last_request_time + + if time_since_last < min_interval: + sleep_time = min_interval - time_since_last + time.sleep(sleep_time) + + result = func(*args, **kwargs) + last_request_time = time.time() + return result + + return wrapper + +class ThrottledJiraRequester(JiraRequester): + """Extended JiraRequester with throttling""" + + @throttle_request + def create_issue(self, project_key, summary, description, issue_type="Task", priority="Medium"): + """Create a Jira issue with throttling""" + fields = { + 'project': {'key': project_key}, + 'summary': summary, + 'description': description, + 'issuetype': {'name': issue_type}, + } + + return self.jira.create_issue(fields=fields) + + @throttle_request + def add_comment(self, issue_key, comment): + """Add a comment to a Jira issue with throttling""" + return self.jira.issue_add_comment(issue_key, comment) + + @throttle_request + def transitions(self, issue_key): + """Get available transitions for an issue""" + return self.jira.get_issue_transitions(issue_key) + + +def create_jira_content(): + # Get the Jira integration + integration = Integration.objects.get( + type=Integration.Type.JIRA, + guru_type=GuruType.objects.get(slug='gurubase') + ) + + # Initialize Jira requester + jira = ThrottledJiraRequester(integration) + + # Get available projects + try: + projects = jira.jira.projects() + if not projects: + print("No projects found in Jira. 
Please create at least one project first.") + return + + project_keys = [project['key'] for project in projects] + print(f"Found projects: {', '.join(project_keys)}") + except Exception as e: + print(f"Error fetching projects: {e}") + return + + # Create 500 issues (matches range(500); the progress message below still prints /100) + print("Creating Jira issues...") + for i in range(500): + try: + # Create issue + issue = jira.create_issue( + project_key=project_keys[0], # Use first project + summary=f"Test Issue {i+1}", + description="Test description", + issue_type="Task" + ) + + # Transition to Done + transitions = jira.transitions(issue['key']) + for transition in transitions: + if transition['to'].lower() == 'done': + jira.jira.issue_transition(issue['key'], 'done') + break + + print(f"Created issue {i+1}/100: {issue['key']}") + + except Exception as e: + print(f"Error creating issue {i+1}: {e}") + continue + +if __name__ == '__main__': + create_jira_content() \ No newline at end of file diff --git a/src/gurubase-backend/backend/scripts/onetime/populate_zendesk.py b/src/gurubase-backend/backend/scripts/onetime/populate_zendesk.py new file mode 100644 index 00000000..cd1a2876 --- /dev/null +++ b/src/gurubase-backend/backend/scripts/onetime/populate_zendesk.py @@ -0,0 +1,210 @@ +# Create zendesk tickets and articles +import os +import django +import random +import string +import sys +from datetime import datetime, timedelta + +import requests + +# Set up Django environment +sys.path.append('/workspaces/gurubase/src/gurubase-backend/backend') +sys.path.append('/workspace/backend') +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "backend.settings") +django.setup() + +from core.models import Integration, GuruType +from core.requester import ZendeskRequester + +def generate_random_text(length=1000): + """Generate random text for content""" + words = ['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'consectetur', 'adipiscing', 'elit'] + return ' '.join(random.choices(words, k=length)) + +def create_zendesk_content(): + # Get the Zendesk
integration + integration = Integration.objects.get( + type=Integration.Type.ZENDESK, + guru_type=GuruType.objects.get(slug='gurubase') + ) + + # Initialize Zendesk requester + zendesk = ZendeskRequester(integration) + + # Create 100 tickets + ticket_subjects = [ + "Bug Report: Application Crash", + "Feature Request: New Dashboard", + "Support: Login Issues", + "Question: API Documentation", + "Feedback: User Interface", + "Issue: Performance Problems", + "Request: Account Access", + "Problem: Data Synchronization", + "Inquiry: Pricing Plans", + "Report: Security Concern" + ] + + ticket_priorities = ['urgent', 'high', 'normal', 'low'] + ticket_statuses = ['new', 'open', 'pending', 'solved'] + + # print("Creating tickets...") + # for i in range(100): + # try: + # # Create ticket with random subject and priority + # subject = f"{random.choice(ticket_subjects)} #{i+1}" + # description = f""" + # Issue Description: + # {generate_random_text()} + + # Steps to Reproduce: + # 1. {generate_random_text(50)} + # 2. {generate_random_text(50)} + # 3. 
{generate_random_text(50)} + + # Expected Behavior: + # {generate_random_text()} + + # Actual Behavior: + # {generate_random_text()} + # """ + + # # Create ticket using Zendesk API + # ticket_data = { + # 'ticket': { + # 'subject': subject, + # 'comment': { + # 'body': description + # }, + # 'priority': random.choice(ticket_priorities), + # 'status': random.choice(ticket_statuses) + # } + # } + + # # Add ticket using requests since ZendeskRequester doesn't have create method + # import requests + # response = requests.post( + # f"https://{integration.zendesk_domain}/api/v2/tickets.json", + # json=ticket_data, + # auth=(f"{integration.zendesk_user_email}/token", integration.zendesk_api_token) + # ) + # response.raise_for_status() + + # # Add 3 comments to the ticket + # ticket_id = response.json()['ticket']['id'] + # for j in range(3): + # comment_data = { + # 'ticket': { + # 'comment': { + # 'body': f"Comment {j+1} on ticket {subject}\n\n{generate_random_text(200)}", + # 'public': True + # } + # } + # } + + # requests.put( + # f"https://{integration.zendesk_domain}/api/v2/tickets/{ticket_id}.json", + # json=comment_data, + # auth=(f"{integration.zendesk_user_email}/token", integration.zendesk_api_token) + # ) + + # print(f"Created ticket {i+1}/100: {subject}") + + # except Exception as e: + # print(f"Error creating ticket {i+1}: {str(e)}") + # continue + + # Create 50 help center articles + article_categories = [ + "Getting Started", + "User Guide", + "API Documentation", + "Troubleshooting", + "Best Practices" + ] + + print("\nCreating help center section...") + try: + # Look up the first existing section (the request below is a GET; section_data is built but never sent) + section_data = { + 'section': { + 'name': 'General Documentation', + 'locale': 'en-us' + } + } + + response = requests.get( + f"https://{integration.zendesk_domain}/api/v2/help_center/en-us/sections.json", + auth=(f"{integration.zendesk_user_email}/token", integration.zendesk_api_token) + ) + response.raise_for_status() + section_id = response.json()['sections'][0]['id'] +
print(f"Created section with ID: {section_id}") + except Exception as e: + print(f"Error creating section: {str(e)}") + return + + print("\nCreating help center articles...") + for i in range(50): + try: + # Create article with random category + title = f"Article {i+1}: {random.choice(article_categories)} Guide" + body = f""" +

{title}

+ +

Overview

+

{generate_random_text(5)}

+ """ + + # Create article using Zendesk API + article_data = { + 'article': { + 'title': title, + 'body': body, + 'locale': 'en-us', + 'section_id': 78910, # Replace with your actual section ID + 'label_names': ['documentation', 'guide'], + 'comments_disabled': False + } + } + + # Add article using requests with proper headers + headers = { + 'Content-Type': 'application/json' + } + + response = requests.post( + f"https://{integration.zendesk_domain}/api/v2/help_center/articles.json", + json=article_data, + headers=headers, + auth=(f"{integration.zendesk_user_email}/token", integration.zendesk_api_token) + ) + response.raise_for_status() + + # Add 2 comments to the article + article_id = response.json()['article']['id'] + for j in range(2): + comment_data = { + 'comment': { + 'body': f"Comment {j+1} on article {title}\n\n{generate_random_text(100)}", + 'public': True + } + } + + requests.post( + f"https://{integration.zendesk_domain}/api/v2/help_center/articles/{article_id}/comments.json", + json=comment_data, + headers=headers, + auth=(f"{integration.zendesk_user_email}/token", integration.zendesk_api_token) + ) + + print(f"Created article {i+1}/50: {title}") + + except Exception as e: + print(f"Error creating article {i+1}: {str(e)}") + continue + +if __name__ == '__main__': + create_zendesk_content() + create_zendesk_content() \ No newline at end of file From 676fb1ecabfbc013728e9198e5f26a132de68516 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Wed, 30 Apr 2025 13:28:06 +0000 Subject: [PATCH 5/7] Fix confluence pagination and filtering --- .../backend/core/requester.py | 118 ++++++++++++------ 1 file changed, 79 insertions(+), 39 deletions(-) diff --git a/src/gurubase-backend/backend/core/requester.py b/src/gurubase-backend/backend/core/requester.py index 5bf16575..55b6d10f 100644 --- a/src/gurubase-backend/backend/core/requester.py +++ b/src/gurubase-backend/backend/core/requester.py @@ -1962,8 +1962,78 @@ def list_pages(self, cql=None): ValueError: If API 
request fails """ try: - # Get all pages from all spaces + # If CQL is provided, use it directly to get pages + if cql: + cql += " AND type=page" + all_pages = [] + seen_page_ids = set() # Track unique page IDs + cql_limit = 100 # Fetch 100 results at a time + + # Initial request + url = f"{self.url}/wiki/rest/api/search" + params = { + 'cql': cql, + 'limit': cql_limit, + 'expand': 'space' + } + + while True: + response = requests.get( + url, + auth=(self.confluence.username, self.confluence.password), + params=params, + timeout=30 + ) + + if response.status_code == 401: + raise ValueError("Invalid Confluence credentials") + elif response.status_code == 403: + raise ValueError("Confluence API access forbidden") + elif response.status_code != 200: + raise ValueError(f"Confluence API request failed with status {response.status_code}") + + cql_results = response.json() + results = cql_results.get('results', []) + + if not results: + break + + for result in results: + content = result.get('content', {}) + page_id = content.get('id') + + # Skip if we've seen this page ID before + if not page_id or page_id in seen_page_ids: + continue + + seen_page_ids.add(page_id) + + # Get space info from the expanded result + space = content.get('space', {}) + space_key = space.get('key') + space_name = space.get('name', '') + + # Format and add the page + formatted_page = self._format_page(content, space_key, space_name) + all_pages.append(formatted_page) + + # Check if there's a next page + next_link = cql_results.get('_links', {}).get('next') + if not next_link: + break + + # Update URL and params for next request + url = f"{self.url}/wiki{next_link}" + params = {} # Clear params as they're included in the next_link + + return { + 'pages': all_pages, + 'page_count': len(all_pages) + } + + # If no CQL, get all pages from all spaces all_pages = [] + seen_page_ids = set() # Track unique page IDs spaces_data = self.confluence.get_all_spaces() spaces = spaces_data.get('results', []) @@ 
-1975,7 +2045,7 @@ def list_pages(self, cql=None): try: # Use pagination to get all pages from the space page_start = 0 - page_limit = 25 # Fetch 25 pages at a time + page_limit = 100 # Fetch 100 pages at a time while True: pages_batch = self.confluence.get_all_pages_from_space( space_key, @@ -1987,8 +2057,12 @@ def list_pages(self, cql=None): break for page in pages_batch: - formatted_page = self._format_page(page, space_key, space_name) - all_pages.append(formatted_page) + page_id = page.get('id') + # Only add page if we haven't seen its ID before + if page_id and page_id not in seen_page_ids: + seen_page_ids.add(page_id) + formatted_page = self._format_page(page, space_key, space_name) + all_pages.append(formatted_page) # If we got fewer pages than requested, we've reached the end if len(pages_batch) < page_limit: @@ -2002,40 +2076,6 @@ def list_pages(self, cql=None): logger.warning(f"Error fetching pages from space {space_key}: {str(e)}") continue - # Filter pages by CQL if provided - if cql: - # Implement pagination for the CQL query as well - cql_page_ids = set() - cql_start = 0 - cql_limit = 25 # Fetch 25 results at a time - - while True: - cql_results = self.confluence.cql(cql, start=cql_start, limit=cql_limit) - results = cql_results.get('results', []) - - if not results: - break - - for result in results: - content = result.get('content', {}) - cql_page_ids.add(content.get('id')) - - # If we got fewer results than requested, we've reached the end - if len(results) < cql_limit: - break - - # Move to the next page - cql_start += cql_limit - - # Filter the all_pages list to only include pages that match the CQL - filtered_pages = [page for page in all_pages if page.get('id') in cql_page_ids] - - return { - 'pages': filtered_pages, - 'page_count': len(filtered_pages) - } - - # No CQL filtering, just return all pages return { 'pages': all_pages, 'page_count': len(all_pages) @@ -2104,7 +2144,7 @@ def get_page_comments(self, page_id, start=0, limit=50): # Use 
pagination to get all comments page_start = 0 - page_limit = 25 # Fetch 25 comments at a time + page_limit = 100 # Fetch 100 comments at a time while True: try: # The Confluence API doesn't support direct pagination for comments From d195d6d7709355beb8b252e7af85782806e8c11d Mon Sep 17 00:00:00 2001 From: aralyekta Date: Wed, 30 Apr 2025 13:33:34 +0000 Subject: [PATCH 6/7] Improve confluence error handling --- src/gurubase-backend/backend/core/requester.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/gurubase-backend/backend/core/requester.py b/src/gurubase-backend/backend/core/requester.py index 55b6d10f..a2604ef8 100644 --- a/src/gurubase-backend/backend/core/requester.py +++ b/src/gurubase-backend/backend/core/requester.py @@ -1990,7 +1990,14 @@ def list_pages(self, cql=None): elif response.status_code == 403: raise ValueError("Confluence API access forbidden") elif response.status_code != 200: - raise ValueError(f"Confluence API request failed with status {response.status_code}") + if 'could not parse' in response.text.lower(): + raise ValueError(f"Invalid CQL query.") + else: + split = response.json().get('message', '').split(':', 1) + if len(split) > 1: + raise ValueError(split[1].strip()) + else: + raise ValueError(f"Confluence API request failed with status {response.status_code}") cql_results = response.json() results = cql_results.get('results', []) From b43f337598858f92a0bdbca5e2d71b7153e1c690 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Wed, 30 Apr 2025 14:23:49 +0000 Subject: [PATCH 7/7] Add direct message support to slack --- .../core/integrations/slack_strategy.py | 3 +- .../migrations/0083_integration_allow_dm.py | 18 +++++++ src/gurubase-backend/backend/core/models.py | 2 + src/gurubase-backend/backend/core/views.py | 54 +++++++++++++------ src/gurubase-frontend/src/app/actions.js | 9 +++- .../Integrations/ChannelsComponent.jsx | 26 ++++++++- 6 files changed, 93 insertions(+), 19 deletions(-) create mode 100644 
src/gurubase-backend/backend/core/migrations/0083_integration_allow_dm.py diff --git a/src/gurubase-backend/backend/core/integrations/slack_strategy.py b/src/gurubase-backend/backend/core/integrations/slack_strategy.py index eb1a41d3..3cfb1742 100644 --- a/src/gurubase-backend/backend/core/integrations/slack_strategy.py +++ b/src/gurubase-backend/backend/core/integrations/slack_strategy.py @@ -55,7 +55,8 @@ def _list_channels() -> list: { 'id': c['id'], 'name': c['name'], - 'allowed': False + 'allowed': False, + 'direct_messages': False } for c in data.get('channels', []) ]) diff --git a/src/gurubase-backend/backend/core/migrations/0083_integration_allow_dm.py b/src/gurubase-backend/backend/core/migrations/0083_integration_allow_dm.py new file mode 100644 index 00000000..c289212c --- /dev/null +++ b/src/gurubase-backend/backend/core/migrations/0083_integration_allow_dm.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.18 on 2025-04-30 14:16 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0082_gurutype_private'), + ] + + operations = [ + migrations.AddField( + model_name='integration', + name='allow_dm', + field=models.BooleanField(default=False), + ), + ] diff --git a/src/gurubase-backend/backend/core/models.py b/src/gurubase-backend/backend/core/models.py index 91573997..57f1bc1c 100644 --- a/src/gurubase-backend/backend/core/models.py +++ b/src/gurubase-backend/backend/core/models.py @@ -2028,6 +2028,8 @@ class Type(models.TextChoices): zendesk_api_token = models.TextField(null=True, blank=True) zendesk_user_email = models.TextField(null=True, blank=True) + allow_dm = models.BooleanField(default=False) + date_created = models.DateTimeField(auto_now_add=True) date_updated = models.DateTimeField(auto_now=True) diff --git a/src/gurubase-backend/backend/core/views.py b/src/gurubase-backend/backend/core/views.py index c869d960..8517bf3f 100644 --- a/src/gurubase-backend/backend/core/views.py +++ 
b/src/gurubase-backend/backend/core/views.py @@ -1941,13 +1941,18 @@ def manage_channels(request, guru_type, integration_type): try: channels = request.data.get('channels', []) integration.channels = channels + + if 'allow_dm' in request.data: + integration.allow_dm = request.data['allow_dm'] + integration.save() - + return Response({ 'id': integration.id, 'type': integration.type, 'guru_type': integration.guru_type.slug, - 'channels': integration.channels + 'channels': integration.channels, + 'allow_dm': integration.allow_dm }) except Exception as e: logger.error(f"Error updating channels: {e}", exc_info=True) @@ -2422,17 +2427,25 @@ async def send_channel_unauthorized_message( client: WebClient, channel_id: str, thread_ts: str, - guru_slug: str + guru_slug: str, + dm: bool ) -> None: """Send a message explaining how to authorize the channel.""" try: base_url = await sync_to_async(get_base_url)() settings_url = f"{base_url.rstrip('/')}/guru/{guru_slug}/integrations/slack" - message = ( - "❌ This channel is not authorized to use the bot.\n\n" - f"Please visit <{settings_url}|Gurubase Settings> to configure " - "the bot and add this channel to the allowed channels list." - ) + if dm: + message = ( + "❌ Bot direct messages are not enabled.\n\n" + f"Please visit <{settings_url}|Gurubase Settings> to configure " + "the bot and enable direct messages." + ) + else: + message = ( + "❌ This channel is not authorized to use the bot.\n\n" + f"Please visit <{settings_url}|Gurubase Settings> to configure " + "the bot and add this channel to the allowed channels list." 
+ ) client.chat_postMessage( channel=channel_id, thread_ts=thread_ts, @@ -2465,12 +2478,17 @@ def process_event(): # Only proceed if it's a message event and not from a bot if event["type"] == "message" and "subtype" not in event and event.get("user") != event.get("bot_id"): + dm = False + if event['channel_type'] == 'im': + dm = True # Get bot user ID from authorizations bot_user_id = data.get("authorizations", [{}])[0].get("user_id") user_message = event["text"] # First check if the bot is mentioned - if not (bot_user_id and f"<@{bot_user_id}>" in user_message): + if not dm and not (bot_user_id and f"<@{bot_user_id}>" in user_message): + return + elif dm and event['user'] == bot_user_id: return team_id = data.get('team_id') @@ -2500,12 +2518,15 @@ def process_event(): channel_id = event["channel"] # Check if the current channel is allowed - channels = integration.channels - channel_allowed = False - for channel in channels: - if str(channel.get('id')) == channel_id and channel.get('allowed', False): - channel_allowed = True - break + if not dm: + channels = integration.channels + channel_allowed = False + for channel in channels: + if str(channel.get('id')) == channel_id and channel.get('allowed', False): + channel_allowed = True + break + else: + channel_allowed = integration.allow_dm # Get thread_ts if it exists (means we're in a thread) thread_ts = event.get("thread_ts") or event.get("ts") @@ -2519,7 +2540,8 @@ def process_event(): client=client, channel_id=channel_id, thread_ts=thread_ts, - guru_slug=integration.guru_type.slug + guru_slug=integration.guru_type.slug, + dm=dm )) finally: loop.close() diff --git a/src/gurubase-frontend/src/app/actions.js b/src/gurubase-frontend/src/app/actions.js index 6c7a214f..f3119a9e 100644 --- a/src/gurubase-frontend/src/app/actions.js +++ b/src/gurubase-frontend/src/app/actions.js @@ -918,12 +918,19 @@ export async function saveIntegrationChannels( channels ) { try { + const payload = + integrationType === "SLACK" && + 
typeof channels === "object" && + "direct_messages" in channels + ? { channels: channels.channels, allow_dm: channels.direct_messages } + : { channels }; + const response = await makeAuthenticatedRequest( `${process.env.NEXT_PUBLIC_BACKEND_FETCH_URL}/${guruType}/integrations/${integrationType}/channels/`, { method: "POST", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ channels }) + body: JSON.stringify(payload) } ); diff --git a/src/gurubase-frontend/src/components/Integrations/ChannelsComponent.jsx b/src/gurubase-frontend/src/components/Integrations/ChannelsComponent.jsx index 10a26e3a..bda004d3 100644 --- a/src/gurubase-frontend/src/components/Integrations/ChannelsComponent.jsx +++ b/src/gurubase-frontend/src/components/Integrations/ChannelsComponent.jsx @@ -42,6 +42,7 @@ const ChannelsComponent = ({ const [hasChanges, setHasChanges] = useState(false); const [isSaving, setIsSaving] = useState(false); const [open, setOpen] = useState(false); + const [directMessages, setDirectMessages] = useState(false); useEffect(() => { const fetchChannels = async () => { @@ -60,6 +61,7 @@ const ChannelsComponent = ({ } else { setChannels(channelsData?.channels || []); setOriginalChannels(channelsData?.channels || []); + setDirectMessages(channelsData?.allow_dm || false); setInternalError(null); } } catch (err) { @@ -116,6 +118,23 @@ const ChannelsComponent = ({ {/* Allowed Channels */}
+ {type === "slack" && ( +
+ { + setDirectMessages(e.target.checked); + setHasChanges(true); + }} + className="h-4 w-4 rounded border-gray-300" + /> + +
+ )} {channels .filter((c) => c.allowed) .map((channel) => ( @@ -272,7 +291,12 @@ const ChannelsComponent = ({ const response = await saveIntegrationChannels( guruData?.slug, type.toUpperCase(), - channels.filter((c) => c.allowed) + type === "slack" + ? { + channels: channels.filter((c) => c.allowed), + direct_messages: directMessages + } + : channels.filter((c) => c.allowed) ); if (!response?.error) { setHasChanges(false);