From 4f5e3c65236cbadeeaf68daf6edfb3e5f4ce5aa7 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Mon, 21 Apr 2025 09:08:20 +0000 Subject: [PATCH 01/81] Update guru edit page - Add github repositories in the table, remove its own section - Add file size info --- .../backend/core/serializers.py | 4 +- .../NewEditGuru/SourcesTableSection.jsx | 131 +++++++++++++--- .../src/components/NewGuru.jsx | 146 ++++++++---------- .../src/config/sourceTypes.js | 42 +++-- src/gurubase-frontend/src/utils/common.js | 8 + 5 files changed, 216 insertions(+), 115 deletions(-) diff --git a/src/gurubase-backend/backend/core/serializers.py b/src/gurubase-backend/backend/core/serializers.py index 2ae4e6ae..b903d11d 100644 --- a/src/gurubase-backend/backend/core/serializers.py +++ b/src/gurubase-backend/backend/core/serializers.py @@ -1,5 +1,5 @@ from rest_framework import serializers -from core.models import WidgetId, Binge, DataSource, GuruType, Question, FeaturedDataSource, APIKey, Settings, CrawlState +from core.models import GithubFile, WidgetId, Binge, DataSource, GuruType, Question, FeaturedDataSource, APIKey, Settings, CrawlState from core.gcp import replace_media_root_with_nginx_base_url from django.conf import settings @@ -88,6 +88,8 @@ def to_representation(self, instance): if instance.type == DataSource.Type.GITHUB_REPO: if instance.error: repr['error'] = format_github_repo_error(instance.error, instance.user_error) + + repr['file_count'] = GithubFile.objects.filter(data_source=instance).count() return repr diff --git a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx index 56186412..b14aff26 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx @@ -29,6 +29,8 @@ import { } from "@/components/ui/tooltip"; import { AlertTriangle, + Check, + Clock, Edit, Info, LinkIcon, @@ -50,8 +52,10 @@ import { getNormalizedDomain } from "@/utils/common"; import { SourceActions } from "@/components/NewEditGuru/SourceActions"; import { getSourceFilterItems, - getSourceTypeConfigById + getSourceTypeConfigById, + SOURCE_TYPES_CONFIG } from "@/config/sourceTypes"; +import { formatFileSize } from "@/utils/common"; export function SourcesTableSection({ sources, @@ -137,8 +141,86 @@ export function SourcesTableSection({ const renderBadges = (source) => { const config = getSourceTypeConfigById(source.type); + const isGithubSource = + source.type?.toLowerCase() === SOURCE_TYPES_CONFIG.GITHUB.id; + const isPdfSource = + source.type?.toLowerCase() === SOURCE_TYPES_CONFIG.PDF.id; + if (!config) return null; + if (isGithubSource) { + let badgeProps = { + className: + "flex items-center rounded-full gap-1 px-2 py-0.5 text-xs font-medium pointer-events-none", + icon: Clock, + iconColor: "text-gray-500", + text: "Pending" + }; + + switch (source.status) { + case "SUCCESS": + badgeProps.icon = Check; + badgeProps.iconColor = "text-green-700"; + badgeProps.text = `Indexed ${source.file_count} files`; + badgeProps.className += " bg-green-50 text-green-700"; + break; + case "FAIL": + badgeProps.icon = AlertTriangle; + badgeProps.iconColor = "text-red-700"; + badgeProps.text = "Failed"; + badgeProps.className += " bg-red-50 text-red-700"; + break; + case "NOT_PROCESSED": + default: + badgeProps.icon = Clock; + badgeProps.iconColor = "text-yellow-700"; + badgeProps.text = "Processing"; + badgeProps.className += " bg-yellow-50 
text-yellow-700"; + break; + } + + const badgeElement = ( + + + {badgeProps.text} + + ); + + return ( +
+ {source.status === "FAIL" && source.error ? ( + + + + {badgeElement} + + +

+ {source.error} +

+
+ + + + ) : ( + badgeElement + )} +
+ ); + } + if (config.hasPrivacyToggle) { return (
@@ -219,7 +301,7 @@ export function SourcesTableSection({ ); } - if (source.domains && config.canEdit) { + if (!isPdfSource && !isGithubSource && config.canEdit) { const statusGroups = source.domains.reduce((acc, domain) => { const status = domain.status === "NOT_PROCESSED" @@ -310,18 +392,21 @@ export function SourcesTableSection({ (source) => source.type.toLowerCase() === filterType.toLowerCase() ); - const urlSources = filteredSources.filter( - (source) => - source.type.toLowerCase() === "youtube" || - source.type.toLowerCase() === "website" || - source.type.toLowerCase() === "jira" || - source.type.toLowerCase() === "zendesk" - ); - const fileSources = filteredSources.filter( - (source) => source.type.toLowerCase() === "pdf" - ); + // Separate sources based on the willGroup flag from config + const sourcesToGroup = []; + const sourcesNotToGroup = []; + + filteredSources.forEach((source) => { + const config = getSourceTypeConfigById(source.type); + if (config?.willGroup) { + sourcesToGroup.push(source); + } else { + sourcesNotToGroup.push(source); + } + }); - const groupedSources = urlSources.reduce((acc, source) => { + // Group the sources marked for grouping + const groupedSources = sourcesToGroup.reduce((acc, source) => { const domain = getNormalizedDomain(source.url); if (!domain) return acc; const existingSource = acc.find( @@ -341,7 +426,7 @@ export function SourcesTableSection({ return acc; }, []); - const displaySources = [...groupedSources, ...fileSources]; + const displaySources = [...groupedSources, ...sourcesNotToGroup]; const sourceFilterItems = getSourceFilterItems(); return ( @@ -388,7 +473,7 @@ export function SourcesTableSection({ Type Name - + Details @@ -408,23 +493,27 @@ export function SourcesTableSection({
{IconComponent && ( - + )} {config?.displaySourceText || source.sources}
- {isSourceProcessing(source) && isSourcesProcessing ? ( + {source.status === "NOT_PROCESSED" && + isSourcesProcessing ? (
- Processing source... + Processing...
) : ( - {source?.type?.toLowerCase() === "pdf" - ? source?.name - : source?.domain} + {source.name} + {source.type === "pdf" && source.size && ( + + ({formatFileSize(source.size)}) + + )} )}
diff --git a/src/gurubase-frontend/src/components/NewGuru.jsx b/src/gurubase-frontend/src/components/NewGuru.jsx index 4998fd41..b267dfe8 100644 --- a/src/gurubase-frontend/src/components/NewGuru.jsx +++ b/src/gurubase-frontend/src/components/NewGuru.jsx @@ -43,7 +43,6 @@ import { useCrawler } from "@/hooks/useCrawler"; import { DeleteConfirmationModal } from "@/components/NewEditGuru/DeleteConfirmationModal"; import { LongUpdatesIndicator } from "@/components/NewEditGuru/LongUpdatesIndicator"; import { PendingChangesIndicator } from "@/components/NewEditGuru/PendingChangesIndicator"; -import { GithubSourceSection } from "@/components/NewEditGuru/GithubSourceSection"; import { GuruDetailsSection } from "@/components/NewEditGuru/GuruDetailsSection"; // Import new component import { SourcesTableSection } from "@/components/NewEditGuru/SourcesTableSection"; // Import new component import { IntegrationRequiredModal } from "@/components/NewEditGuru/IntegrationRequiredModal"; @@ -225,7 +224,6 @@ export default function NewGuru({ guruData, isProcessing }) { const isOllamaConfigValid = isOllamaUrlValid && isEmbeddingModelValid && isBaseModelValid; - // Modify the auth check effect useEffect(() => { if (!isSelfHosted && !user && !authLoading) { @@ -267,10 +265,8 @@ export default function NewGuru({ guruData, isProcessing }) { const [jiraEditorContent, setJiraEditorContent] = useState(""); // <-- New state for Jira editor const [isZendeskSidebarOpen, setIsZendeskSidebarOpen] = useState(false); // <-- Add Zendesk sidebar state const [zendeskEditorContent, setZendeskEditorContent] = useState(""); // <-- Add Zendesk editor state - - // First, add a state to track the GitHub repository source status - const [githubRepoStatuses, setGithubRepoStatuses] = useState({}); - const [githubRepoErrors, setGithubRepoErrors] = useState({}); + const [isGithubSidebarOpen, setIsGithubSidebarOpen] = useState(false); // Add state for potential future Github sidebar + const [githubEditorContent, setGithubEditorContent] = useState(""); // Add state for potential future Github editor const [jiraIntegration, setJiraIntegration] = useState(null); // <-- State for Jira integration details const [isLoadingIntegration, setIsLoadingIntegration] = useState(true); // <-- State for loading integration @@ -500,58 +496,51 @@ export default function NewGuru({ guruData, isProcessing }) { // Update the useEffect where we process dataSources to find and set GitHub repo status useEffect(() => { if (customGuruData && dataSources?.results) { - const newSources = dataSources.results.map((source) => ({ - id: source.id, - sources: - source.type === "YOUTUBE" - ? "Video" - : source.type === "PDF" - ? "File" - : source.type === "JIRA" - ? "Jira" - : source.type === "ZENDESK" - ? "Zendesk" // <-- Add Zendesk type display - : "Website", - name: source.title, - type: source.type.toLowerCase(), - size: source.type === "PDF" ? source.size : "N/A", - url: source.url || "", - status: source.status, - last_reindex_date: source.last_reindex_date || "", - error: source.error || "", - private: source.type === "PDF" ? 
!!source.private : undefined - })); - - // Find GitHub repository source status - if (customGuruData?.github_repos) { - const githubSources = dataSources.results.filter( - (source) => source.url && source.url.startsWith("https://github.com") - ); + const newSources = dataSources.results.map((source) => { + const baseSource = { + id: source.id, + name: source.title, // Use title for PDF, domain/repo name for others + type: source.type.toLowerCase(), + size: source.type === "PDF" ? source.size : "N/A", + url: source.url || "", + status: source.status, + file_count: source.file_count, // Add file_count + last_reindex_date: source.last_reindex_date || "", + error: source.error || "", + private: source.type === "PDF" ? !!source.private : undefined + }; - if (githubSources.length > 0) { - const newStatuses = {}; - const newErrors = {}; - let hasUnprocessed = false; + // Adjust name for non-PDF types + if ( + source.type === "WEBSITE" || + source.type === "YOUTUBE" || + source.type === "JIRA" || + source.type === "ZENDESK" + ) { + baseSource.name = getNormalizedDomain(source.url); + } else if (source.type === "GITHUB") { + // Extract repo name from URL for GitHub + const urlParts = (source.url || "").split("/"); + baseSource.name = urlParts.slice(-2).join("/"); // e.g., username/repo + } - githubSources.forEach((source) => { - newStatuses[source.url] = source.status; - newErrors[source.url] = source.error || null; - if (source.status === "NOT_PROCESSED") { - hasUnprocessed = true; - } - }); + return baseSource; + }); - setGithubRepoStatuses(newStatuses); - setGithubRepoErrors(newErrors); + // Check for unprocessed sources (including GitHub) + const hasUnprocessedSources = newSources.some( + (source) => source.status === "NOT_PROCESSED" + ); - if (hasUnprocessed) { - setIsSourcesProcessing(true); - pollForGuruReadiness(customGuru); - } + if (hasUnprocessedSources) { + setIsSourcesProcessing(true); + if (customGuru && !isPollingRef.current) { + pollForGuruReadiness(customGuru); } } setSources(newSources); + // Update form values for all source types, including github form.setValue( "youtubeLinks", newSources.filter((s) => s.type === "youtube").map((s) => s.url) @@ -875,36 +864,28 @@ export default function NewGuru({ guruData, isProcessing }) { // Update sources state while preserving privacy settings const updatedSources = latestSources.results.map((source) => ({ id: source.id, - sources: - source.type === "YOUTUBE" - ? "Video" + name: + source.type === "GITHUB" + ? source.url.split("/").slice(-2).join("/") : source.type === "PDF" - ? "File" - : source.type === "JIRA" - ? "Jira" - : source.type === "ZENDESK" - ? "Zendesk" - : "Website", - name: source.title, + ? source.title + : getNormalizedDomain(source.url), type: source.type.toLowerCase(), size: source.type === "PDF" ? source.size : "N/A", url: source.url || "", status: source.status, + file_count: source.file_count, + last_reindex_date: source.last_reindex_date || "", error: source.error || "", - // Preserve existing privacy setting or use the one from backend - private: - source.type === "PDF" - ? existingPrivacySettings[source.id] !== undefined - ? existingPrivacySettings[source.id] - : !!source.private - : undefined + private: source.type === "PDF" ? 
!!source.private : undefined })); setSources(updatedSources); // Update form values and reset states + const currentFormValues = form.getValues(); const newFormValues = { - ...form.getValues(), + ...currentFormValues, youtubeLinks: updatedSources .filter((s) => s.type === "youtube") .map((s) => s.url), @@ -1359,6 +1340,20 @@ export default function NewGuru({ guruData, isProcessing }) { hasNewSources = true; } + // Add GitHub Repos from dirtyChanges (newly added via input) + const githubSources = dirtyChanges.sources + .filter( + (source) => + source.type === "github" && source.newAddedSource && !source.deleted + ) + .map((source) => source.url); + + if (githubSources.length > 0) { + // Add github_urls to the form data + newSourcesFormData.append("github_urls", JSON.stringify(githubSources)); + hasNewSources = true; + } + if (hasNewSources) { hasChanges = true; setIsSourcesProcessing(true); @@ -2108,20 +2103,6 @@ export default function NewGuru({ guruData, isProcessing }) { setDirtyChanges={setDirtyChanges} /> -
-
- -
-
- {/* Use SourcesTableSection component */}
diff --git a/src/gurubase-frontend/src/config/sourceTypes.js b/src/gurubase-frontend/src/config/sourceTypes.js index a33a2ec4..3173251a 100644 --- a/src/gurubase-frontend/src/config/sourceTypes.js +++ b/src/gurubase-frontend/src/config/sourceTypes.js @@ -6,12 +6,13 @@ import { } from "lucide-react"; import { + GitHubIcon, JiraIcon, LogosYoutubeIcon, SolarFileTextBold, // SolarTrashBinTrashBold, // Keep for potential future use if needed directly SolarVideoLibraryBold, - ZendeskIcon // Placeholder Icon + ZendeskIcon } from "@/components/Icons"; // Assuming Icons barrel file exists or adjust path // Check if beta features are enabled via environment variable @@ -26,13 +27,14 @@ const baseSourceTypesConfig = { displaySourceText: "Website", // Text shown in the table 'Type' column icon: LinkIcon, // Icon for the table row actionButtonIcon: LinkIcon, // Icon for the 'Add' button - actionButtonText: "Add Website", + actionButtonText: "Website", sidebarStateSetterName: "setIsUrlSidebarOpen", // Name of the state setter in NewGuru formField: "websiteUrls", // Corresponding field in the form schema canReindex: true, // Can this source type be reindexed? canEdit: true, // Can this source type be edited (open sidebar)? requiresIntegrationCheck: false, // Does adding require an integration check? - filterValue: "website" // Value used in the filter dropdown + filterValue: "website", // Value used in the filter dropdown + willGroup: true // Group by domain }, YOUTUBE: { id: "youtube", @@ -41,13 +43,14 @@ const baseSourceTypesConfig = { displaySourceText: "Video", icon: SolarVideoLibraryBold, actionButtonIcon: LogosYoutubeIcon, - actionButtonText: "Add YouTube", + actionButtonText: "YouTube", sidebarStateSetterName: "setIsYoutubeSidebarOpen", formField: "youtubeLinks", canReindex: false, canEdit: true, requiresIntegrationCheck: false, - filterValue: "youtube" + filterValue: "youtube", + willGroup: true // Group by domain }, PDF: { id: "pdf", @@ -56,7 +59,7 @@ const baseSourceTypesConfig = { displaySourceText: "File", icon: SolarFileTextBold, actionButtonIcon: Upload, - actionButtonText: "Upload PDFs", + actionButtonText: "PDFs", sidebarStateSetterName: null, // PDFs don't open a sidebar for editing content formField: "uploadedFiles", canReindex: false, @@ -64,7 +67,24 @@ const baseSourceTypesConfig = { requiresIntegrationCheck: false, hasPrivacyToggle: true, // Specific to PDF actionHandlerName: "onUploadPdfClick", // Specific handler for PDF upload trigger + willGroup: false, // Do not group PDFs filterValue: "pdf" + }, + GITHUB: { + id: "github_repo", + apiType: "GITHUB_REPO", + displayName: "GitHub", + displaySourceText: "GitHub", + icon: GitHubIcon, + actionButtonIcon: GitHubIcon, + actionButtonText: "GitHub", + sidebarStateSetterName: "setIsGithubSidebarOpen", + formField: "githubRepos", + canReindex: true, + canEdit: false, + requiresIntegrationCheck: false, + filterValue: "github_repo", + willGroup: false // Do not group GitHub repos } }; @@ -77,7 +97,7 @@ if (isBetaFeaturesEnabled) { displaySourceText: "Jira", icon: JiraIcon, actionButtonIcon: JiraIcon, - actionButtonText: "Add Jira Issues", + actionButtonText: "Jira Issues", sidebarStateSetterName: "setIsJiraSidebarOpen", formField: "jiraIssues", canReindex: true, @@ -86,7 +106,8 @@ if (isBetaFeaturesEnabled) { integrationCheckProp: "jiraIntegration", // Prop name in parent for integration status integrationLoadingProp: "isLoadingIntegration", // Prop name for loading status integrationModalSetterName: "setShowJiraIntegrationModal", // State 
setter for the integration modal - filterValue: "jira" + filterValue: "jira", + willGroup: true // Group Jira issues by domain (project?) }; baseSourceTypesConfig.ZENDESK = { id: "zendesk", @@ -95,7 +116,7 @@ if (isBetaFeaturesEnabled) { displaySourceText: "Zendesk", icon: ZendeskIcon, // Placeholder actionButtonIcon: ZendeskIcon, // Placeholder - actionButtonText: "Add Zendesk Data", + actionButtonText: "Zendesk Data", sidebarStateSetterName: "setIsZendeskSidebarOpen", // Assuming a similar pattern formField: "zendeskTickets", // Assuming a form field name canReindex: true, @@ -104,7 +125,8 @@ if (isBetaFeaturesEnabled) { integrationCheckProp: "zendeskIntegration", // Prop name for integration status integrationLoadingProp: "isLoadingIntegration", // Prop name for loading status integrationModalSetterName: "setShowZendeskIntegrationModal", // State setter for the integration modal - filterValue: "zendesk" + filterValue: "zendesk", + willGroup: true // Group Zendesk tickets by domain }; } diff --git a/src/gurubase-frontend/src/utils/common.js b/src/gurubase-frontend/src/utils/common.js index 944f0a85..41bb5776 100644 --- a/src/gurubase-frontend/src/utils/common.js +++ b/src/gurubase-frontend/src/utils/common.js @@ -105,3 +105,11 @@ export const determineInitialTab = (domains) => { return defaultTab; }; + +export const formatFileSize = (size) => { + if (size < 1024) return `${size} B`; + if (size < 1024 * 1024) return `${(size / 1024).toFixed(2)} KB`; + if (size < 1024 * 1024 * 1024) + return `${(size / (1024 * 1024)).toFixed(2)} MB`; + return `${(size / (1024 * 1024 * 1024)).toFixed(2)} GB`; +}; From b5a1db0760a2926f8c3f85b0868467249671d5e8 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Mon, 21 Apr 2025 10:39:17 +0000 Subject: [PATCH 02/81] Improve filter --- src/gurubase-backend/backend/core/serializers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gurubase-backend/backend/core/serializers.py b/src/gurubase-backend/backend/core/serializers.py index b903d11d..f1b065d8 100644 --- a/src/gurubase-backend/backend/core/serializers.py +++ b/src/gurubase-backend/backend/core/serializers.py @@ -89,7 +89,7 @@ def to_representation(self, instance): if instance.error: repr['error'] = format_github_repo_error(instance.error, instance.user_error) - repr['file_count'] = GithubFile.objects.filter(data_source=instance).count() + repr['file_count'] = GithubFile.objects.filter(data_source=instance, in_milvus=True).count() return repr From 0ee040e4c76be62aebc321cb037c71deabdc07ea Mon Sep 17 00:00:00 2001 From: aralyekta Date: Mon, 21 Apr 2025 12:50:13 +0000 Subject: [PATCH 03/81] Fix pending state + name for jira and zendesk --- .../src/components/NewEditGuru/SourcesTableSection.jsx | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx index b14aff26..b78bbc1c 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx @@ -500,15 +500,14 @@ export function SourcesTableSection({ - {source.status === "NOT_PROCESSED" && - isSourcesProcessing ? ( + {isSourceProcessing(source) && isSourcesProcessing ? (
- Processing... + Processing source...
) : ( - {source.name} + {source.name || source.domain} {source.type === "pdf" && source.size && ( ({formatFileSize(source.size)}) From 8310284c787cd5510217f37f1665a31e1e6b4e50 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Mon, 21 Apr 2025 12:52:11 +0000 Subject: [PATCH 04/81] Fix polling --- .../src/components/NewGuru.jsx | 25 ------------------- 1 file changed, 25 deletions(-) diff --git a/src/gurubase-frontend/src/components/NewGuru.jsx b/src/gurubase-frontend/src/components/NewGuru.jsx index b267dfe8..6c2f72dc 100644 --- a/src/gurubase-frontend/src/components/NewGuru.jsx +++ b/src/gurubase-frontend/src/components/NewGuru.jsx @@ -827,31 +827,6 @@ export default function NewGuru({ guruData, isProcessing }) { } if (latestSources?.results) { - // Check GitHub repository status if it exists - if (customGuruData?.github_repos) { - const githubSources = latestSources.results.filter( - (source) => - source.url && source.url.startsWith("https://github.com") - ); - - if (githubSources.length > 0) { - const newStatuses = {}; - const newErrors = {}; - let hasUnprocessed = false; - - githubSources.forEach((source) => { - newStatuses[source.url] = source.status; - newErrors[source.url] = source.error || null; - if (source.status === "NOT_PROCESSED") { - hasUnprocessed = true; - } - }); - - setGithubRepoStatuses(newStatuses); - setGithubRepoErrors(newErrors); - } - } - // Create a map of existing privacy settings const existingPrivacySettings = sources.reduce((acc, source) => { if (source.type?.toLowerCase() === "pdf") { From 7e2213e42821bb41c973377bc1e5f282bdb9ea7c Mon Sep 17 00:00:00 2001 From: aralyekta Date: Mon, 21 Apr 2025 12:56:26 +0000 Subject: [PATCH 05/81] Fix table visual --- .../src/components/NewEditGuru/SourcesTableSection.jsx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx index b78bbc1c..1082d3ae 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx @@ -472,8 +472,9 @@ export function SourcesTableSection({ Type - Name - Details + Name + + @@ -520,6 +521,7 @@ export function SourcesTableSection({
{renderBadges(source)} + From 145b5c4fa68935e081815f89328803e412e2dcc0 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Mon, 21 Apr 2025 13:01:44 +0000 Subject: [PATCH 06/81] Fix github badge visuals --- .../NewEditGuru/SourcesTableSection.jsx | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx index 1082d3ae..5d9a9ae2 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx @@ -181,7 +181,13 @@ export function SourcesTableSection({ const badgeElement = ( - + {badgeProps.text} ); @@ -192,7 +198,12 @@ export function SourcesTableSection({ - {badgeElement} + + {badgeElement} + ) : ( - badgeElement + + {badgeElement} + )}
); @@ -474,7 +490,6 @@ export function SourcesTableSection({ Type Name - From 765ba96b8f8090b1e8a939b8067579119e2778da Mon Sep 17 00:00:00 2001 From: aralyekta Date: Mon, 21 Apr 2025 14:20:05 +0000 Subject: [PATCH 07/81] Add github sidebar + form management --- .../backend/core/serializers.py | 2 + .../NewEditGuru/GitHubSourceDialog.jsx | 273 +++++++++++++++++ .../NewEditGuru/SourcesTableSection.jsx | 28 +- .../src/components/NewGuru.jsx | 279 +++++++++++++++--- .../src/components/ui/switch.jsx | 23 ++ 5 files changed, 561 insertions(+), 44 deletions(-) create mode 100644 src/gurubase-frontend/src/components/NewEditGuru/GitHubSourceDialog.jsx create mode 100644 src/gurubase-frontend/src/components/ui/switch.jsx diff --git a/src/gurubase-backend/backend/core/serializers.py b/src/gurubase-backend/backend/core/serializers.py index f1b065d8..53daea0c 100644 --- a/src/gurubase-backend/backend/core/serializers.py +++ b/src/gurubase-backend/backend/core/serializers.py @@ -90,6 +90,8 @@ def to_representation(self, instance): repr['error'] = format_github_repo_error(instance.error, instance.user_error) repr['file_count'] = GithubFile.objects.filter(data_source=instance, in_milvus=True).count() + repr['glob_pattern'] = instance.github_glob_pattern + repr['glob_include'] = instance.github_glob_include return repr diff --git a/src/gurubase-frontend/src/components/NewEditGuru/GitHubSourceDialog.jsx b/src/gurubase-frontend/src/components/NewEditGuru/GitHubSourceDialog.jsx new file mode 100644 index 00000000..47d0ff7c --- /dev/null +++ b/src/gurubase-frontend/src/components/NewEditGuru/GitHubSourceDialog.jsx @@ -0,0 +1,273 @@ +import * as DialogPrimitive from "@radix-ui/react-dialog"; +import { X } from "lucide-react"; +import * as React from "react"; + +import { Button } from "@/components/ui/button"; +import { cn } from "@/lib/utils"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { isValidUrl } from "@/utils/common"; +import { Checkbox } from "@/components/ui/checkbox"; + +const Dialog = DialogPrimitive.Root; +const DialogPortal = DialogPrimitive.Portal; + +const StyledDialogContent = React.forwardRef( + ({ children, isMobile, ...props }, ref) => ( + + + + {children} + + + ) +); + +StyledDialogContent.displayName = "StyledDialogContent"; + +const GitHubSourceDialog = React.memo( + ({ + isOpen, + onOpenChange, + repoUrl, + globPattern, + onRepoUrlChange, + onGlobPatternChange, + onAddGithubRepo, + isMobile, + isProcessing, + isEditingRepo, + editingRepo + }) => { + const [isClosing, setIsClosing] = React.useState(false); + const [urlError, setUrlError] = React.useState(""); + const [includeGlobPattern, setIncludeGlobPattern] = React.useState(false); + + // Reset state when dialog is opened + React.useEffect(() => { + if (isOpen) { + setUrlError(""); + // Only reset include pattern if we're not editing + if (!isEditingRepo) { + setIncludeGlobPattern(false); + } + } + }, [isOpen, isEditingRepo]); + + // When editing, populate the form with existing data + React.useEffect(() => { + if (isEditingRepo && editingRepo) { + // If we're in edit mode, set the includeGlobPattern based on the repo settings + setIncludeGlobPattern(editingRepo.include_glob || false); + } + }, [isEditingRepo, editingRepo]); + + const handleClose = React.useCallback(() => { + setTimeout(() => { + document.body.style.pointerEvents = ""; + }, 500); + onOpenChange(false); + }, [onOpenChange]); + + const handleDialogClose = React.useCallback( + (e) => { + e?.preventDefault(); + 
handleClose(); + }, + [handleClose] + ); + + const validateUrl = React.useCallback((url) => { + if (!url) { + setUrlError("Repository URL is required"); + return false; + } + + if (!isValidUrl(url)) { + setUrlError("Please enter a valid URL"); + return false; + } + + // Check if it's a GitHub URL + if (!url.includes("github.com")) { + setUrlError("URL must be from github.com"); + return false; + } + + setUrlError(""); + return true; + }, []); + + const handleSubmit = React.useCallback( + (e) => { + e.preventDefault(); + + if (validateUrl(repoUrl)) { + // Only include glob pattern if it's enabled + const finalGlobPattern = globPattern || ""; + onAddGithubRepo( + repoUrl, + finalGlobPattern, + includeGlobPattern, + isEditingRepo ? editingRepo.id : null + ); + handleClose(); + } + }, + [ + repoUrl, + globPattern, + includeGlobPattern, + onAddGithubRepo, + handleClose, + validateUrl, + isEditingRepo, + editingRepo + ] + ); + + return ( + { + if (!open && !isClosing) { + handleClose(); + } else if (open) { + document.body.style.pointerEvents = "none"; + } + }}> + +
+
+
+

+ {isEditingRepo + ? "Edit GitHub Repository" + : "Add GitHub Repository"} +

+

+ {isEditingRepo + ? "Update glob pattern for this repository" + : "Add a GitHub repository to index its code for your guru"} +

+
+ +
+ +
+
+
+ + onRepoUrlChange(e.target.value)} + className={urlError ? "border-red-500" : ""} + required + readOnly={isEditingRepo} + disabled={isEditingRepo} + /> + {urlError ? ( +

{urlError}

+ ) : ( +

+ Enter the full URL to the GitHub repository +

+ )} +
+ +
+ + onGlobPatternChange(e.target.value)} + /> + + {globPattern && ( +
+
+
+ + +
+
+

+ {includeGlobPattern + ? "Files matching the pattern will be included." + : "Files matching the pattern will be excluded."} +

+
+ )} + + {!globPattern && ( +

+ Specify file patterns to include. Leave empty to include + all files. +

+ )} +
+ +
+ + +
+
+
+
+
+
+ ); + } +); + +GitHubSourceDialog.displayName = "GitHubSourceDialog"; + +export default GitHubSourceDialog; diff --git a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx index 5d9a9ae2..11f58be9 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx @@ -77,7 +77,11 @@ export function SourcesTableSection({ setIsZendeskSidebarOpen, setShowZendeskIntegrationModal, zendeskIntegration, - isEditMode + isEditMode, + setIsGithubSidebarOpen, + handleEditGithubGlob, + setIsEditingRepo, + setEditingRepo }) { const [filterType, setFilterType] = useState("all"); @@ -109,6 +113,17 @@ export function SourcesTableSection({ } }; + const handleAddGithub = () => { + setClickedSource([]); + if (typeof setIsEditingRepo === "function") { + setIsEditingRepo(false); + } + if (typeof setEditingRepo === "function") { + setEditingRepo(null); + } + setIsGithubSidebarOpen(true); + }; + const handleUploadPdf = () => { if (fileInputRef.current) { fileInputRef.current.click(); @@ -120,7 +135,8 @@ export function SourcesTableSection({ youtube: handleAddYoutube, jira: handleAddJira, onUploadPdfClick: handleUploadPdf, - zendesk: handleAddZendesk + zendesk: handleAddZendesk, + github_repo: handleAddGithub }; const sourceLoadingStates = { @@ -558,6 +574,14 @@ export function SourcesTableSection({ Edit )} + {source.type === "github_repo" && ( + handleEditGithubGlob(source)}> + + Edit Glob + + )} {config?.canReindex && ( !value || value.startsWith("https://github.com"), { - message: "Must be a valid GitHub repository URL." - }) + z.object({ + id: z.string().optional(), + url: z + .string() + .refine((value) => !value || value.startsWith("https://github.com"), { + message: "Must be a valid GitHub repository URL." 
+ }), + glob_pattern: z.string().optional(), + include_glob: z.boolean().optional() + }) ) .default([]) .optional(), @@ -240,9 +246,7 @@ export default function NewGuru({ guruData, isProcessing }) { const isEditMode = !!customGuru; const [selectedFile, setSelectedFile] = useState(null); const [iconUrl, setIconUrl] = useState(customGuruData?.icon_url || null); - const [index_repo, setIndexRepo] = useState( - customGuruData?.index_repo || false - ); + const [sources, setSources] = useState([]); const fileInputRef = useRef(null); const [isSourcesProcessing, setIsSourcesProcessing] = useState(isProcessing); @@ -279,6 +283,11 @@ export default function NewGuru({ guruData, isProcessing }) { const [showZendeskIntegrationModal, setShowZendeskIntegrationModal] = useState(false); // <-- Add Zendesk modal state + const [repoUrl, setRepoUrl] = useState(""); + const [globPattern, setGlobPattern] = useState(""); + const [isEditingRepo, setIsEditingRepo] = useState(false); + const [editingRepo, setEditingRepo] = useState(null); + useEffect(() => { const fetchIntegration = async () => { const integration = await getIntegrationDetails(customGuru, "JIRA"); @@ -330,7 +339,7 @@ export default function NewGuru({ guruData, isProcessing }) { guruName: customGuruData?.name || "", guruLogo: customGuruData?.icon_url || "", guruContext: customGuruData?.domain_knowledge || "", - githubRepos: customGuruData?.github_repos || [], + githubRepos: [], uploadedFiles: [], youtubeLinks: [], websiteUrls: [], @@ -518,15 +527,22 @@ export default function NewGuru({ guruData, isProcessing }) { source.type === "ZENDESK" ) { baseSource.name = getNormalizedDomain(source.url); - } else if (source.type === "GITHUB") { + } else if (source.type === "GITHUB_REPO") { // Extract repo name from URL for GitHub const urlParts = (source.url || "").split("/"); baseSource.name = urlParts.slice(-2).join("/"); // e.g., username/repo + baseSource.glob_pattern = source.github_glob_pattern || ""; + baseSource.include_glob = source.github_include_glob || false; } return baseSource; }); + console.log( + "newSources", + newSources.filter((s) => s.type === "github_repo") + ); + // Check for unprocessed sources (including GitHub) const hasUnprocessedSources = newSources.some( (source) => source.status === "NOT_PROCESSED" @@ -545,6 +561,17 @@ export default function NewGuru({ guruData, isProcessing }) { "youtubeLinks", newSources.filter((s) => s.type === "youtube").map((s) => s.url) ); + form.setValue( + "githubRepos", + newSources + .filter((s) => s.type === "github_repo") + .map((s) => ({ + id: String(s.id), // Convert to string + url: s.url, + glob_pattern: s.glob_pattern || "", // Ensure string + include_glob: !!s.include_glob // Ensure boolean + })) + ); form.setValue( "websiteUrls", newSources.filter((s) => s.type === "website").map((s) => s.url) @@ -574,18 +601,26 @@ export default function NewGuru({ guruData, isProcessing }) { // const formValues = form.watch(); useEffect(() => { - // if there is a error in the youtubeLinks or websiteUrls, then show the error message - if ( - form.formState.errors.youtubeLinks || - form.formState.errors.websiteUrls - ) { + // Check for any errors in the form and show an appropriate error message + const errorFields = Object.keys(form.formState.errors); + if (errorFields.length > 0) { + // Get the first error field and message + const firstErrorField = errorFields[0]; + const errorMessage = + form.formState.errors[firstErrorField]?.message || + `Please check the ${firstErrorField.replace("guru", "")} field.`; + 
CustomToast({ - message: `Please enter a valid ${form.formState.errors.youtubeLinks ? "YouTube" : "website"} link.`, + message: errorMessage, variant: "error" }); } }, [form.formState.errors]); + useEffect(() => { + console.log("Form values:", form.getValues()); + }, [form.getValues()]); + const handleFileUpload = (event) => { const files = Array.from(event.target.files); const newSources = files @@ -852,7 +887,9 @@ export default function NewGuru({ guruData, isProcessing }) { file_count: source.file_count, last_reindex_date: source.last_reindex_date || "", error: source.error || "", - private: source.type === "PDF" ? !!source.private : undefined + private: source.type === "PDF" ? !!source.private : undefined, + glob_pattern: source.github_glob_pattern || "", + include_glob: source.github_include_glob || false })); setSources(updatedSources); @@ -1030,6 +1067,7 @@ export default function NewGuru({ guruData, isProcessing }) { const onSubmit = async (data) => { try { + console.log("Sending data:", data); // Check data source limits first if (!validateSourceLimits(sources, dirtyChanges, customGuruData)) { return; @@ -1059,6 +1097,10 @@ export default function NewGuru({ guruData, isProcessing }) { } if (!data.guruContext) { + console.log( + "Stopping submission because guruContext is empty:", + data.guruContext + ); return; } @@ -1076,21 +1118,13 @@ export default function NewGuru({ guruData, isProcessing }) { data.jiraIssues?.length > 0 || data.zendeskTickets?.length > 0; - // Add check for GitHub repo changes - const hasGithubChanges = isEditMode - ? (data.githubRepos || []) !== (customGuruData?.github_repos || []) - : !!data.githubRepos; - if ( - (!hasResources && (!index_repo || !hasGithubChanges) && !isEditMode) || - (sources.length === 0 && - (!index_repo || !hasGithubChanges) && - isEditMode) + (!hasResources && !isEditMode) || + (sources.length === 0 && !isEditMode) ) { CustomToast({ - message: index_repo - ? "At least one resource (PDF, YouTube link, website URL) must be added, or GitHub repository settings must be changed." 
- : "At least one resource (PDF, YouTube link, website URL) must be added.", + message: + "At least one resource (PDF, YouTube link, website URL) must be added, or GitHub repository settings must be changed.", variant: "error" }); @@ -1107,10 +1141,6 @@ export default function NewGuru({ guruData, isProcessing }) { formData.append("name", data.guruName); } formData.append("domain_knowledge", data.guruContext); - formData.append( - "github_repos", - JSON.stringify(data.githubRepos.filter(Boolean)) - ); // Handle guruLogo if (data.guruLogo instanceof File) { @@ -1141,7 +1171,7 @@ export default function NewGuru({ guruData, isProcessing }) { await fetchGuruData(guruSlug); // If there are GitHub-related changes, mark for polling - if (index_repo && hasGithubChanges) { + if (hasGithubChanges) { hasChanges = true; await fetchDataSources(guruSlug); } @@ -1501,6 +1531,10 @@ export default function NewGuru({ guruData, isProcessing }) { const [hasFormChanged, setHasFormChanged] = useState(false); + useEffect(() => { + console.log("Has form changed:", hasFormChanged); + }, [hasFormChanged]); + // Modify hasFormChanged to be a pure function useEffect(() => { if (!isEditMode || isPublishing) { @@ -1512,6 +1546,7 @@ export default function NewGuru({ guruData, isProcessing }) { setHasFormChanged(false); return; } + if (dirtyChanges.sources.length > 0) { setHasFormChanged(true); return; @@ -1529,12 +1564,7 @@ export default function NewGuru({ guruData, isProcessing }) { const currentValues = form.getValues(); // Check for changes in basic fields const basicFieldsChanged = - currentValues.guruContext !== initialFormValues.guruContext || - (currentValues.githubRepos || []).some( - (repo) => !initialFormValues.githubRepos.includes(repo) - ) || - (currentValues.githubRepos || []).length !== - (initialFormValues.githubRepos || []).length; + currentValues.guruContext !== initialFormValues.guruContext; // Check for changes in arrays (files, links, urls) const compareArrays = (arr1 = [], arr2 = []) => { @@ -1588,7 +1618,14 @@ export default function NewGuru({ guruData, isProcessing }) { guruName: customGuruData.name || "", guruLogo: customGuruData.icon_url || "", guruContext: customGuruData.domain_knowledge || "", - githubRepos: customGuruData.github_repos || [], + githubRepos: dataSources.results + .filter((s) => s.type.toLowerCase() === "github_repo") + .map((s) => ({ + id: String(s.id), // Convert id to string + url: s.url, + glob_pattern: s.github_glob_pattern || "", // Default to empty string + include_glob: s.github_include_glob === true // Ensure boolean + })), uploadedFiles: dataSources.results .filter((s) => s.type.toLowerCase() === "pdf") .map((s) => ({ name: s.title })), @@ -1729,6 +1766,16 @@ export default function NewGuru({ guruData, isProcessing }) { [form, jiraEditorContent, sources] ); + // Add this effect to reset GitHub dialog state when opened + useEffect(() => { + if (isGithubSidebarOpen) { + if (!isEditingRepo) { + setRepoUrl(""); + setGlobPattern(""); + } + } + }, [isGithubSidebarOpen, isEditingRepo]); + // <-- Add handler for Zendesk URLs --> const handleAddZendeskUrls = useCallback( (links) => { @@ -2021,6 +2068,125 @@ export default function NewGuru({ guruData, isProcessing }) { isZendeskSidebarOpen ]); // <-- Add isZendeskSidebarOpen dependency + // Add handler for GitHub repo URL changes + const handleRepoUrlChange = useCallback((value) => { + setRepoUrl(value); + }, []); + + // Add handler for GitHub glob pattern changes + const handleGlobPatternChange = useCallback((value) => { + 
setGlobPattern(value); + }, []); + + // Add handler for adding GitHub repositories + const handleAddGithubRepo = useCallback( + (repoUrl, globPattern, includeGlob, repoId) => { + if (!repoUrl) return; + + // Get current GitHub repos from form + const githubRepos = form.getValues("githubRepos") || []; + + if (isEditingRepo && repoId) { + // Update existing repo + const updatedRepos = githubRepos.map((repo) => { + if (repo.id === repoId || repo.url === repoUrl) { + return { + id: String(repo.id || repoId), // Ensure string + url: repoUrl, + glob_pattern: globPattern || "", // Ensure string + include_glob: !!includeGlob // Ensure boolean + }; + } + return repo; + }); + + // Update form state + form.setValue("githubRepos", updatedRepos); + + // Mark as dirty changes + setDirtyChanges((prev) => ({ + ...prev, + guruUpdated: true, // Mark form as changed + sources: [ + ...prev.sources.filter((s) => s.id !== repoId && s.url !== repoUrl), + { + id: String(repoId || `github-${Date.now()}`), // Ensure string + updated: true, + url: repoUrl, + type: "github_repo", + glob_pattern: globPattern || "", // Ensure string + include_glob: !!includeGlob // Ensure boolean + } + ] + })); + } else { + // Create new repo + const urlParts = (repoUrl || "").split("/"); + const newRepoId = `github-${Date.now()}`; + const newRepo = { + id: newRepoId, // Already a string + url: repoUrl, + glob_pattern: globPattern || "", // Ensure string + include_glob: !!includeGlob, // Ensure boolean + name: urlParts.slice(-2).join("/"), + type: "github_repo", + sources: "GitHub", + status: "NOT_PROCESSED", + error: null, + newAddedSource: true + }; + + // Update form state + form.setValue("githubRepos", [ + ...githubRepos, + { + id: newRepoId, + url: repoUrl, + glob_pattern: globPattern || "", // Ensure string + include_glob: !!includeGlob // Ensure boolean + } + ]); + + setSources((prevSources) => [...prevSources, newRepo]); + + // Mark as dirty changes + setDirtyChanges((prev) => ({ + ...prev, + guruUpdated: true, // Mark form as changed + sources: [ + ...prev.sources, + { + id: newRepoId, + added: true, + url: repoUrl, + type: "github_repo", + glob_pattern: globPattern || "", // Ensure string + include_glob: !!includeGlob // Ensure boolean + } + ] + })); + } + + // Reset editing state + setIsEditingRepo(false); + setEditingRepo(null); + + // Reset the input fields + setRepoUrl(""); + setGlobPattern(""); + }, + [form, setDirtyChanges, isEditingRepo] + ); + + // Add handler for editing GitHub repositories + const handleEditGithubGlob = useCallback((repo) => { + setIsEditingRepo(true); + setEditingRepo(repo); + setRepoUrl(repo.url); + setGlobPattern(repo.glob_pattern || ""); + setIsGithubSidebarOpen(true); + }, []); + // If still loading auth or no user, show loading state if (!isSelfHosted && (authLoading || (!user && !authLoading))) { return ( @@ -2059,8 +2225,19 @@ export default function NewGuru({ guruData, isProcessing }) { onSubmit={(e) => { e.preventDefault(); form.trigger().then((isValid) => { + console.log( + "Form valid:", + isValid, + "Errors:", + form.formState.errors + ); if (isValid) { form.handleSubmit(onSubmit)(e); + } else { + console.log( + "Form validation failed with values:", + form.getValues() + ); } }); }}> @@ -2091,7 +2268,7 @@ export default function NewGuru({ guruData, isProcessing }) { handleDeleteSource={handleDeleteSource} handleReindexSource={handleReindexSource} handlePrivacyBadgeClick={handlePrivacyBadgeClick} - setClickedSource={setClickedSource} // Pass needed setters + 
setClickedSource={setClickedSource} setIsYoutubeSidebarOpen={setIsYoutubeSidebarOpen} setIsJiraSidebarOpen={setIsJiraSidebarOpen} setIsUrlSidebarOpen={setIsUrlSidebarOpen} @@ -2101,6 +2278,10 @@ export default function NewGuru({ guruData, isProcessing }) { setIsZendeskSidebarOpen={setIsZendeskSidebarOpen} setShowZendeskIntegrationModal={setShowZendeskIntegrationModal} isEditMode={isEditMode} + setIsGithubSidebarOpen={setIsGithubSidebarOpen} + handleEditGithubGlob={handleEditGithubGlob} + setIsEditingRepo={setIsEditingRepo} + setEditingRepo={setEditingRepo} />
@@ -2309,6 +2490,20 @@ export default function NewGuru({ guruData, isProcessing }) { IntegrationIcon={ZendeskIcon} integrationId="zendesk" /> + {/* Add GitHubSourceDialog component */} + ); } diff --git a/src/gurubase-frontend/src/components/ui/switch.jsx b/src/gurubase-frontend/src/components/ui/switch.jsx new file mode 100644 index 00000000..a817d6fb --- /dev/null +++ b/src/gurubase-frontend/src/components/ui/switch.jsx @@ -0,0 +1,23 @@ +import * as React from "react"; +import * as SwitchPrimitives from "@radix-ui/react-switch"; + +import { cn } from "@/lib/utils"; + +const Switch = React.forwardRef(({ className, ...props }, ref) => ( + + + +)); +Switch.displayName = SwitchPrimitives.Root.displayName; + +export { Switch }; From 580f9f83779cb9c3e2699a31f7876fff82995f23 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Mon, 21 Apr 2025 14:44:44 +0000 Subject: [PATCH 08/81] Remove index_repo, github_repos fields from guru type and link github data source creation --- src/gurubase-backend/backend/core/admin.py | 2 +- .../backend/core/data_sources.py | 37 ++-- ...8_remove_gurutype_github_repos_and_more.py | 21 ++ src/gurubase-backend/backend/core/models.py | 19 +- .../core/services/data_source_service.py | 18 +- src/gurubase-backend/backend/core/signals.py | 73 ------- src/gurubase-backend/backend/core/tasks.py | 6 +- .../core/tests/test_guru_type_github.py | 194 ------------------ src/gurubase-backend/backend/core/utils.py | 3 +- src/gurubase-backend/backend/core/views.py | 44 ++-- .../src/components/NewGuru.jsx | 65 ++---- 11 files changed, 98 insertions(+), 384 deletions(-) create mode 100644 src/gurubase-backend/backend/core/migrations/0078_remove_gurutype_github_repos_and_more.py delete mode 100644 src/gurubase-backend/backend/core/tests/test_guru_type_github.py diff --git a/src/gurubase-backend/backend/core/admin.py b/src/gurubase-backend/backend/core/admin.py index e76b1bc8..dce3cb1b 100644 --- a/src/gurubase-backend/backend/core/admin.py +++ b/src/gurubase-backend/backend/core/admin.py @@ -165,7 +165,7 @@ def guru_type(self, obj): @admin.register(GuruType) class GuruTypeAdmin(admin.ModelAdmin): - list_display = ['id', 'slug', 'active', 'has_sitemap_added_questions', 'icon_url', 'stackoverflow_tag', 'domain_knowledge', 'colors', 'custom', 'maintainers_list', 'github_repos', 'text_embedding_model', 'code_embedding_model', 'date_created', 'date_updated', 'github_details_updated_date'] + list_display = ['id', 'slug', 'active', 'has_sitemap_added_questions', 'icon_url', 'stackoverflow_tag', 'domain_knowledge', 'colors', 'custom', 'maintainers_list', 'text_embedding_model', 'code_embedding_model', 'date_created', 'date_updated', 'github_details_updated_date'] search_fields = ['id', 'slug', 'icon_url', 'stackoverflow_tag', 'domain_knowledge', 'date_created', 'date_updated', 'maintainers__email'] list_filter = ('active', 'custom', 'has_sitemap_added_questions', 'text_embedding_model', 'code_embedding_model') ordering = ('-id',) diff --git a/src/gurubase-backend/backend/core/data_sources.py b/src/gurubase-backend/backend/core/data_sources.py index 690cf8d1..d3153777 100644 --- a/src/gurubase-backend/backend/core/data_sources.py +++ b/src/gurubase-backend/backend/core/data_sources.py @@ -516,38 +516,43 @@ def create(self, guru_type_object, ticket_url): 'message': str(e) } -class GitHubRepoStrategy(DataSourceStrategy): - def create(self, guru_type_object, repo_url): +class GitHubStrategy(DataSourceStrategy): + def create(self, guru_type_object, repo): + url = repo['url'] + glob_pattern = 
repo['glob_pattern'] + glob_include = repo['include_glob'] try: - # Create the data source data_source = DataSource.objects.create( type=DataSource.Type.GITHUB_REPO, guru_type=guru_type_object, - url=repo_url + url=url, + github_glob_pattern=glob_pattern, + github_glob_include=glob_include ) - - data_source.save() - return { - 'type': 'GITHUB_REPO', - 'url': repo_url, + 'type': 'GitHub', + 'url': url, 'status': 'success', 'id': data_source.id, - 'title': data_source.title + 'title': data_source.title, + 'github_glob_pattern': glob_pattern, + 'github_glob_include': glob_include } except DataSourceExists as e: return { - 'type': 'GITHUB_REPO', - 'url': repo_url, + 'type': 'GitHub', + 'url': url, 'status': 'exists', 'id': e.args[0]['id'], - 'title': e.args[0]['title'] + 'title': e.args[0]['title'], + 'github_glob_pattern': e.args[0]['github_glob_pattern'], + 'github_glob_include': e.args[0]['github_glob_include'] } except Exception as e: - logger.error(f'Error processing GitHub repository {repo_url}: {traceback.format_exc()}') + logger.error(f'Error processing GitHub repository {url}: {traceback.format_exc()}') return { - 'type': 'GITHUB_REPO', - 'url': repo_url, + 'type': 'GitHub', + 'url': url, 'status': 'error', 'message': str(e) } diff --git a/src/gurubase-backend/backend/core/migrations/0078_remove_gurutype_github_repos_and_more.py b/src/gurubase-backend/backend/core/migrations/0078_remove_gurutype_github_repos_and_more.py new file mode 100644 index 00000000..9e4bc45c --- /dev/null +++ b/src/gurubase-backend/backend/core/migrations/0078_remove_gurutype_github_repos_and_more.py @@ -0,0 +1,21 @@ +# Generated by Django 4.2.18 on 2025-04-21 14:44 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0077_settings_split_min_length_settings_split_overlap_and_more'), + ] + + operations = [ + migrations.RemoveField( + model_name='gurutype', + name='github_repos', + ), + migrations.RemoveField( + model_name='gurutype', + name='index_repo', + ), + ] diff --git a/src/gurubase-backend/backend/core/models.py b/src/gurubase-backend/backend/core/models.py index 1ac145fd..b959870a 100644 --- a/src/gurubase-backend/backend/core/models.py +++ b/src/gurubase-backend/backend/core/models.py @@ -304,7 +304,6 @@ class EmbeddingModel(models.TextChoices): name = models.CharField(max_length=50, blank=True, null=True) maintainers = models.ManyToManyField(User, blank=True, related_name='maintained_guru_types') stackoverflow_tag = models.CharField(max_length=100, blank=True, null=True) - github_repos = models.JSONField(default=list, blank=True) github_details = models.JSONField(default=dict, blank=True, null=False) github_details_updated_date = models.DateTimeField(null=True, blank=True) colors = models.JSONField(default=dict, blank=True, null=False) @@ -319,7 +318,6 @@ class EmbeddingModel(models.TextChoices): typesense_collection_name = models.CharField(max_length=100, blank=True, null=True) domain_knowledge = models.TextField(default='', blank=True, null=True) has_sitemap_added_questions = models.BooleanField(default=False) - index_repo = models.BooleanField(default=True) # GitHub repository limits github_repo_count_limit = models.IntegerField(default=1) github_file_count_limit_per_repo_soft = models.IntegerField(default=1000) # Warning threshold @@ -384,19 +382,12 @@ def save(self, *args, **kwargs): if self.slug == '': raise ValidationError({'msg': 'Guru type name cannot be empty'}) - unique_github_repos = set(self.github_repos) - - if settings.ENV != 
'selfhosted' and len(unique_github_repos) > self.github_repo_count_limit: - raise ValidationError({'msg': f'You have reached the maximum number ({self.github_repo_count_limit}) of GitHub repositories for this guru type.'}) - if settings.ENV == 'selfhosted': if self.text_embedding_model == GuruType.EmbeddingModel.IN_HOUSE: raise ValidationError({'msg': 'In-house embedding model is not allowed in selfhosted environment.'}) if self.code_embedding_model == GuruType.EmbeddingModel.IN_HOUSE: raise ValidationError({'msg': 'In-house embedding model is not allowed in selfhosted environment.'}) - self.github_repos = list(unique_github_repos) - super().save(*args, **kwargs) def generate_widget_id(self, domain_url): @@ -478,7 +469,7 @@ def ready(self): return non_processed_count == 0 and non_written_count == 0 - def check_datasource_limits(self, user, file=None, website_urls_count=0, youtube_urls_count=0, github_urls_count=0, jira_urls_count=0, zendesk_urls_count=0): + def check_datasource_limits(self, user, file=None, website_urls_count=0, youtube_urls_count=0, jira_urls_count=0, zendesk_urls_count=0, github_repos_count=0): """ Checks if adding a new datasource would exceed the limits for this guru type. Returns (bool, str) tuple - (is_allowed, error_message) @@ -527,6 +518,12 @@ def check_datasource_limits(self, user, file=None, website_urls_count=0, youtube guru_type=self, type=DataSource.Type.PDF ) + + github_repo_count = DataSource.objects.filter( + guru_type=self, + type=DataSource.Type.GITHUB_REPO + ).count() + total_pdf_mb = 0 for source in pdf_sources: if source.file: @@ -541,7 +538,7 @@ def check_datasource_limits(self, user, file=None, website_urls_count=0, youtube return False, f"YouTube video limit ({self.youtube_count_limit}) reached" # Check GitHub repo limit - if (github_count + github_urls_count) > self.github_repo_count_limit: + if (github_repo_count + github_repos_count) > self.github_repo_count_limit: return False, f"GitHub repository limit ({self.github_repo_count_limit}) reached" # Check Jira issue limit diff --git a/src/gurubase-backend/backend/core/services/data_source_service.py b/src/gurubase-backend/backend/core/services/data_source_service.py index d78413a5..aad693e1 100644 --- a/src/gurubase-backend/backend/core/services/data_source_service.py +++ b/src/gurubase-backend/backend/core/services/data_source_service.py @@ -2,7 +2,7 @@ from django.core.files.uploadedfile import UploadedFile from core.models import DataSource, GuruType, Integration -from core.data_sources import JiraStrategy, PDFStrategy, YouTubeStrategy, WebsiteStrategy, ZendeskStrategy +from core.data_sources import JiraStrategy, PDFStrategy, YouTubeStrategy, WebsiteStrategy, ZendeskStrategy, GitHubStrategy from core.utils import clean_data_source_urls from core.tasks import data_source_retrieval @@ -18,7 +18,8 @@ def __init__(self, guru_type_object: GuruType, user): 'youtube': YouTubeStrategy(), 'website': WebsiteStrategy(), 'jira': JiraStrategy(), - 'zendesk': ZendeskStrategy() + 'zendesk': ZendeskStrategy(), + 'github': GitHubStrategy() } def validate_pdf_files(self, pdf_files: List[UploadedFile], pdf_privacies: List[bool]) -> None: @@ -46,7 +47,7 @@ def validate_url_limits(self, urls: List[str], url_type: str) -> None: Args: urls: List of URLs to validate - url_type: Type of URLs ('website' or 'youtube' or 'jira' or 'zendesk') + url_type: Type of URLs ('website' or 'youtube' or 'jira' or 'zendesk' or 'github') Raises: ValueError: If validation fails @@ -57,7 +58,8 @@ def validate_url_limits(self, urls: 
List[str], url_type: str) -> None: website_urls_count=len(urls) if url_type == 'website' else 0, youtube_urls_count=len(urls) if url_type == 'youtube' else 0, jira_urls_count=len(urls) if url_type == 'jira' else 0, - zendesk_urls_count=len(urls) if url_type == 'zendesk' else 0 + zendesk_urls_count=len(urls) if url_type == 'zendesk' else 0, + github_repos_count=len(urls) if url_type == 'github' else 0 ) if not is_allowed: raise ValueError(error_msg) @@ -86,7 +88,8 @@ def create_data_sources( youtube_urls: List[str], website_urls: List[str], jira_urls: List[str], - zendesk_urls: List[str] + zendesk_urls: List[str], + github_repos: List[Dict[str, Any]] ) -> List[Dict[str, Any]]: """ Creates data sources of different types @@ -98,6 +101,7 @@ def create_data_sources( website_urls: List of website URLs jira_urls: List of Jira URLs zendesk_urls: List of Zendesk URLs + github_repos: List of GitHub repos Returns: List of created data source results """ @@ -127,6 +131,10 @@ def create_data_sources( for url in clean_zendesk_urls: results.append(self.strategies['zendesk'].create(self.guru_type_object, url)) + # Process GitHub repos + for repo in github_repos: + results.append(self.strategies['github'].create(self.guru_type_object, repo)) + # Trigger background task data_source_retrieval.delay(guru_type_slug=self.guru_type_object.slug) diff --git a/src/gurubase-backend/backend/core/signals.py b/src/gurubase-backend/backend/core/signals.py index deee844b..e3137747 100644 --- a/src/gurubase-backend/backend/core/signals.py +++ b/src/gurubase-backend/backend/core/signals.py @@ -781,79 +781,6 @@ def clear_github_file(sender, instance: GithubFile, **kwargs): logger.info(f"Clearing github file: {instance.id}") instance.delete_from_milvus() -@receiver(pre_save, sender=GuruType) -def validate_github_repos(sender, instance, **kwargs): - """Validate GitHub repo URLs format if provided""" - if instance.github_repos: - # Ensure github_repos is a list - if not isinstance(instance.github_repos, list): - raise ValidationError({'msg': 'github_repos must be a list'}) - - for repo_url in instance.github_repos: - # Normalize URL by removing trailing slash - repo_url = repo_url.rstrip('/') - - # Validate URL format - url_validator = URLValidator() - try: - url_validator(repo_url) - except ValidationError: - raise ValidationError({'msg': f'Invalid URL format: {repo_url}'}) - - # Ensure it's a GitHub URL - parsed_url = urlparse(repo_url) - if not parsed_url.netloc.lower() in ['github.com', 'www.github.com']: - raise ValidationError({'msg': f'URL must be a GitHub repository: {repo_url}'}) - - # Ensure it has a path (repository) - if not parsed_url.path or parsed_url.path == '/': - raise ValidationError({'msg': f'Invalid GitHub repository URL: {repo_url}'}) - - # Ensure URL has valid scheme - if parsed_url.scheme not in ['http', 'https']: - raise ValidationError({'msg': f'URL must start with http:// or https://: {repo_url}'}) - -@receiver(post_save, sender=GuruType) -def manage_github_repo_datasource(sender, instance, **kwargs): - from core.tasks import data_source_retrieval - """Manage DataSource based on github_repos and index_repo fields""" - - # Get all existing GitHub repo data sources for this guru type - existing_datasources = DataSource.objects.filter( - guru_type=instance, - type=DataSource.Type.GITHUB_REPO, - ) - - # Create a map of existing data sources by URL - existing_datasources_map = {ds.url: ds for ds in existing_datasources} - - # Case 1: URLs exist and index_repo is True - Create/Update DataSources - if 
instance.github_repos and instance.index_repo: - current_urls = set(instance.github_repos) - existing_urls = set(existing_datasources_map.keys()) - - # URLs to add - urls_to_add = current_urls - existing_urls - for url in urls_to_add: - DataSource.objects.create( - guru_type=instance, - type=DataSource.Type.GITHUB_REPO, - url=url, - status=DataSource.Status.NOT_PROCESSED - ) - - # URLs to remove - urls_to_remove = existing_urls - current_urls - for url in urls_to_remove: - existing_datasources_map[url].delete() - - if urls_to_add or urls_to_remove: - data_source_retrieval.delay(guru_type_slug=instance.slug, countdown=1) - - # Case 2: Either URLs list is empty or index_repo is False - Delete all DataSources - elif existing_datasources.exists(): - existing_datasources.delete() - @receiver(post_save, sender=DataSource) def data_source_retrieval_on_creation(sender, instance: DataSource, created, **kwargs): from core.tasks import data_source_retrieval diff --git a/src/gurubase-backend/backend/core/tasks.py b/src/gurubase-backend/backend/core/tasks.py index d3dca8e7..465c5589 100644 --- a/src/gurubase-backend/backend/core/tasks.py +++ b/src/gurubase-backend/backend/core/tasks.py @@ -1261,11 +1261,11 @@ def update_guru_type_details(): for guru_type in guru_types: # Update GitHub details if missing if not guru_type.github_details: - github_repos = guru_type.github_repos - if github_repos: + github_repo = DataSource.objects.filter(guru_type=guru_type, type=DataSource.Type.GITHUB_REPO).first() + if github_repo: try: # Only fetch details for the first GitHub repo - first_repo = github_repos[0] + first_repo = github_repo.url if not first_repo: continue try: diff --git a/src/gurubase-backend/backend/core/tests/test_guru_type_github.py b/src/gurubase-backend/backend/core/tests/test_guru_type_github.py deleted file mode 100644 index 4c6e5d72..00000000 --- a/src/gurubase-backend/backend/core/tests/test_guru_type_github.py +++ /dev/null @@ -1,194 +0,0 @@ -from django.test import TestCase -from django.core.exceptions import ValidationError -from core.models import GuruType, DataSource -from django.contrib.auth import get_user_model - -User = get_user_model() - -class GuruTypeGithubTests(TestCase): - def setUp(self): - self.user = User.objects.create(email='testuser@getanteon.com') - self.valid_guru_type_data = { - 'name': 'Test Guru', - 'slug': 'test-guru', - 'domain_knowledge': 'Test domain knowledge' - } - self.valid_github_urls = [ - 'https://github.com/username/repo1', - 'https://github.com/username/repo2' - ] - - def create_guru_type(self, **kwargs): - data = self.valid_guru_type_data.copy() - data.update(kwargs) - return GuruType.objects.create(**data) - - def test_initial_state(self): - """Test initial state with no GitHub settings""" - guru_type = self.create_guru_type() - self.assertTrue(guru_type.index_repo) - self.assertEqual(guru_type.github_repos, []) - self.assertEqual(DataSource.objects.count(), 0) - - def test_url_validation(self): - """Test GitHub URL validation""" - invalid_urls = [ - 'not-a-url', - 'http://not-github.com/user/repo', - 'https://github.com', # Missing repository path - 'ftp://github.com/user/repo', # Invalid protocol - ] - - for url in invalid_urls: - with self.assertRaises(ValidationError): - self.create_guru_type(github_repos=[url]) - - def test_add_urls_without_indexing(self): - """Test adding GitHub URLs without enabling indexing""" - guru_type = self.create_guru_type( - github_repos=self.valid_github_urls, - index_repo=False - ) - 
self.assertEqual(guru_type.github_repos, self.valid_github_urls) - self.assertEqual(DataSource.objects.count(), 0) - - def test_add_urls_and_enable_indexing(self): - """Test adding URLs and enabling indexing""" - guru_type = self.create_guru_type( - github_repos=self.valid_github_urls, - index_repo=True - ) - self.assertEqual(DataSource.objects.count(), 2) - for i, url in enumerate(self.valid_github_urls): - datasource = DataSource.objects.get(url=url) - self.assertEqual(datasource.type, DataSource.Type.GITHUB_REPO) - self.assertEqual(datasource.status, DataSource.Status.NOT_PROCESSED) - - def test_update_urls_while_indexed(self): - """Test updating URLs while indexing is enabled""" - guru_type = self.create_guru_type( - github_repos=self.valid_github_urls[:1], # Start with one repo - index_repo=True - ) - self.assertEqual(DataSource.objects.count(), 1) - - # Update URLs to include both repos - guru_type.github_repos = self.valid_github_urls - guru_type.save() - - self.assertEqual(DataSource.objects.count(), 2) - for url in self.valid_github_urls: - datasource = DataSource.objects.get(url=url) - self.assertEqual(datasource.type, DataSource.Type.GITHUB_REPO) - - def test_disable_indexing(self): - """Test disabling indexing""" - guru_type = self.create_guru_type( - github_repos=self.valid_github_urls, - index_repo=True - ) - self.assertEqual(DataSource.objects.count(), 2) - - # Disable indexing - guru_type.index_repo = False - guru_type.save() - - self.assertEqual(DataSource.objects.count(), 0) - - def test_remove_urls_while_indexed(self): - """Test removing URLs while indexed""" - guru_type = self.create_guru_type( - github_repos=self.valid_github_urls, - index_repo=True - ) - self.assertEqual(DataSource.objects.count(), 2) - - # Remove URLs - guru_type.github_repos = [] - guru_type.index_repo = False - guru_type.save() - - self.assertEqual(DataSource.objects.count(), 0) - - def test_multiple_updates(self): - """Test multiple sequential updates""" - guru_type = self.create_guru_type() - - # Add first URL - guru_type.github_repos = [self.valid_github_urls[0]] - guru_type.save() - self.assertEqual(DataSource.objects.count(), 1) - - # Add second URL - guru_type.github_repos = self.valid_github_urls - guru_type.save() - self.assertEqual(DataSource.objects.count(), 2) - - # Remove first URL - guru_type.github_repos = [self.valid_github_urls[1]] - guru_type.save() - self.assertEqual(DataSource.objects.count(), 1) - self.assertEqual(DataSource.objects.first().url, self.valid_github_urls[1]) - - # Disable indexing - guru_type.index_repo = False - guru_type.save() - self.assertEqual(DataSource.objects.count(), 0) - - def test_concurrent_url_and_index_changes(self): - """Test changing URLs and index status simultaneously""" - guru_type = self.create_guru_type( - github_repos=self.valid_github_urls, - index_repo=True - ) - new_url = 'https://github.com/username/another-repo' - - # Update both fields - guru_type.github_repos = [new_url] - guru_type.index_repo = False - guru_type.save() - - self.assertEqual(DataSource.objects.count(), 0) - - def test_reactivate_indexing(self): - """Test re-enabling indexing after it was disabled""" - guru_type = self.create_guru_type( - github_repos=self.valid_github_urls, - index_repo=True - ) - - # Disable indexing - guru_type.index_repo = False - guru_type.save() - self.assertEqual(DataSource.objects.count(), 0) - - # Re-enable indexing - guru_type.index_repo = True - guru_type.save() - self.assertEqual(DataSource.objects.count(), 2) - for url in 
self.valid_github_urls: - self.assertTrue(DataSource.objects.filter(url=url).exists()) - - def test_repo_limit(self): - """Test repository count limit""" - guru_type = self.create_guru_type( - github_repos=self.valid_github_urls[:1], - index_repo=True, - github_repo_count_limit=1 # Set limit to 1 - ) - - # Try to add a third repository - new_url = 'https://github.com/username/repo3' - guru_type.github_repos.append(new_url) - - with self.assertRaises(Exception): - guru_type.save() - - def test_repo_limit_with_multiple_urls(self): - """Test repository count limit with multiple URLs""" - with self.assertRaises(Exception): - guru_type = self.create_guru_type( - github_repos=self.valid_github_urls, - index_repo=True, - github_repo_count_limit=1 # Set limit to 1 - ) diff --git a/src/gurubase-backend/backend/core/utils.py b/src/gurubase-backend/backend/core/utils.py index 9c327c3f..9bbf5a36 100644 --- a/src/gurubase-backend/backend/core/utils.py +++ b/src/gurubase-backend/backend/core/utils.py @@ -1974,7 +1974,7 @@ def lighten_color(hex_color): # Convert to 6-digit hex and return return '#{:02x}{:02x}{:02x}'.format(*lightened_rgb) -def create_guru_type_object(slug, name, intro_text, domain_knowledge, icon_url, stackoverflow_tag, stackoverflow_source, github_repos, maintainer=None): +def create_guru_type_object(slug, name, intro_text, domain_knowledge, icon_url, stackoverflow_tag, stackoverflow_source, maintainer=None): base_color = get_dominant_color(icon_url) light_color = lighten_color(base_color) colors = {"base_color": base_color, "light_color": light_color} @@ -1991,7 +1991,6 @@ def create_guru_type_object(slug, name, intro_text, domain_knowledge, icon_url, ogimage_url=ogimage_url, stackoverflow_tag=stackoverflow_tag, stackoverflow_source=stackoverflow_source, - github_repos=github_repos, active=active ) if maintainer: diff --git a/src/gurubase-backend/backend/core/views.py b/src/gurubase-backend/backend/core/views.py index 6ccee595..8b079162 100644 --- a/src/gurubase-backend/backend/core/views.py +++ b/src/gurubase-backend/backend/core/views.py @@ -545,8 +545,6 @@ def my_gurus(request, guru_slug=None): 'icon': icon_url, 'icon_url': icon_url, 'domain_knowledge': guru.domain_knowledge, - 'github_repos': guru.github_repos, - 'index_repo': guru.index_repo, 'youtubeCount': 0, 'pdfCount': 0, 'websiteCount': 0, @@ -716,7 +714,7 @@ def get_guru_type_resources(request, guru_type): logger.error(f'Error while getting guru type resources: {e}', exc_info=True) return Response({'msg': str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) -def create_guru_type(name, domain_knowledge, intro_text, stackoverflow_tag, stackoverflow_source, github_repos, image, maintainer=None): +def create_guru_type(name, domain_knowledge, intro_text, stackoverflow_tag, stackoverflow_source, image, maintainer=None): """Utility function to handle guru type creation logic""" if not name or len(name) < 2: raise ValueError('Guru type name must be at least 2 characters') @@ -746,16 +744,10 @@ def create_guru_type(name, domain_knowledge, intro_text, stackoverflow_tag, stac if GuruType.objects.filter(slug=slug).exists(): raise ValueError(f'Guru type {slug} already exists') - try: - github_repos = json.loads(github_repos) - except Exception as e: - logger.error(f'Error while parsing github repos: {e}', exc_info=True) - raise ValueError('Github repos must be a list of strings') - try: guru_type_object = create_guru_type_object( slug, name, intro_text, domain_knowledge, icon_url, - stackoverflow_tag, stackoverflow_source, 
github_repos, maintainer + stackoverflow_tag, stackoverflow_source, maintainer ) except ValidationError as e: raise @@ -776,7 +768,6 @@ def create_guru_type_internal(request): intro_text=data.get('intro_text'), stackoverflow_tag=data.get('stackoverflow_tag', ""), stackoverflow_source=data.get('stackoverflow_source', False), - github_repos=data.get('github_repos', ""), image=request.FILES.get('icon_image'), ) return Response(GuruTypeSerializer(guru_type_object).data, status=status.HTTP_200_OK) @@ -804,7 +795,6 @@ def create_guru_type_frontend(request): intro_text=data.get('intro_text'), stackoverflow_tag=data.get('stackoverflow_tag', ""), stackoverflow_source=data.get('stackoverflow_source', False), - github_repos=data.get('github_repos', ""), image=request.FILES.get('icon_image'), maintainer=user ) @@ -853,6 +843,7 @@ def create_data_sources(request, guru_type): pdf_privacies = request.data.get('pdf_privacies', '[]') jira_urls = request.data.get('jira_urls', '[]') zendesk_urls = request.data.get('zendesk_urls', '[]') + github_repos = request.data.get('github_repos', '[]') try: if type(youtube_urls) == str: youtube_urls = json.loads(youtube_urls) @@ -866,6 +857,8 @@ def create_data_sources(request, guru_type): jira_urls = json.loads(jira_urls) if type(zendesk_urls) == str: zendesk_urls = json.loads(zendesk_urls) + if type(github_repos) == str: + github_repos = json.loads(github_repos) except Exception as e: logger.error(f'Error while parsing urls: {e}', exc_info=True) return Response({'msg': str(e)}, status=status.HTTP_400_BAD_REQUEST) @@ -874,7 +867,7 @@ def create_data_sources(request, guru_type): jira_urls = [] zendesk_urls = [] - if not pdf_files and not youtube_urls and not website_urls and not github_urls and not jira_urls and not zendesk_urls: + if not pdf_files and not youtube_urls and not website_urls and not github_repos and not jira_urls and not zendesk_urls: return Response({'msg': 'No data sources provided'}, status=status.HTTP_400_BAD_REQUEST) service = DataSourceService(guru_type_object, request.user) @@ -886,6 +879,7 @@ def create_data_sources(request, guru_type): service.validate_url_limits(website_urls, 'website') service.validate_url_limits(jira_urls, 'jira') service.validate_url_limits(zendesk_urls, 'zendesk') + service.validate_url_limits(github_repos, 'github') if jira_urls: service.validate_integration('jira') @@ -899,7 +893,8 @@ def create_data_sources(request, guru_type): youtube_urls=youtube_urls, website_urls=website_urls, jira_urls=jira_urls, - zendesk_urls=zendesk_urls + zendesk_urls=zendesk_urls, + github_repos=github_repos ) return Response({ @@ -1033,13 +1028,6 @@ def data_sources_frontend(request, guru_type): if not is_allowed: return Response({'msg': error_msg}, status=status.HTTP_400_BAD_REQUEST) - # Check GitHub repo limits - github_urls = json.loads(request.data.get('github_urls', '[]')) - if github_urls: - is_allowed, error_msg = guru_type_obj.check_datasource_limits(user, github_urls_count=len(github_urls)) - if not is_allowed: - return Response({'msg': error_msg}, status=status.HTTP_400_BAD_REQUEST) - # Check Jira issue limits jira_urls = json.loads(request.data.get('jira_urls', '[]')) if jira_urls: @@ -1054,6 +1042,12 @@ def data_sources_frontend(request, guru_type): if not is_allowed: return Response({'msg': error_msg}, status=status.HTTP_400_BAD_REQUEST) + github_repos = json.loads(request.data.get('github_repos', '[]')) + if github_repos: + is_allowed, error_msg = guru_type_obj.check_datasource_limits(user, github_repos_count=len(github_repos)) + 
if not is_allowed: + return Response({'msg': error_msg}, status=status.HTTP_400_BAD_REQUEST) + return create_data_sources(request, guru_type) elif request.method == 'DELETE': return delete_data_sources(request, guru_type) @@ -1094,14 +1088,7 @@ def update_guru_type(request, guru_type): data = request.data domain_knowledge = data.get('domain_knowledge', guru_type_object.prompt_map['domain_knowledge']) intro_text = data.get('intro_text', guru_type_object.intro_text) - github_repos = data.get('github_repos', guru_type_object.github_repos) - try: - github_repos = json.loads(github_repos) - except Exception as e: - logger.error(f'Error while parsing github repos: {e}', exc_info=True) - return Response({'msg': 'Github repos must be a list of strings'}, status=status.HTTP_400_BAD_REQUEST) - # Handle image upload if provided image = request.FILES.get('icon_image') if image: @@ -1120,7 +1107,6 @@ def update_guru_type(request, guru_type): # Update other fields guru_type_object.domain_knowledge = domain_knowledge guru_type_object.intro_text = intro_text - guru_type_object.github_repos = github_repos try: guru_type_object.save() except ValidationError as e: diff --git a/src/gurubase-frontend/src/components/NewGuru.jsx b/src/gurubase-frontend/src/components/NewGuru.jsx index 1339d01f..b6878efe 100644 --- a/src/gurubase-frontend/src/components/NewGuru.jsx +++ b/src/gurubase-frontend/src/components/NewGuru.jsx @@ -1170,12 +1170,6 @@ export default function NewGuru({ guruData, isProcessing }) { // Fetch updated guru data after create/update await fetchGuruData(guruSlug); - // If there are GitHub-related changes, mark for polling - if (hasGithubChanges) { - hasChanges = true; - await fetchDataSources(guruSlug); - } - // Handle deleted sources if (isEditMode && dirtyChanges.sources.some((source) => source.deleted)) { const deletedSourceIds = dirtyChanges.sources @@ -1346,16 +1340,19 @@ export default function NewGuru({ guruData, isProcessing }) { } // Add GitHub Repos from dirtyChanges (newly added via input) - const githubSources = dirtyChanges.sources - .filter( - (source) => - source.type === "github" && source.newAddedSource && !source.deleted - ) - .map((source) => source.url); + const githubSources = dirtyChanges.sources.filter( + (source) => + source.type === "github_repo" && + (source.newAddedSource || source.updated) && + !source.deleted + ); if (githubSources.length > 0) { // Add github_urls to the form data - newSourcesFormData.append("github_urls", JSON.stringify(githubSources)); + newSourcesFormData.append( + "github_repos", + JSON.stringify(githubSources) + ); hasNewSources = true; } @@ -1363,41 +1360,6 @@ export default function NewGuru({ guruData, isProcessing }) { hasChanges = true; setIsSourcesProcessing(true); - // Get all source IDs including those from the same domain groups - const processingIds = sources.reduce((ids, source) => { - if (source.domains) { - // For grouped domains (website/youtube), add all domain IDs - const domainUrls = source.domains.map((d) => d.url); - const matchingNewSources = dirtyChanges.sources.some( - (newSource) => - newSource.newAddedSource && - !newSource.deleted && - domainUrls.includes(newSource.url) - ); - - if (matchingNewSources) { - ids.push(source.id); // Add the group ID - source.domains.forEach((domain) => ids.push(domain.id)); // Add all domain IDs - } - } else if ( - dirtyChanges.sources.some( - (newSource) => - newSource.newAddedSource && - !newSource.deleted && - newSource.id === source.id - ) - ) { - // For PDFs, use the file name instead of 
the temporary ID - if (source.type?.toLowerCase() === "pdf") { - ids.push(source.name); - } else { - ids.push(source.id); - } - } - - return ids; - }, []); - const sourcesResponse = await addGuruSources( guruSlug, newSourcesFormData @@ -2106,13 +2068,14 @@ export default function NewGuru({ guruData, isProcessing }) { // Mark as dirty changes setDirtyChanges((prev) => ({ ...prev, - guruUpdated: true, // Mark form as changed sources: [ ...prev.sources.filter((s) => s.id !== repoId && s.url !== repoUrl), { id: String(repoId || `github-${Date.now()}`), // Ensure string updated: true, url: repoUrl, + newAddedSource: false, + deleted: false, type: "github_repo", glob_pattern: globPattern || "", // Ensure string include_glob: !!includeGlob // Ensure boolean @@ -2161,7 +2124,9 @@ export default function NewGuru({ guruData, isProcessing }) { url: repoUrl, type: "github_repo", glob_pattern: globPattern || "", // Ensure string - include_glob: !!includeGlob // Ensure boolean + include_glob: !!includeGlob, // Ensure boolean + newAddedSource: true, + deleted: false } ] })); From 84e9e8c8fd0af35303a03055c8eb8266856c4f4e Mon Sep 17 00:00:00 2001 From: aralyekta Date: Tue, 22 Apr 2025 07:10:09 +0000 Subject: [PATCH 09/81] Improve github repo visuals --- .../NewEditGuru/SourcesTableSection.jsx | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx index 11f58be9..27978dae 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx @@ -177,7 +177,7 @@ export function SourcesTableSection({ case "SUCCESS": badgeProps.icon = Check; badgeProps.iconColor = "text-green-700"; - badgeProps.text = `Indexed ${source.file_count} files`; + badgeProps.text = `${source.file_count} files`; badgeProps.className += " bg-green-50 text-green-700"; break; case "FAIL": @@ -195,18 +195,19 @@ export function SourcesTableSection({ break; } - const badgeElement = ( - - - {badgeProps.text} - - ); + const badgeElement = + source.status === "SUCCESS" || source.status === "FAIL" ? ( + + + {badgeProps.text} + + ) : null; return (
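
Note on the repo limit after the GuruType.github_repos removal in the hunks above: GitHub repositories are now ordinary DataSource rows, so check_datasource_limits simply counts existing GITHUB_REPO rows and adds the number of repos being submitted. A minimal standalone sketch of that rule, assuming the model and field names used in this series (DataSource.Type.GITHUB_REPO, github_repo_count_limit); it is an illustration, not the exact patched code:

    from core.models import DataSource

    def github_repo_limit_ok(guru_type, new_repos_count):
        # Existing repos are plain DataSource rows of type GITHUB_REPO,
        # not entries in a JSON field on GuruType.
        existing = DataSource.objects.filter(
            guru_type=guru_type,
            type=DataSource.Type.GITHUB_REPO,
        ).count()
        if existing + new_repos_count > guru_type.github_repo_count_limit:
            return False, f"GitHub repository limit ({guru_type.github_repo_count_limit}) reached"
        return True, ""

The create_data_sources path earlier in this series applies the same rule by passing github_repos_count=len(github_repos) into check_datasource_limits before any rows are created.
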
From 388e8af791c099a609b2d80faff41ad641420e11 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Tue, 22 Apr 2025 07:36:30 +0000 Subject: [PATCH 10/81] Add last indexed date tooltip --- .../NewEditGuru/SourcesTableSection.jsx | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx index 27978dae..8ada4c7c 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx @@ -209,6 +209,10 @@ export function SourcesTableSection({ ) : null; + const lastIndexedDate = source.last_reindex_date + ? new Date(source.last_reindex_date).toLocaleString() + : null; + return (
{source.status === "FAIL" && source.error ? ( @@ -242,6 +246,37 @@ export function SourcesTableSection({ + ) : source.last_reindex_date ? ( + + + + + {badgeElement} + + + +

+ Last indexed at {lastIndexedDate} +

+
+ + + ) : ( Date: Tue, 22 Apr 2025 07:37:00 +0000 Subject: [PATCH 11/81] Add comments --- src/gurubase-backend/backend/core/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gurubase-backend/backend/core/models.py b/src/gurubase-backend/backend/core/models.py index b959870a..cf6b6d5c 100644 --- a/src/gurubase-backend/backend/core/models.py +++ b/src/gurubase-backend/backend/core/models.py @@ -667,11 +667,11 @@ class Status(models.TextChoices): private = models.BooleanField(default=False) - last_reindex_date = models.DateTimeField(auto_now_add=True, null=True, blank=True) + last_reindex_date = models.DateTimeField(auto_now_add=True, null=True, blank=True) # Set when reindex is manually done reindex_count = models.IntegerField(default=0) scrape_tool = models.CharField(max_length=100, null=True, blank=True) - last_successful_index_date = models.DateTimeField(null=True, blank=True) + last_successful_index_date = models.DateTimeField(null=True, blank=True) # Set when github repo is indexed/reindexed successfully github_glob_include = models.BooleanField(default=True) github_glob_pattern = models.CharField(max_length=100, null=True, blank=True) From 3a517751e0fdc994118b1de5a6aaf2eb5936ee44 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Tue, 22 Apr 2025 07:37:05 +0000 Subject: [PATCH 12/81] Return last indexed date --- src/gurubase-backend/backend/core/serializers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gurubase-backend/backend/core/serializers.py b/src/gurubase-backend/backend/core/serializers.py index 53daea0c..9a6692cb 100644 --- a/src/gurubase-backend/backend/core/serializers.py +++ b/src/gurubase-backend/backend/core/serializers.py @@ -73,7 +73,7 @@ class Meta: class DataSourceSerializer(serializers.ModelSerializer): class Meta: model = DataSource - exclude = ['file', 'doc_ids', 'content', 'guru_type'] + exclude = ['file', 'doc_ids', 'content', 'guru_type', 'last_reindex_date'] def to_representation(self, instance): from core.utils import format_github_repo_error @@ -92,6 +92,7 @@ def to_representation(self, instance): repr['file_count'] = GithubFile.objects.filter(data_source=instance, in_milvus=True).count() repr['glob_pattern'] = instance.github_glob_pattern repr['glob_include'] = instance.github_glob_include + repr['last_reindex_date'] = instance.last_successful_index_date return repr From f567d46110f17726b2d2d5c6343ed34b0f7b38c4 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Tue, 22 Apr 2025 07:57:52 +0000 Subject: [PATCH 13/81] Fix glob update and remove unnecessary code --- src/gurubase-backend/backend/core/models.py | 5 - .../core/services/data_source_service.py | 113 ++++++++++++++++-- src/gurubase-backend/backend/core/views.py | 69 +++-------- .../NewEditGuru/GitHubSourceDialog.jsx | 2 +- .../src/components/NewGuru.jsx | 2 +- 5 files changed, 122 insertions(+), 69 deletions(-) diff --git a/src/gurubase-backend/backend/core/models.py b/src/gurubase-backend/backend/core/models.py index cf6b6d5c..3015ae62 100644 --- a/src/gurubase-backend/backend/core/models.py +++ b/src/gurubase-backend/backend/core/models.py @@ -498,11 +498,6 @@ def check_datasource_limits(self, user, file=None, website_urls_count=0, youtube type=DataSource.Type.YOUTUBE ).count() - github_count = DataSource.objects.filter( - guru_type=self, - type=DataSource.Type.GITHUB_REPO - ).count() - jira_count = DataSource.objects.filter( guru_type=self, type=DataSource.Type.JIRA diff --git a/src/gurubase-backend/backend/core/services/data_source_service.py 
b/src/gurubase-backend/backend/core/services/data_source_service.py index aad693e1..b993d4ac 100644 --- a/src/gurubase-backend/backend/core/services/data_source_service.py +++ b/src/gurubase-backend/backend/core/services/data_source_service.py @@ -41,25 +41,31 @@ def validate_pdf_files(self, pdf_files: List[UploadedFile], pdf_privacies: List[ if not is_allowed: raise ValueError(error_msg) - def validate_url_limits(self, urls: List[str], url_type: str) -> None: + def validate_url_limits(self, youtube_urls=None, website_urls=None, jira_urls=None, zendesk_urls=None) -> None: """ - Validates URL count limits + Validates URL count limits for multiple URL types Args: - urls: List of URLs to validate - url_type: Type of URLs ('website' or 'youtube' or 'jira' or 'zendesk' or 'github') + youtube_urls: List of YouTube URLs to validate + website_urls: List of website URLs to validate + jira_urls: List of Jira URLs to validate + zendesk_urls: List of Zendesk URLs to validate Raises: ValueError: If validation fails """ - if urls: + youtube_urls = youtube_urls or [] + website_urls = website_urls or [] + jira_urls = jira_urls or [] + zendesk_urls = zendesk_urls or [] + + if any([youtube_urls, website_urls, jira_urls, zendesk_urls]): is_allowed, error_msg = self.guru_type_object.check_datasource_limits( self.user, - website_urls_count=len(urls) if url_type == 'website' else 0, - youtube_urls_count=len(urls) if url_type == 'youtube' else 0, - jira_urls_count=len(urls) if url_type == 'jira' else 0, - zendesk_urls_count=len(urls) if url_type == 'zendesk' else 0, - github_repos_count=len(urls) if url_type == 'github' else 0 + website_urls_count=len(website_urls), + youtube_urls_count=len(youtube_urls), + jira_urls_count=len(jira_urls), + zendesk_urls_count=len(zendesk_urls) ) if not is_allowed: raise ValueError(error_msg) @@ -81,6 +87,93 @@ def validate_integration(self, type: str) -> None: if not zendesk_integration: raise ValueError('Zendesk integration not found') + def validate_github_repos_limits(self, github_repos: List[Dict[str, Any]]) -> None: + """ + Validates GitHub repos count limits for new repos only + + Args: + github_repos: List of GitHub repos to validate - should only contain new repos + + Raises: + ValueError: If validation fails + """ + if github_repos: + is_allowed, error_msg = self.guru_type_object.check_datasource_limits( + self.user, + github_repos_count=len(github_repos) + ) + if not is_allowed: + raise ValueError(error_msg) + + def identify_new_github_repos(self, github_repos: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """ + Identify GitHub repos that don't exist in the system yet + + Args: + github_repos: List of GitHub repo dictionaries + + Returns: + List of GitHub repos that don't exist and need to be created + """ + new_repos = [] + + for repo in github_repos: + repo_url = repo.get('url', '') + + # Check if this repo already exists in this guru type + existing_repo = DataSource.objects.filter( + guru_type=self.guru_type_object, + type=DataSource.Type.GITHUB_REPO, + url=repo_url + ).exists() + + if not existing_repo: + # Add to list of new repos to create + new_repos.append(repo) + + return new_repos + + def update_existing_github_repos(self, github_repos: List[Dict[str, Any]]) -> None: + """ + Update glob patterns for existing GitHub repos + + Args: + github_repos: List of GitHub repo dictionaries with 'url' and 'glob_patterns' fields + """ + for repo in github_repos: + repo_url = repo.get('url', '') + + # Check if this repo already exists in this guru type + existing_repo 
= DataSource.objects.filter( + guru_type=self.guru_type_object, + type=DataSource.Type.GITHUB_REPO, + url=repo_url + ).first() + + if existing_repo: + # Update glob patterns + existing_repo.github_glob_pattern = repo['glob_pattern'] + existing_repo.github_glob_include = repo['include_glob'] + existing_repo.save() + + def process_github_repos(self, github_repos: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """ + Process GitHub repos, updating existing ones and returning new ones to be created + + Args: + github_repos: List of GitHub repo dictionaries with 'url' and 'glob_patterns' fields + + Returns: + List of GitHub repos that need to be created (didn't exist before) + """ + # Identify new repos + new_repos = self.identify_new_github_repos(github_repos) + + # Update existing repos + self.update_existing_github_repos(github_repos) + + return new_repos + def create_data_sources( self, pdf_files: List[UploadedFile], diff --git a/src/gurubase-backend/backend/core/views.py b/src/gurubase-backend/backend/core/views.py index 8b079162..5a41e15e 100644 --- a/src/gurubase-backend/backend/core/views.py +++ b/src/gurubase-backend/backend/core/views.py @@ -874,18 +874,29 @@ def create_data_sources(request, guru_type): try: # Validate limits + service.validate_pdf_files(pdf_files, pdf_privacies) - service.validate_url_limits(youtube_urls, 'youtube') - service.validate_url_limits(website_urls, 'website') - service.validate_url_limits(jira_urls, 'jira') - service.validate_url_limits(zendesk_urls, 'zendesk') - service.validate_url_limits(github_repos, 'github') + service.validate_url_limits( + youtube_urls=youtube_urls, + website_urls=website_urls, + jira_urls=jira_urls, + zendesk_urls=zendesk_urls + ) + + # First identify new GitHub repos without modifying anything + new_github_repos = service.identify_new_github_repos(github_repos) + + # Validate limits for new GitHub repos + service.validate_github_repos_limits(new_github_repos) if jira_urls: service.validate_integration('jira') if zendesk_urls: service.validate_integration('zendesk') + # Process GitHub repos - this will update existing ones + service.update_existing_github_repos(github_repos) + # Create data sources results = service.create_data_sources( pdf_files=pdf_files, @@ -894,7 +905,7 @@ def create_data_sources(request, guru_type): website_urls=website_urls, jira_urls=jira_urls, zendesk_urls=zendesk_urls, - github_repos=github_repos + github_repos=new_github_repos ) return Response({ @@ -1002,52 +1013,6 @@ def data_sources_frontend(request, guru_type): validate_guru_type(guru_type, only_active=False) if request.method == 'POST': - if settings.ENV == 'selfhosted': - user = None - else: - user = get_auth0_user(request.auth0_id) - - # Check PDF file limits - pdf_files = request.FILES.getlist('pdf_files', []) - for pdf_file in pdf_files: - is_allowed, error_msg = guru_type_obj.check_datasource_limits(user, file=pdf_file) - if not is_allowed: - return Response({'msg': error_msg}, status=status.HTTP_400_BAD_REQUEST) - - # Check website limits - website_urls = json.loads(request.data.get('website_urls', '[]')) - if website_urls: - is_allowed, error_msg = guru_type_obj.check_datasource_limits(user, website_urls_count=len(website_urls)) - if not is_allowed: - return Response({'msg': error_msg}, status=status.HTTP_400_BAD_REQUEST) - - # Check YouTube limits - youtube_urls = json.loads(request.data.get('youtube_urls', '[]')) - if youtube_urls: - is_allowed, error_msg = guru_type_obj.check_datasource_limits(user, youtube_urls_count=len(youtube_urls)) - 
if not is_allowed: - return Response({'msg': error_msg}, status=status.HTTP_400_BAD_REQUEST) - - # Check Jira issue limits - jira_urls = json.loads(request.data.get('jira_urls', '[]')) - if jira_urls: - is_allowed, error_msg = guru_type_obj.check_datasource_limits(user, jira_urls_count=len(jira_urls)) - if not is_allowed: - return Response({'msg': error_msg}, status=status.HTTP_400_BAD_REQUEST) - - # Check Zendesk ticket limits - zendesk_urls = json.loads(request.data.get('zendesk_urls', '[]')) - if zendesk_urls: - is_allowed, error_msg = guru_type_obj.check_datasource_limits(user, zendesk_urls_count=len(zendesk_urls)) - if not is_allowed: - return Response({'msg': error_msg}, status=status.HTTP_400_BAD_REQUEST) - - github_repos = json.loads(request.data.get('github_repos', '[]')) - if github_repos: - is_allowed, error_msg = guru_type_obj.check_datasource_limits(user, github_repos_count=len(github_repos)) - if not is_allowed: - return Response({'msg': error_msg}, status=status.HTTP_400_BAD_REQUEST) - return create_data_sources(request, guru_type) elif request.method == 'DELETE': return delete_data_sources(request, guru_type) diff --git a/src/gurubase-frontend/src/components/NewEditGuru/GitHubSourceDialog.jsx b/src/gurubase-frontend/src/components/NewEditGuru/GitHubSourceDialog.jsx index 47d0ff7c..6b3dcc62 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/GitHubSourceDialog.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/GitHubSourceDialog.jsx @@ -66,7 +66,7 @@ const GitHubSourceDialog = React.memo( React.useEffect(() => { if (isEditingRepo && editingRepo) { // If we're in edit mode, set the includeGlobPattern based on the repo settings - setIncludeGlobPattern(editingRepo.include_glob || false); + setIncludeGlobPattern(!!editingRepo.include_glob); } }, [isEditingRepo, editingRepo]); diff --git a/src/gurubase-frontend/src/components/NewGuru.jsx b/src/gurubase-frontend/src/components/NewGuru.jsx index b6878efe..4ed65cf4 100644 --- a/src/gurubase-frontend/src/components/NewGuru.jsx +++ b/src/gurubase-frontend/src/components/NewGuru.jsx @@ -532,7 +532,7 @@ export default function NewGuru({ guruData, isProcessing }) { const urlParts = (source.url || "").split("/"); baseSource.name = urlParts.slice(-2).join("/"); // e.g., username/repo baseSource.glob_pattern = source.github_glob_pattern || ""; - baseSource.include_glob = source.github_include_glob || false; + baseSource.include_glob = source.github_glob_include || false; } return baseSource; From 7e41114afad19210f2bc50a84673d524a603d6da Mon Sep 17 00:00:00 2001 From: aralyekta Date: Tue, 22 Apr 2025 08:02:55 +0000 Subject: [PATCH 14/81] Add existing repo check --- .../src/components/NewGuru.jsx | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/gurubase-frontend/src/components/NewGuru.jsx b/src/gurubase-frontend/src/components/NewGuru.jsx index 4ed65cf4..2b121c44 100644 --- a/src/gurubase-frontend/src/components/NewGuru.jsx +++ b/src/gurubase-frontend/src/components/NewGuru.jsx @@ -2083,6 +2083,27 @@ export default function NewGuru({ guruData, isProcessing }) { ] })); } else { + // Check if the repository URL already exists in sources + const repoExists = sources.some( + (source) => + source.type === "github_repo" && + !source.deleted && + source.url.toLowerCase() === repoUrl.toLowerCase() + ); + + if (repoExists) { + // Skip adding if the repository already exists + CustomToast({ + message: "This GitHub repository has already been added.", + variant: "info" + }); + + // Reset the 
input fields + setRepoUrl(""); + setGlobPattern(""); + return; + } + // Create new repo const urlParts = (repoUrl || "").split("/"); const newRepoId = `github-${Date.now()}`; @@ -2140,7 +2161,7 @@ export default function NewGuru({ guruData, isProcessing }) { setRepoUrl(""); setGlobPattern(""); }, - [form, setDirtyChanges, isEditingRepo] + [form, setDirtyChanges, isEditingRepo, sources] ); // Add handler for editing GitHub repositories From a05a8181e15a5da19a17e133b17707f784d72d43 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Tue, 22 Apr 2025 08:05:31 +0000 Subject: [PATCH 15/81] Add tooltip --- .../src/components/NewEditGuru/GitHubSourceDialog.jsx | 6 +++++- .../src/components/ui/header-tooltip.jsx | 11 +++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/gurubase-frontend/src/components/NewEditGuru/GitHubSourceDialog.jsx b/src/gurubase-frontend/src/components/NewEditGuru/GitHubSourceDialog.jsx index 6b3dcc62..fc6480cd 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/GitHubSourceDialog.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/GitHubSourceDialog.jsx @@ -8,6 +8,7 @@ import { Input } from "@/components/ui/input"; import { Label } from "@/components/ui/label"; import { isValidUrl } from "@/utils/common"; import { Checkbox } from "@/components/ui/checkbox"; +import { HeaderTooltip } from "@/components/ui/header-tooltip"; const Dialog = DialogPrimitive.Root; const DialogPortal = DialogPrimitive.Portal; @@ -195,7 +196,10 @@ const GitHubSourceDialog = React.memo(
- +
+ + +
{ +export const HeaderTooltip = ({ text, html }) => { const [isOpen, setIsOpen] = useState(false); useEffect(() => { @@ -54,7 +54,14 @@ export const HeaderTooltip = ({ text }) => {
-

{text}

+ {html ? ( +
+ ) : ( +

{text}

+ )} From 681053dcc9a8db2ad0659c0b3013b96aaa786260 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Tue, 22 Apr 2025 08:09:36 +0000 Subject: [PATCH 16/81] Improve typography --- .../src/components/NewEditGuru/GitHubSourceDialog.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gurubase-frontend/src/components/NewEditGuru/GitHubSourceDialog.jsx b/src/gurubase-frontend/src/components/NewEditGuru/GitHubSourceDialog.jsx index fc6480cd..6c4d3ab0 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/GitHubSourceDialog.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/GitHubSourceDialog.jsx @@ -219,7 +219,7 @@ const GitHubSourceDialog = React.memo( />
From a93f16d2fa6273414f037f862831f00579a848ba Mon Sep 17 00:00:00 2001 From: aralyekta Date: Tue, 22 Apr 2025 08:26:40 +0000 Subject: [PATCH 17/81] Add reindex support for github data --- src/gurubase-backend/backend/core/models.py | 15 ++++-- .../core/services/data_source_service.py | 4 ++ src/gurubase-backend/backend/core/tasks.py | 52 ++++++++++++++----- src/gurubase-backend/backend/core/views.py | 7 ++- 4 files changed, 60 insertions(+), 18 deletions(-) diff --git a/src/gurubase-backend/backend/core/models.py b/src/gurubase-backend/backend/core/models.py index 3015ae62..4ce55789 100644 --- a/src/gurubase-backend/backend/core/models.py +++ b/src/gurubase-backend/backend/core/models.py @@ -1088,9 +1088,18 @@ def reindex(self): if self.type == DataSource.Type.GITHUB_REPO: self.content = '' - - self.save() - self.delete_from_milvus() + self.save() + + # Import here to avoid circular imports + from core.tasks import update_github_repositories + # Delay the task to process this specific GitHub repository + update_github_repositories.delay( + guru_type_slug=self.guru_type.slug, + repo_url=self.url + ) + else: + self.save() + self.delete_from_milvus() class FeaturedDataSource(models.Model): diff --git a/src/gurubase-backend/backend/core/services/data_source_service.py b/src/gurubase-backend/backend/core/services/data_source_service.py index b993d4ac..d4d90d4d 100644 --- a/src/gurubase-backend/backend/core/services/data_source_service.py +++ b/src/gurubase-backend/backend/core/services/data_source_service.py @@ -140,6 +140,7 @@ def update_existing_github_repos(self, github_repos: List[Dict[str, Any]]) -> No Args: github_repos: List of GitHub repo dictionaries with 'url' and 'glob_patterns' fields """ + updated_repos = [] for repo in github_repos: repo_url = repo.get('url', '') @@ -155,6 +156,9 @@ def update_existing_github_repos(self, github_repos: List[Dict[str, Any]]) -> No existing_repo.github_glob_pattern = repo['glob_pattern'] existing_repo.github_glob_include = repo['include_glob'] existing_repo.save() + updated_repos.append(existing_repo) + + return updated_repos def process_github_repos(self, github_repos: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ diff --git a/src/gurubase-backend/backend/core/tasks.py b/src/gurubase-backend/backend/core/tasks.py index 465c5589..4c2e1169 100644 --- a/src/gurubase-backend/backend/core/tasks.py +++ b/src/gurubase-backend/backend/core/tasks.py @@ -1452,7 +1452,7 @@ def update_guru_type_sitemap_status(): logger.info("Completed updating GuruType sitemap status") @shared_task -def update_github_repositories(successful_repos=True): +def update_github_repositories(successful_repos=True, guru_type_slug=None, repo_url=None): """ Periodic task to update GitHub repositories: 1. For each successfully synced GitHub repo data source, clone the repo again @@ -1460,9 +1460,14 @@ def update_github_repositories(successful_repos=True): 3. Update modified files in both DB and Milvus Uses per-guru-type locking to allow parallel processing of different guru types. 
+ + Args: + successful_repos (bool): If True, process repos with SUCCESS status, otherwise process FAILED repos + guru_type_slug (str, optional): If provided, process only repos for this guru type + repo_url (str, optional): If provided with guru_type_slug, process only this specific repo """ - def process_guru_type(guru_type): + def process_guru_type(guru_type, specific_repo_url=None): from core.github.data_source_handler import clone_repository, read_repository from django.db import transaction import os @@ -1471,14 +1476,23 @@ def process_guru_type(guru_type): # Get all GitHub repo data sources for this guru type status = DataSource.Status.SUCCESS if successful_repos else DataSource.Status.FAIL - data_sources = DataSource.objects.filter( + data_sources_query = DataSource.objects.filter( type=DataSource.Type.GITHUB_REPO, - status=status, guru_type=guru_type ) + # If specific repo URL is provided, filter to that repo only + if specific_repo_url: + data_sources_query = data_sources_query.filter(url=specific_repo_url) + else: + data_sources_query = data_sources_query.filter(status=status) + + data_sources = data_sources_query + for data_source in data_sources: try: + data_source.status = DataSource.Status.NOT_PROCESSED + data_source.save() # Clone the repository temp_dir, repo = clone_repository(data_source.url) @@ -1651,18 +1665,28 @@ def process_guru_type(guru_type): logger.info("Starting GitHub repositories update task") - # Get unique guru types that have GitHub repo data sources - guru_types = GuruType.objects.filter( - datasource__type=DataSource.Type.GITHUB_REPO, - # datasource__status=DataSource.Status.SUCCESS, - ).distinct() - - for guru_type in guru_types: + # If specific guru type is provided + if guru_type_slug: try: - process_guru_type(guru_type=guru_type) + guru_type = GuruType.objects.get(slug=guru_type_slug) + process_guru_type(guru_type=guru_type, specific_repo_url=repo_url) + except GuruType.DoesNotExist: + logger.error(f"Error: Guru type with slug {guru_type_slug} does not exist") except Exception as e: - logger.error(f"Error processing guru type {guru_type.slug}: {traceback.format_exc()}") - continue + logger.error(f"Error processing guru type {guru_type_slug}: {traceback.format_exc()}") + else: + # Get unique guru types that have GitHub repo data sources + guru_types = GuruType.objects.filter( + datasource__type=DataSource.Type.GITHUB_REPO, + # datasource__status=DataSource.Status.SUCCESS, + ).distinct() + + for guru_type in guru_types: + try: + process_guru_type(guru_type=guru_type) + except Exception as e: + logger.error(f"Error processing guru type {guru_type.slug}: {traceback.format_exc()}") + continue logger.info("Completed GitHub repositories update task") diff --git a/src/gurubase-backend/backend/core/views.py b/src/gurubase-backend/backend/core/views.py index 5a41e15e..9a46d4c7 100644 --- a/src/gurubase-backend/backend/core/views.py +++ b/src/gurubase-backend/backend/core/views.py @@ -895,7 +895,12 @@ def create_data_sources(request, guru_type): service.validate_integration('zendesk') # Process GitHub repos - this will update existing ones - service.update_existing_github_repos(github_repos) + updated_repos = service.update_existing_github_repos(github_repos) + + # Reindex any updated repos + if updated_repos: + for repo in updated_repos: + repo.reindex() # Create data sources results = service.create_data_sources( From 724c6490e2b41bdf14e03b146c2cf773ac84c648 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Tue, 22 Apr 2025 08:34:04 +0000 Subject: [PATCH 18/81] 
Fix pending status when github repo is added --- src/gurubase-frontend/src/components/NewGuru.jsx | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/gurubase-frontend/src/components/NewGuru.jsx b/src/gurubase-frontend/src/components/NewGuru.jsx index 2b121c44..aea78684 100644 --- a/src/gurubase-frontend/src/components/NewGuru.jsx +++ b/src/gurubase-frontend/src/components/NewGuru.jsx @@ -984,6 +984,9 @@ export default function NewGuru({ guruData, isProcessing }) { const zendeskCount = sources.filter( (s) => s.type.toLowerCase() === "zendesk" && !s.deleted ).length; + const githubRepoCount = sources.filter( + (s) => s.type.toLowerCase() === "github_repo" && !s.deleted + ).length; // Calculate PDF size let currentPdfSize = sources @@ -1016,6 +1019,10 @@ export default function NewGuru({ guruData, isProcessing }) { customGuruData.zendesk_limit === undefined ? Infinity : customGuruData.zendesk_limit; + const githubRepoLimit = + customGuruData.github_repo_limit === undefined + ? Infinity + : customGuruData.github_repo_limit; const pdfSizeLimitMb = customGuruData.pdf_size_limit_mb === undefined ? Infinity @@ -1062,6 +1069,14 @@ export default function NewGuru({ guruData, isProcessing }) { return false; } + if (githubRepoCount > githubRepoLimit) { + CustomToast({ + message: `You have exceeded the GitHub repository limit (${githubRepoLimit}).`, + variant: "error" + }); + return false; + } + return true; }; @@ -2136,7 +2151,6 @@ export default function NewGuru({ guruData, isProcessing }) { // Mark as dirty changes setDirtyChanges((prev) => ({ ...prev, - guruUpdated: true, // Mark form as changed sources: [ ...prev.sources, { From 45a41ac9e1ffa6108c2ff4eda58defd5b6036273 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 07:10:27 +0000 Subject: [PATCH 19/81] Fix NewGuru --- src/gurubase-frontend/src/components/NewGuru.jsx | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/gurubase-frontend/src/components/NewGuru.jsx b/src/gurubase-frontend/src/components/NewGuru.jsx index 3d6ecf89..6d6707b2 100644 --- a/src/gurubase-frontend/src/components/NewGuru.jsx +++ b/src/gurubase-frontend/src/components/NewGuru.jsx @@ -2494,12 +2494,6 @@ export default function NewGuru({ guruData, isProcessing }) { setShowConfluenceIntegrationModal={ setShowConfluenceIntegrationModal } - confluenceIntegration={confluenceIntegration} - isLoadingConfluenceIntegration={isLoadingConfluenceIntegration} - setIsConfluenceSidebarOpen={setIsConfluenceSidebarOpen} - setShowConfluenceIntegrationModal={ - setShowConfluenceIntegrationModal - } isEditMode={isEditMode} setIsGithubSidebarOpen={setIsGithubSidebarOpen} handleEditGithubGlob={handleEditGithubGlob} From 95c37ace40c32bacabc68379837bfe0ae623f1a6 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 07:15:32 +0000 Subject: [PATCH 20/81] Fix migrations --- ...nd_more.py => 0080_remove_gurutype_github_repos_and_more.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/gurubase-backend/backend/core/migrations/{0078_remove_gurutype_github_repos_and_more.py => 0080_remove_gurutype_github_repos_and_more.py} (82%) diff --git a/src/gurubase-backend/backend/core/migrations/0078_remove_gurutype_github_repos_and_more.py b/src/gurubase-backend/backend/core/migrations/0080_remove_gurutype_github_repos_and_more.py similarity index 82% rename from src/gurubase-backend/backend/core/migrations/0078_remove_gurutype_github_repos_and_more.py rename to 
src/gurubase-backend/backend/core/migrations/0080_remove_gurutype_github_repos_and_more.py index 9e4bc45c..390defda 100644 --- a/src/gurubase-backend/backend/core/migrations/0078_remove_gurutype_github_repos_and_more.py +++ b/src/gurubase-backend/backend/core/migrations/0080_remove_gurutype_github_repos_and_more.py @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ('core', '0077_settings_split_min_length_settings_split_overlap_and_more'), + ('core', '0079_gurutype_custom_instruction_prompt'), ] operations = [ From 73179ade5e2f2ae76867c0e40518f45c115ea912 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 07:15:37 +0000 Subject: [PATCH 21/81] Fix confluence --- .../backend/core/services/data_source_service.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gurubase-backend/backend/core/services/data_source_service.py b/src/gurubase-backend/backend/core/services/data_source_service.py index 06d7944d..a591538f 100644 --- a/src/gurubase-backend/backend/core/services/data_source_service.py +++ b/src/gurubase-backend/backend/core/services/data_source_service.py @@ -42,7 +42,7 @@ def validate_pdf_files(self, pdf_files: List[UploadedFile], pdf_privacies: List[ if not is_allowed: raise ValueError(error_msg) - def validate_url_limits(self, youtube_urls=None, website_urls=None, jira_urls=None, zendesk_urls=None) -> None: + def validate_url_limits(self, youtube_urls=None, website_urls=None, jira_urls=None, zendesk_urls=None, confluence_urls=None) -> None: """ Validates URL count limits for multiple URL types @@ -59,8 +59,8 @@ def validate_url_limits(self, youtube_urls=None, website_urls=None, jira_urls=No website_urls = website_urls or [] jira_urls = jira_urls or [] zendesk_urls = zendesk_urls or [] - - if any([youtube_urls, website_urls, jira_urls, zendesk_urls]): + confluence_urls = confluence_urls or [] + if any([youtube_urls, website_urls, jira_urls, zendesk_urls, confluence_urls]): is_allowed, error_msg = self.guru_type_object.check_datasource_limits( self.user, website_urls_count=len(website_urls), From 73d492a2500473a0c99dae1e2b9a46680c67b36c Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 07:17:18 +0000 Subject: [PATCH 22/81] Fix names in source table --- .../src/components/NewEditGuru/SourcesTableSection.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx index f5286dce..990a0116 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx @@ -590,7 +590,7 @@ export function SourcesTableSection({
) : ( - {source.name || source.domain} + {source.domain || source.name} {source.type === "pdf" && source.size && ( ({formatFileSize(source.size)}) From a904d50e0051cb9caedd9786185daaca525c97e8 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 08:32:35 +0000 Subject: [PATCH 23/81] Fix github repo creation --- .../backend/core/services/data_source_service.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/gurubase-backend/backend/core/services/data_source_service.py b/src/gurubase-backend/backend/core/services/data_source_service.py index a591538f..aa51feda 100644 --- a/src/gurubase-backend/backend/core/services/data_source_service.py +++ b/src/gurubase-backend/backend/core/services/data_source_service.py @@ -337,10 +337,6 @@ def create_data_sources( for repo in github_repos: results.append(self.strategies['github'].create(self.guru_type_object, repo)) - # Process GitHub repos - for repo in github_repos: - results.append(self.strategies['github'].create(self.guru_type_object, repo)) - # Trigger background task data_source_retrieval.delay(guru_type_slug=self.guru_type_object.slug) From bae83940e44b770ffb26f7832ad2aaa525c180dd Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 08:36:21 +0000 Subject: [PATCH 24/81] Fix polling state on initial guru edit load --- src/gurubase-backend/backend/core/views.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/gurubase-backend/backend/core/views.py b/src/gurubase-backend/backend/core/views.py index c647b6d4..15133714 100644 --- a/src/gurubase-backend/backend/core/views.py +++ b/src/gurubase-backend/backend/core/views.py @@ -554,7 +554,8 @@ def my_gurus(request, guru_slug=None): 'jira_limit': guru.jira_count_limit, 'zendesk_limit': guru.zendesk_count_limit, 'widget_ids': WidgetIdSerializer(widget_ids, many=True).data, - 'github_repo_limit': guru.github_repo_count_limit + 'github_repo_limit': guru.github_repo_count_limit, + 'ready': guru.ready }) if guru_slug: @@ -901,6 +902,14 @@ def create_data_sources(request, guru_type): if confluence_urls: service.validate_integration('confluence') + # Process GitHub repos - this will update existing ones + updated_repos = service.update_existing_github_repos(github_repos) + + # Reindex any updated repos + if updated_repos: + for repo in updated_repos: + repo.reindex() + # Create data sources results = service.create_data_sources( pdf_files=pdf_files, From 830ab12a5ab95ef6804472327299a43e6713601a Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 09:07:00 +0000 Subject: [PATCH 25/81] Turn data source creation to dropdown --- .../components/NewEditGuru/SourceActions.jsx | 190 ++++++++++-------- 1 file changed, 105 insertions(+), 85 deletions(-) diff --git a/src/gurubase-frontend/src/components/NewEditGuru/SourceActions.jsx b/src/gurubase-frontend/src/components/NewEditGuru/SourceActions.jsx index 94546f11..07687a41 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/SourceActions.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/SourceActions.jsx @@ -1,6 +1,6 @@ import React from "react"; import { Button } from "@/components/ui/button"; -import { LoaderCircle } from "lucide-react"; +import { LoaderCircle, ChevronDown } from "lucide-react"; import { getAllSourceTypeConfigs } from "@/config/sourceTypes"; import { Tooltip, @@ -8,9 +8,15 @@ import { TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"; +import { + DropdownMenu, + DropdownMenuTrigger, + DropdownMenuContent, + DropdownMenuItem +} from 
"@/components/ui/dropdown-menu"; import { cn } from "@/lib/utils"; -// Updated component to dynamically render action buttons based on config +// Updated component to render actions in a dropdown menu export const SourceActions = ({ isProcessing, isSourcesProcessing, @@ -23,96 +29,110 @@ export const SourceActions = ({ const configs = getAllSourceTypeConfigs(); return ( -
- {configs.map((config) => { - const ActionIcon = config.actionButtonIcon; - const isLoading = - config.requiresIntegrationCheck && - loadingStates[config.integrationLoadingProp]; - const baseDisabled = isProcessing || isSourcesProcessing || isLoading; + + + + + + {configs.map((config) => { + const ActionIcon = config.actionButtonIcon; + const isLoading = + config.requiresIntegrationCheck && + loadingStates[config.integrationLoadingProp]; + const baseDisabled = isProcessing || isSourcesProcessing || isLoading; - // Determine the correct handler: prioritize specific actionHandlerName, fallback to id - const handlerKey = config.actionHandlerName || config.id; - const onClickHandler = actionHandlers[handlerKey]; + // Determine the correct handler: prioritize specific actionHandlerName, fallback to id + const handlerKey = config.actionHandlerName || config.id; + const onClickHandler = actionHandlers[handlerKey]; - // Optionally log a warning if a handler is missing for a configured type - if (!onClickHandler && !config.requiresIntegrationCheck) { - // Don't warn for integration types here, as the check is in the handler - } + // Optionally log a warning if a handler is missing for a configured type + if (!onClickHandler && !config.requiresIntegrationCheck) { + // Don't warn for integration types here, as the check is in the handler + } - // Jira (and potentially future integration types) needs a special check within its handler, - // but we use the handler name from the config to call it. - // The loading state is used to show the spinner. + // Special disabling logic for Jira in 'create' mode + const isJiraInCreateMode = config.id === "jira" && !isEditMode; + const isZendeskInCreateMode = config.id === "zendesk" && !isEditMode; + const isConfluenceInCreateMode = + config.id === "confluence" && !isEditMode; + const finalDisabled = + baseDisabled || + isJiraInCreateMode || + isZendeskInCreateMode || + isConfluenceInCreateMode; - // Special disabling logic for Jira in 'create' mode - const isJiraInCreateMode = config.id === "jira" && !isEditMode; - const isZendeskInCreateMode = config.id === "zendesk" && !isEditMode; - const finalDisabled = - baseDisabled || isJiraInCreateMode || isZendeskInCreateMode; + const menuItem = ( + + {isLoading ? ( + + ) : ( + + )} + {config.actionButtonText} + + ); - const button = ( - - ); + // Conditionally wrap with Tooltip in create mode + if (isJiraInCreateMode) { + return ( + + + +
{menuItem}
+
+ +

Jira integration requires an existing Guru.

+
+
+
+ ); + } - // Conditionally wrap Jira button with Tooltip in create mode - if (isJiraInCreateMode) { - return ( - - - - {/* Wrap the button in a span for TooltipTrigger when disabled */} - {button} - - -
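
For readers skimming the hunk above: each dropdown entry resolves its click handler from the actionHandlers map and is force-disabled for the integration-backed sources while a guru is still being created. The patch spells the create-mode check out per integration (Jira, Zendesk, Confluence); the sketch below condenses it, using hypothetical configs and handlers since the real ones come from getAllSourceTypeConfigs() and the parent component.

// Sketch only; the sample configs and handlers are invented for illustration.
const actionHandlers = {
  website: () => console.log("open the website modal"),
  jira: () => console.log("open the Jira modal")
};
const configs = [
  { id: "website" },
  { id: "jira", actionHandlerName: "jira", requiresIntegrationCheck: true }
];
const isEditMode = false; // creating a new guru

for (const config of configs) {
  // Prefer an explicit handler name, fall back to the config id.
  const handlerKey = config.actionHandlerName || config.id;
  const onClickHandler = actionHandlers[handlerKey];
  // Jira, Zendesk and Confluence stay disabled until the guru exists.
  const disabledInCreateMode =
    ["jira", "zendesk", "confluence"].includes(config.id) && !isEditMode;
  console.log(config.id, Boolean(onClickHandler), disabledInCreateMode);
}
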

Jira integration requires an existing Guru.

-
-
-
- ); - } + if (isZendeskInCreateMode) { + return ( + + + +
{menuItem}
+
+ +

Zendesk integration requires an existing Guru.

+
+
+
+ ); + } - if (isZendeskInCreateMode) { - return ( - - - - {button} - - -

Zendesk integration requires an existing Guru.

-
-
-
- ); - } + if (isConfluenceInCreateMode) { + return ( + + + +
{menuItem}
+
+ +

Confluence integration requires an existing Guru.

+
+
+
+ ); + } - // Return the button directly for other source types or when in edit mode - return button; - })} -
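
The JSX element tags in this hunk did not survive in this copy of the patch, so the three create-mode branches above read as bare fragments. Based on the tooltip components imported at the top of the file, each branch plausibly returns markup along the lines of the sketch below; the exact wrapper elements and props are assumptions, not the literal patch content.

// Rough reconstruction, shown for the Confluence branch; the Jira and Zendesk
// branches differ only in the tooltip message.
if (isConfluenceInCreateMode) {
  return (
    <TooltipProvider key={config.id}>
      <Tooltip>
        <TooltipTrigger asChild>
          {/* A plain wrapper keeps the tooltip usable while the menu item is disabled. */}
          <span>{menuItem}</span>
        </TooltipTrigger>
        <TooltipContent>
          Confluence integration requires an existing Guru.
        </TooltipContent>
      </Tooltip>
    </TooltipProvider>
  );
}
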
+ // Return the menu item directly for other source types or when in edit mode + return menuItem; + })} + + ); }; From 6e4188c7d9c5150df55a03a54f2322b365384d56 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 09:07:08 +0000 Subject: [PATCH 26/81] Fix data source creation dropdown layout --- .../src/components/NewEditGuru/SourcesTableSection.jsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx index 990a0116..ffad1722 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx @@ -525,7 +525,7 @@ export function SourcesTableSection({
- {sources.length > 0 && ( + {sources.length > 0 ? ( + ) : ( + // To preserve the layout when source selection is not available during initial loading and new guru creation +
)} Date: Thu, 24 Apr 2025 09:10:47 +0000 Subject: [PATCH 27/81] Fix clickability of zendesk/confluence when creating a new guru --- .../src/components/NewEditGuru/SourceActions.jsx | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/gurubase-frontend/src/components/NewEditGuru/SourceActions.jsx b/src/gurubase-frontend/src/components/NewEditGuru/SourceActions.jsx index 07687a41..0a06467a 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/SourceActions.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/SourceActions.jsx @@ -72,8 +72,20 @@ export const SourceActions = ({ key={config.id} className={cn("flex items-center gap-2")} disabled={finalDisabled} - onClick={isJiraInCreateMode ? undefined : onClickHandler} - tabIndex={isJiraInCreateMode ? -1 : 0}> + onClick={ + isJiraInCreateMode || + isZendeskInCreateMode || + isConfluenceInCreateMode + ? undefined + : onClickHandler + } + tabIndex={ + isJiraInCreateMode || + isZendeskInCreateMode || + isConfluenceInCreateMode + ? -1 + : 0 + }> {isLoading ? ( ) : ( From 86447d382f40e445b19ae614246401f8ad6dfba2 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 09:33:33 +0000 Subject: [PATCH 28/81] Fix references having the same titles overwriting each other --- src/gurubase-backend/backend/core/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gurubase-backend/backend/core/utils.py b/src/gurubase-backend/backend/core/utils.py index 51ddcffb..df5b918c 100644 --- a/src/gurubase-backend/backend/core/utils.py +++ b/src/gurubase-backend/backend/core/utils.py @@ -301,7 +301,7 @@ def prepare_contexts(contexts, reranked_scores): formatted_contexts.append('\n'.join(context_parts)) - references[context['entity']['metadata']['title']] = { + references[context['entity']['metadata']['link']] = { 'question': context['entity']['metadata']['title'], 'link': context['entity']['metadata']['link'] } @@ -322,7 +322,7 @@ def prepare_contexts(contexts, reranked_scores): formatted_contexts.append('\n'.join(context_parts)) - references[context['entity']['metadata']['title']] = { + references[context['entity']['metadata']['link']] = { 'question': context['entity']['metadata']['title'], 'link': context['entity']['metadata']['link'] } @@ -353,7 +353,7 @@ def prepare_contexts(contexts, reranked_scores): formatted_contexts.append('\n'.join(context_parts)) - references[context['entity']['metadata']['title']] = { + references[context['entity']['metadata']['link']] = { 'question': context['entity']['metadata']['title'], 'link': link } From 12b1402f187203ed7e47cbf068d21ad2cc022b6f Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 09:33:59 +0000 Subject: [PATCH 29/81] Move github details logic into data sources instead of guru types --- src/gurubase-backend/backend/core/admin.py | 2 +- ...0081_datasource_github_details_and_more.py | 23 +++ src/gurubase-backend/backend/core/models.py | 4 +- src/gurubase-backend/backend/core/tasks.py | 139 ++++-------------- src/gurubase-backend/backend/core/utils.py | 100 +++++++++---- 5 files changed, 129 insertions(+), 139 deletions(-) create mode 100644 src/gurubase-backend/backend/core/migrations/0081_datasource_github_details_and_more.py diff --git a/src/gurubase-backend/backend/core/admin.py b/src/gurubase-backend/backend/core/admin.py index dce3cb1b..c731a08d 100644 --- a/src/gurubase-backend/backend/core/admin.py +++ b/src/gurubase-backend/backend/core/admin.py @@ -165,7 +165,7 @@ def guru_type(self, obj): @admin.register(GuruType) 
class GuruTypeAdmin(admin.ModelAdmin): - list_display = ['id', 'slug', 'active', 'has_sitemap_added_questions', 'icon_url', 'stackoverflow_tag', 'domain_knowledge', 'colors', 'custom', 'maintainers_list', 'text_embedding_model', 'code_embedding_model', 'date_created', 'date_updated', 'github_details_updated_date'] + list_display = ['id', 'slug', 'active', 'has_sitemap_added_questions', 'icon_url', 'stackoverflow_tag', 'domain_knowledge', 'colors', 'custom', 'maintainers_list', 'text_embedding_model', 'code_embedding_model', 'date_created', 'date_updated'] search_fields = ['id', 'slug', 'icon_url', 'stackoverflow_tag', 'domain_knowledge', 'date_created', 'date_updated', 'maintainers__email'] list_filter = ('active', 'custom', 'has_sitemap_added_questions', 'text_embedding_model', 'code_embedding_model') ordering = ('-id',) diff --git a/src/gurubase-backend/backend/core/migrations/0081_datasource_github_details_and_more.py b/src/gurubase-backend/backend/core/migrations/0081_datasource_github_details_and_more.py new file mode 100644 index 00000000..dd490ae5 --- /dev/null +++ b/src/gurubase-backend/backend/core/migrations/0081_datasource_github_details_and_more.py @@ -0,0 +1,23 @@ +# Generated by Django 4.2.18 on 2025-04-24 08:55 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0080_remove_gurutype_github_repos_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='datasource', + name='github_details', + field=models.JSONField(blank=True, default=dict, null=True), + ), + migrations.AddField( + model_name='datasource', + name='github_details_updated_date', + field=models.DateTimeField(blank=True, null=True), + ), + ] diff --git a/src/gurubase-backend/backend/core/models.py b/src/gurubase-backend/backend/core/models.py index cb0eefb2..c8a1213b 100644 --- a/src/gurubase-backend/backend/core/models.py +++ b/src/gurubase-backend/backend/core/models.py @@ -304,8 +304,6 @@ class EmbeddingModel(models.TextChoices): name = models.CharField(max_length=50, blank=True, null=True) maintainers = models.ManyToManyField(User, blank=True, related_name='maintained_guru_types') stackoverflow_tag = models.CharField(max_length=100, blank=True, null=True) - github_details = models.JSONField(default=dict, blank=True, null=False) - github_details_updated_date = models.DateTimeField(null=True, blank=True) colors = models.JSONField(default=dict, blank=True, null=False) icon_url = models.CharField(max_length=2000, default="", blank=True, null=True) ogimage_url = models.URLField(max_length=2000, default="", blank=True, null=True) # question @@ -672,6 +670,8 @@ class Status(models.TextChoices): final_summarization_created = models.BooleanField(default=False) default_branch = models.CharField(max_length=100, null=True, blank=True) # Only used for Github Repos + github_details = models.JSONField(default=dict, blank=True, null=True) # For storing GitHub repository details + github_details_updated_date = models.DateTimeField(null=True, blank=True) # When GitHub details were last updated private = models.BooleanField(default=False) diff --git a/src/gurubase-backend/backend/core/tasks.py b/src/gurubase-backend/backend/core/tasks.py index 4b2729b0..e86601f9 100644 --- a/src/gurubase-backend/backend/core/tasks.py +++ b/src/gurubase-backend/backend/core/tasks.py @@ -1240,84 +1240,6 @@ def process_sitemap(): logger.info('Processed sitemap for all guru types') -@shared_task -@with_redis_lock( - redis_client, - 'update_guru_type_details_lock', - 
1800 -) -def update_guru_type_details(): - """ - Updates GuruType details: - 1. Adds github details if missing (only for the first GitHub repo) - 2. Updates domain knowledge if it's the default value - """ - logger.info("Updating guru type details") - - from core.utils import get_root_summarization_of_guru_type - from core.requester import GitHubRequester - - github_requester = GitHubRequester() - - guru_types = GuruType.objects.filter(custom=True, active=True) - - for guru_type in guru_types: - # Update GitHub details if missing - if not guru_type.github_details: - github_repo = DataSource.objects.filter(guru_type=guru_type, type=DataSource.Type.GITHUB_REPO).first() - if github_repo: - try: - # Only fetch details for the first GitHub repo - first_repo = github_repo.url - if not first_repo: - continue - try: - github_details = github_requester.get_github_repo_details(first_repo) - guru_type.github_details = github_details - guru_type.save() - logger.info(f'Updated github details for {guru_type.slug} (repo: {first_repo})') - except Exception as e: - logger.error(f"Error getting github details for repo {first_repo} in {guru_type.slug}: {traceback.format_exc()}") - except Exception as e: - logger.error(f"Error getting github details for {guru_type.slug}: {traceback.format_exc()}") - continue - - # Update domain knowledge if it's default - if settings.ENV != 'selfhosted' and guru_type.domain_knowledge == settings.DEFAULT_DOMAIN_KNOWLEDGE: - from core.requester import GeminiRequester - gemini_requester = GeminiRequester(model_name="gemini-1.5-pro-002") - root_summarization = get_root_summarization_of_guru_type(guru_type.slug) - if not root_summarization: - logger.info(f'No root summarization found for {guru_type.slug}') - continue - - try: - # Get topics and description from github_details - github_topics = [] - github_description = "" - - if guru_type.github_details: - github_topics = guru_type.github_details.get('topics', []) - github_description = guru_type.github_details.get('description', '') - - gemini_response = gemini_requester.generate_topics_from_summary( - root_summarization.result_content, - guru_type.name, - github_topics, - github_description - ) - - new_topics = gemini_response.get('topics', []) - if new_topics: - guru_type.domain_knowledge = ', '.join(new_topics) - guru_type.save() - logger.info(f'Updated domain knowledge for {guru_type.slug}') - except Exception as e: - logger.error(f"Error updating domain knowledge for {guru_type.slug}: {traceback.format_exc()}") - continue - - logger.info("Updated guru type details") - @shared_task def check_datasource_in_milvus_false_and_success(): # This should not be happened. During deployment times, some datasources can not be updated because of pod deletion. @@ -1359,10 +1281,10 @@ def send_request_to_questions_for_cloudflare_cache(): ) def update_github_details(): """ - Updates GitHub details for guru types that haven't been updated in the last 24 hours. - Processes at most 200 guru types per hour to avoid overwhelming the GitHub API. + Updates GitHub details for data sources of type GITHUB_REPO that haven't been updated in the last 24 hours. + Processes at most 200 data sources per hour to avoid overwhelming the GitHub API. 
""" - logger.info("Updating GitHub details for guru types") + logger.info("Updating GitHub details for GitHub repository data sources") from core.requester import GitHubRequester from django.utils import timezone @@ -1370,45 +1292,44 @@ def update_github_details(): github_requester = GitHubRequester() - # Get guru types that haven't been updated in the last 24 hours + # Get data sources that haven't been updated in the last 24 hours # Order by github_details_updated_date to prioritize oldest updates cutoff_time = timezone.now() - timedelta(days=1) - guru_types = GuruType.objects.filter( + data_sources = DataSource.objects.filter( models.Q(github_details_updated_date__isnull=True) | models.Q(github_details_updated_date__lt=cutoff_time), - github_repos__isnull=False, - github_repos__len__gt=0, # Has at least one repo - active=True + type=DataSource.Type.GITHUB_REPO, + status=DataSource.Status.SUCCESS ).order_by('github_details_updated_date')[:200] - logger.info(f'Guru types to update: {guru_types.count()} with cutoff time: {cutoff_time}') + logger.info(f'GitHub repos to update: {data_sources.count()} with cutoff time: {cutoff_time}') updated_count = 0 - for guru_type in guru_types: + for data_source in data_sources: try: - # Get details for all repos - all_details = [] - for repo_url in guru_type.github_repos: - try: - details = github_requester.get_github_repo_details(repo_url) - all_details.append(details) - except Exception as e: - logger.error(f"Error getting GitHub details for {repo_url}: {traceback.format_exc()}") - continue - - if all_details: # Only update if we got at least one repo's details - guru_type.github_details = all_details - guru_type.github_details_updated_date = timezone.now() - guru_type.save() - updated_count += 1 - logger.info(f'Updated GitHub details for {guru_type.slug}') + # Get details for the repo + repo_url = data_source.url + try: + details = github_requester.get_github_repo_details(repo_url) + + if details: + # Store github details in a custom field + data_source.github_details = details + data_source.github_details_updated_date = timezone.now() + data_source.save() + updated_count += 1 + logger.info(f'Updated GitHub details for {repo_url}') + except Exception as e: + logger.error(f"Error getting GitHub details for {repo_url}: {traceback.format_exc()}") + # Still update the timestamp to avoid repeatedly trying failed updates + data_source.github_details_updated_date = timezone.now() + data_source.save() + continue + except Exception as e: - logger.error(f"Error updating GitHub details for {guru_type.slug}: {traceback.format_exc()}") - # Still update the timestamp to avoid repeatedly trying failed updates - guru_type.github_details_updated_date = timezone.now() - guru_type.save() + logger.error(f"Error updating GitHub details for data source {data_source.id}: {traceback.format_exc()}") continue - logger.info(f'Updated GitHub details for {updated_count} guru types') + logger.info(f'Updated GitHub details for {updated_count} GitHub repositories') @shared_task @with_redis_lock( diff --git a/src/gurubase-backend/backend/core/utils.py b/src/gurubase-backend/backend/core/utils.py index df5b918c..e435f015 100644 --- a/src/gurubase-backend/backend/core/utils.py +++ b/src/gurubase-backend/backend/core/utils.py @@ -953,6 +953,7 @@ def filter_by_trust_score(contexts, reranked_scores, question, user_question, en context_data = [] for i, ctx in enumerate(context_relevance['contexts']): ctx['context'] = formatted_contexts[i] + ctx['link'] = 
contexts[i]['entity']['metadata'].get('link') # Filter using the final calculated threshold if ctx['score'] >= final_threshold: context_data.append((contexts[i], reranked_scores[i], ctx['score'])) @@ -1167,34 +1168,78 @@ def create_custom_guru_type_slug(name): return slug -def get_github_details_if_applicable(guru_type): - guru_type_obj = get_guru_type_object(guru_type) - response = "" - if guru_type_obj and guru_type_obj.github_details: +def get_github_details_if_applicable(guru_type, context_links=None, processed_ctx_relevances=None): + # Get GitHub details from data sources referenced in the context links + if not context_links or not processed_ctx_relevances: + return "" + + # Extract all GitHub links from context links that were kept after relevance filtering + github_links = [] + kept_links = set() + + # Get the links that were kept after relevance filtering + if processed_ctx_relevances and 'kept' in processed_ctx_relevances: + for kept_item in processed_ctx_relevances['kept']: + kept_links.add(kept_item['link']) + + # If we don't have kept links, return empty string + if not kept_links and context_links: + return "" + else: + # Only include links that were kept + for link in context_links: + if 'link' in link and link['link'] and link['link'] in kept_links and 'github.com' in link['link']: + github_links.append(link['link']) + + if not github_links: + return "" + + # Get all GitHub files that match these links in a single query + from core.models import GithubFile, DataSource + github_files = GithubFile.objects.filter( + link__in=github_links, + data_source__guru_type__slug=guru_type + ).select_related('data_source') + + # If no files found directly, return empty string + if not github_files.exists(): + return "" + else: + # Extract unique data sources from GitHub files + data_sources = {github_file.data_source for github_file in github_files} + + # Extract GitHub details from data sources + repos_details = [] + for data_source in data_sources: try: - simplified_github_details = {} - github_details = guru_type_obj.github_details - simplified_github_details['name'] = github_details.get('name', '') - simplified_github_details['description'] = github_details.get('description', '') - simplified_github_details['topics'] = github_details.get('topics', []) - simplified_github_details['language'] = github_details.get('language', '') - simplified_github_details['size'] = github_details.get('size', 0) - simplified_github_details['homepage'] = github_details.get('homepage', '') - simplified_github_details['stargazers_count'] = github_details.get('stargazers_count', 0) - simplified_github_details['forks_count'] = github_details.get('forks_count', 0) - # Handle null license case - license_info = github_details.get('license') - simplified_github_details['license_name'] = license_info.get('name', '') if license_info else '' - simplified_github_details['open_issues_count'] = github_details.get('open_issues_count', 0) - simplified_github_details['pushed_at'] = github_details.get('pushed_at', '') - simplified_github_details['created_at'] = github_details.get('created_at', '') - owner = github_details.get('owner', {}) - simplified_github_details['owner_login'] = owner.get('login', '') - response = f"Here is the GitHub details for {guru_type_obj.name}: {simplified_github_details}" + if data_source.github_details: + github_details = data_source.github_details + simplified_github_details = { + 'name': github_details.get('name', ''), + 'description': github_details.get('description', ''), + 'topics': 
github_details.get('topics', []), + 'language': github_details.get('language', ''), + 'size': github_details.get('size', 0), + 'homepage': github_details.get('homepage', ''), + 'stargazers_count': github_details.get('stargazers_count', 0), + 'forks_count': github_details.get('forks_count', 0), + 'license_name': github_details.get('license', {}).get('name', '') if github_details.get('license') else '', + 'open_issues_count': github_details.get('open_issues_count', 0), + 'pushed_at': github_details.get('pushed_at', ''), + 'created_at': github_details.get('created_at', ''), + 'owner_login': github_details.get('owner', {}).get('login', ''), + 'repo_url': data_source.url + } + repos_details.append(simplified_github_details) except Exception as e: - logger.error(f"Error while processing GitHub details for guru type {guru_type}: {str(e)}") - response = "" - return response + logger.error(f"Error while processing GitHub details for data source {data_source.id}: {str(e)}") + + if repos_details: + guru_type_obj = get_guru_type_object(guru_type) + guru_type_name = guru_type_obj.name if guru_type_obj else guru_type + return f"Here are the GitHub details for repositories related to {guru_type_name}: {repos_details}" + + return "" def format_history_for_prompt(history): @@ -1305,7 +1350,8 @@ def ask_question_with_stream( times['total'] = time.perf_counter() - start_total return None, None, None, None, None, None, None, None, None, times - simplified_github_details = get_github_details_if_applicable(guru_type) + # Get GitHub details from data sources referenced in the context links + simplified_github_details = get_github_details_if_applicable(guru_type, links, processed_ctx_relevances) guru_variables = get_guru_type_prompt_map(guru_type) guru_variables['streaming_type']='streaming' From 41c8b6fc670d8ec65bd7cbc448aad17b5ee1cd67 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 09:36:30 +0000 Subject: [PATCH 30/81] Add migration --- ...remove_gurutype_github_details_and_more.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 src/gurubase-backend/backend/core/migrations/0082_remove_gurutype_github_details_and_more.py diff --git a/src/gurubase-backend/backend/core/migrations/0082_remove_gurutype_github_details_and_more.py b/src/gurubase-backend/backend/core/migrations/0082_remove_gurutype_github_details_and_more.py new file mode 100644 index 00000000..8e09a825 --- /dev/null +++ b/src/gurubase-backend/backend/core/migrations/0082_remove_gurutype_github_details_and_more.py @@ -0,0 +1,21 @@ +# Generated by Django 4.2.18 on 2025-04-24 09:34 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0081_datasource_github_details_and_more'), + ] + + operations = [ + migrations.RemoveField( + model_name='gurutype', + name='github_details', + ), + migrations.RemoveField( + model_name='gurutype', + name='github_details_updated_date', + ), + ] From 7ba403dd7578abacb023ae4dd5426ebe1256577b Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 11:07:19 +0000 Subject: [PATCH 31/81] Remove duplicate code --- .../core/services/data_source_service.py | 91 ------------------- 1 file changed, 91 deletions(-) diff --git a/src/gurubase-backend/backend/core/services/data_source_service.py b/src/gurubase-backend/backend/core/services/data_source_service.py index aa51feda..2a93c101 100644 --- a/src/gurubase-backend/backend/core/services/data_source_service.py +++ 
b/src/gurubase-backend/backend/core/services/data_source_service.py @@ -184,97 +184,6 @@ def process_github_repos(self, github_repos: List[Dict[str, Any]]) -> List[Dict[ return new_repos - def validate_github_repos_limits(self, github_repos: List[Dict[str, Any]]) -> None: - """ - Validates GitHub repos count limits for new repos only - - Args: - github_repos: List of GitHub repos to validate - should only contain new repos - - Raises: - ValueError: If validation fails - """ - if github_repos: - is_allowed, error_msg = self.guru_type_object.check_datasource_limits( - self.user, - github_repos_count=len(github_repos) - ) - if not is_allowed: - raise ValueError(error_msg) - - def identify_new_github_repos(self, github_repos: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """ - Identify GitHub repos that don't exist in the system yet - - Args: - github_repos: List of GitHub repo dictionaries - - Returns: - List of GitHub repos that don't exist and need to be created - """ - new_repos = [] - - for repo in github_repos: - repo_url = repo.get('url', '') - - # Check if this repo already exists in this guru type - existing_repo = DataSource.objects.filter( - guru_type=self.guru_type_object, - type=DataSource.Type.GITHUB_REPO, - url=repo_url - ).exists() - - if not existing_repo: - # Add to list of new repos to create - new_repos.append(repo) - - return new_repos - - def update_existing_github_repos(self, github_repos: List[Dict[str, Any]]) -> None: - """ - Update glob patterns for existing GitHub repos - - Args: - github_repos: List of GitHub repo dictionaries with 'url' and 'glob_patterns' fields - """ - updated_repos = [] - for repo in github_repos: - repo_url = repo.get('url', '') - - # Check if this repo already exists in this guru type - existing_repo = DataSource.objects.filter( - guru_type=self.guru_type_object, - type=DataSource.Type.GITHUB_REPO, - url=repo_url - ).first() - - if existing_repo: - # Update glob patterns - existing_repo.github_glob_pattern = repo['glob_pattern'] - existing_repo.github_glob_include = repo['include_glob'] - existing_repo.save() - updated_repos.append(existing_repo) - - return updated_repos - - def process_github_repos(self, github_repos: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """ - Process GitHub repos, updating existing ones and returning new ones to be created - - Args: - github_repos: List of GitHub repo dictionaries with 'url' and 'glob_patterns' fields - - Returns: - List of GitHub repos that need to be created (didn't exist before) - """ - # Identify new repos - new_repos = self.identify_new_github_repos(github_repos) - - # Update existing repos - self.update_existing_github_repos(github_repos) - - return new_repos - def create_data_sources( self, pdf_files: List[UploadedFile], From 69a13221826b0d07462abeea44d13311fec99156 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 11:07:24 +0000 Subject: [PATCH 32/81] Fix github update --- src/gurubase-backend/backend/core/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gurubase-backend/backend/core/views.py b/src/gurubase-backend/backend/core/views.py index 15133714..1b6059e8 100644 --- a/src/gurubase-backend/backend/core/views.py +++ b/src/gurubase-backend/backend/core/views.py @@ -918,7 +918,7 @@ def create_data_sources(request, guru_type): website_urls=website_urls, jira_urls=jira_urls, zendesk_urls=zendesk_urls, - github_repos=github_repos, + github_repos=new_github_repos, confluence_urls=confluence_urls ) From 110d11825ae84eb351d75438b73d1fdfb9a7f494 
Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 11:20:19 +0000 Subject: [PATCH 33/81] Exclude extra fields in data source --- src/gurubase-backend/backend/core/serializers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gurubase-backend/backend/core/serializers.py b/src/gurubase-backend/backend/core/serializers.py index 9a6692cb..c62f7d48 100644 --- a/src/gurubase-backend/backend/core/serializers.py +++ b/src/gurubase-backend/backend/core/serializers.py @@ -73,7 +73,7 @@ class Meta: class DataSourceSerializer(serializers.ModelSerializer): class Meta: model = DataSource - exclude = ['file', 'doc_ids', 'content', 'guru_type', 'last_reindex_date'] + exclude = ['file', 'doc_ids', 'content', 'guru_type', 'last_reindex_date', 'github_glob_pattern', 'github_glob_include'] def to_representation(self, instance): from core.utils import format_github_repo_error From 8d8226162612dbee0b80ba617d65a06bc4937c7f Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 11:20:44 +0000 Subject: [PATCH 34/81] Fix processing status of repositories when they are fetched but not written yet --- .../NewEditGuru/SourcesTableSection.jsx | 34 +++++++++++-------- .../src/components/NewGuru.jsx | 2 ++ 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx index ffad1722..cd25bf3d 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx @@ -167,7 +167,10 @@ export function SourcesTableSection({ return source.domains.some((domain) => domain.status === "NOT_PROCESSED"); } - return source.status === "NOT_PROCESSED"; + return ( + source.status === "NOT_PROCESSED" || + (source.status === "SUCCESS" && !source.in_milvus) + ); }; const renderBadges = (source) => { @@ -179,6 +182,10 @@ export function SourcesTableSection({ if (!config) return null; + const sourceDone = + (source.status === "SUCCESS" && source.in_milvus) || + source.status === "FAIL"; + if (isGithubSource) { let badgeProps = { className: @@ -210,19 +217,18 @@ export function SourcesTableSection({ break; } - const badgeElement = - source.status === "SUCCESS" || source.status === "FAIL" ? ( - - - {badgeProps.text} - - ) : null; + const badgeElement = sourceDone ? ( + + + {badgeProps.text} + + ) : null; const lastIndexedDate = source.last_reindex_date ? new Date(source.last_reindex_date).toLocaleString() diff --git a/src/gurubase-frontend/src/components/NewGuru.jsx b/src/gurubase-frontend/src/components/NewGuru.jsx index 6d6707b2..1b3d247b 100644 --- a/src/gurubase-frontend/src/components/NewGuru.jsx +++ b/src/gurubase-frontend/src/components/NewGuru.jsx @@ -545,6 +545,7 @@ export default function NewGuru({ guruData, isProcessing }) { size: source.type === "PDF" ? source.size : "N/A", url: source.url || "", status: source.status, + in_milvus: source.in_milvus, file_count: source.file_count, // Add file_count last_reindex_date: source.last_reindex_date || "", error: source.error || "", @@ -923,6 +924,7 @@ export default function NewGuru({ guruData, isProcessing }) { size: source.type === "PDF" ? 
source.size : "N/A", url: source.url || "", status: source.status, + in_milvus: source.in_milvus, file_count: source.file_count, last_reindex_date: source.last_reindex_date || "", error: source.error || "", From 69a896cfb18ddf2a8512455ab69272f1a8132a3f Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 11:21:51 +0000 Subject: [PATCH 35/81] Add comment --- src/gurubase-backend/backend/core/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gurubase-backend/backend/core/models.py b/src/gurubase-backend/backend/core/models.py index c8a1213b..3443c123 100644 --- a/src/gurubase-backend/backend/core/models.py +++ b/src/gurubase-backend/backend/core/models.py @@ -629,7 +629,7 @@ class Type(models.TextChoices): ZENDESK = "ZENDESK" CONFLUENCE = "CONFLUENCE" - class Status(models.TextChoices): + class Status(models.TextChoices): # This is the data retrieval status. It is not the readiness of the data source as it may not be written to milvus yet. NOT_PROCESSED = "NOT_PROCESSED" SUCCESS = "SUCCESS" FAIL = "FAIL" From 4b39b24624c108a2728c5e7580ace3715f32dc58 Mon Sep 17 00:00:00 2001 From: aralyekta Date: Thu, 24 Apr 2025 11:39:55 +0000 Subject: [PATCH 36/81] Set deleting source text while deleting sources --- .../NewEditGuru/SourcesTableSection.jsx | 42 +++++++++++++++---- .../src/components/NewGuru.jsx | 8 ++++ 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx index cd25bf3d..37043db5 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/SourcesTableSection.jsx @@ -85,7 +85,8 @@ export function SourcesTableSection({ setIsGithubSidebarOpen, handleEditGithubGlob, setIsEditingRepo, - setEditingRepo + setEditingRepo, + deletingSources = [] }) { const [filterType, setFilterType] = useState("all"); @@ -173,6 +174,15 @@ export function SourcesTableSection({ ); }; + const isSourceDeleting = (source) => { + return deletingSources.some( + (deletingSource) => + deletingSource.id === source.id || + (source.domains && + source.domains.some((domain) => domain.id === deletingSource.id)) + ); + }; + const renderBadges = (source) => { const config = getSourceTypeConfigById(source.type); const isGithubSource = @@ -592,7 +602,12 @@ export function SourcesTableSection({ - {isSourceProcessing(source) && isSourcesProcessing ? ( + {isSourceDeleting(source) ? ( +
+ + Deleting source... +
+ ) : isSourceProcessing(source) && isSourcesProcessing ? (
Processing source... @@ -618,7 +633,9 @@ export function SourcesTableSection({