diff --git a/.changeset/code-cleanup-cache-optimization.md b/.changeset/code-cleanup-cache-optimization.md new file mode 100644 index 0000000..65d3f70 --- /dev/null +++ b/.changeset/code-cleanup-cache-optimization.md @@ -0,0 +1,14 @@ +--- +'@signalwire/docusaurus-plugin-llms-txt': patch +'@signalwire/docusaurus-theme-llms-txt': patch +--- + +Code cleanup and cache optimization: + +- Remove dead code (className prop, normalizePathname export, CopyContentData export) +- Optimize cache implementation (replace over-engineered promise cache with minimal in-memory cache) +- Fix resize re-fetch bug (component no longer re-fetches data when switching between mobile/desktop + views) +- Reduce code size by 47% in useCopyContentData hook +- Changed the location of the CopyButtonContent component. The theme now swizzles DocItem/Layout and + conditionally puts the Copy button content component after it or below it diff --git a/.changeset/eight-vans-sleep.md b/.changeset/eight-vans-sleep.md new file mode 100644 index 0000000..8781e60 --- /dev/null +++ b/.changeset/eight-vans-sleep.md @@ -0,0 +1,6 @@ +--- +'@signalwire/docusaurus-plugin-llms-txt': patch +'@signalwire/docusaurus-theme-llms-txt': patch +--- + +Organize links by path now in llms-txt diff --git a/.changeset/fast-lands-sit.md b/.changeset/fast-lands-sit.md new file mode 100644 index 0000000..07e2176 --- /dev/null +++ b/.changeset/fast-lands-sit.md @@ -0,0 +1,6 @@ +--- +'@signalwire/docusaurus-plugin-llms-txt': patch +'@signalwire/docusaurus-theme-llms-txt': patch +--- + +Fixed attachments filename bug diff --git a/.changeset/major-refactor-breadcrumbs-wrap.md b/.changeset/major-refactor-breadcrumbs-wrap.md new file mode 100644 index 0000000..caea669 --- /dev/null +++ b/.changeset/major-refactor-breadcrumbs-wrap.md @@ -0,0 +1,37 @@ +--- +'@signalwire/docusaurus-plugin-llms-txt': patch +'@signalwire/docusaurus-theme-llms-txt': patch +--- + +Major architecture improvements for better plugin compatibility: + +**Component 
Changes:** + +- Switched from ejecting `DocItem/Layout` to wrapping `DocBreadcrumbs` + - This prevents conflicts with other plugins that customize the layout + - Uses WRAP pattern instead of EJECT for better compatibility +- Changed internal import from `@theme-original` to `@theme-init` following Docusaurus best + practices for theme enhancers + +**Improvements:** + +- Fixed type declarations to accurately reflect component props + - Removed unused `className` prop from `CopyPageContent` + - Fixed `DocBreadcrumbs` type declaration for proper wrapping support +- Added `margin-left: auto` to ensure copy button always aligns right in desktop view +- Fixed package publishing configuration + - Added `src/theme` directory to published files for TypeScript swizzling support + - Updated devDependencies for proper type resolution + - Changed `react-icons` from exact version to version range + +**Documentation:** + +- Updated README with correct swizzle examples for `DocBreadcrumbs` +- Added explanation of `@theme-init` vs `@theme-original` usage +- Updated swizzle configuration to reflect new safe wrapping pattern + +**Compatibility:** + +- Now compatible with plugins like `docusaurus-plugin-openapi-docs` that also customize layouts +- Follows official Docusaurus theme enhancer pattern (similar to `@docusaurus/theme-live-codeblock`) +- Users can now safely wrap our enhanced breadcrumbs with `@theme-original/DocBreadcrumbs` diff --git a/README.md b/README.md index 26a004d..45cfad9 100644 --- a/README.md +++ b/README.md @@ -8,23 +8,26 @@ structure. 
| Package | Version | Description | | --------------------------------------------------------------------------------- | --------------------------------------------------------------------------- | ------------------------------------------------------------------------- | | [`@signalwire/docusaurus-plugin-llms-txt`](./packages/docusaurus-plugin-llms-txt) | ![npm](https://img.shields.io/npm/v/@signalwire/docusaurus-plugin-llms-txt) | Generate Markdown versions of Docusaurus pages and an llms.txt index file | +| [`@signalwire/docusaurus-theme-llms-txt`](./packages/docusaurus-theme-llms-txt) | ![npm](https://img.shields.io/npm/v/@signalwire/docusaurus-theme-llms-txt) | Theme components for llms.txt plugin with copy-to-clipboard functionality | ## 🚀 Quick Start ### Installation ```bash -npm install @signalwire/docusaurus-plugin-llms-txt +npm install @signalwire/docusaurus-plugin-llms-txt @signalwire/docusaurus-theme-llms-txt # or -yarn add @signalwire/docusaurus-plugin-llms-txt +yarn add @signalwire/docusaurus-plugin-llms-txt @signalwire/docusaurus-theme-llms-txt ``` ### Usage -Add to your `docusaurus.config.js`: +Add to your `docusaurus.config.ts`: -```javascript -module.exports = { +```typescript +import type { Config } from '@docusaurus/types'; + +const config: Config = { plugins: [ [ '@signalwire/docusaurus-plugin-llms-txt', @@ -33,7 +36,10 @@ module.exports = { }, ], ], + themes: ['@signalwire/docusaurus-theme-llms-txt'], }; + +export default config; ``` ## 🏗 Development @@ -121,7 +127,8 @@ For detailed publishing instructions, see [PUBLISHING.md](./PUBLISHING.md). ``` docusaurus-plugins/ ├── packages/ # Published packages -│ └── docusaurus-plugin-llms-txt/ +│ ├── docusaurus-plugin-llms-txt/ +│ └── docusaurus-theme-llms-txt/ ├── website/ # Demo/documentation site ├── .changeset/ # Changeset configuration ├── lerna.json # Lerna configuration @@ -194,3 +201,24 @@ consumption. 
- 🎯 Content filtering [View Package →](./packages/docusaurus-plugin-llms-txt) + +### [@signalwire/docusaurus-theme-llms-txt](./packages/docusaurus-theme-llms-txt) + +Theme package providing UI components for the llms.txt plugin, including a copy-to-clipboard button +for page content. + +**Key Features:** + +- 📋 Copy page content as Markdown +- 🤖 Format for ChatGPT and Claude +- 🎯 Smart detection of page title +- 📱 Responsive mobile/desktop layouts +- 🔧 Fully swizzlable components + +**Architecture:** + +- Uses DOM-based detection to identify page titles (H1 in `<header>
` tags) +- Global data fetching with shared cache for performance +- Integrates seamlessly with Docusaurus theme system + +[View Package →](./packages/docusaurus-theme-llms-txt) diff --git a/packages/docusaurus-plugin-llms-txt/CHANGELOG.md b/packages/docusaurus-plugin-llms-txt/CHANGELOG.md index 08049b7..3d28fc0 100644 --- a/packages/docusaurus-plugin-llms-txt/CHANGELOG.md +++ b/packages/docusaurus-plugin-llms-txt/CHANGELOG.md @@ -1,9 +1,118 @@ # @signalwire/docusaurus-plugin-llms-txt -## 2.0.0-alpha.0 +## 2.0.0-alpha.5 + +### Patch Changes + +- Organize links by path now in llms-txt +- 85c2631: Fixed attachments filename bug + +## 2.0.0-alpha.3 ### Major Changes -- Alpha Release +- 75c2b75: Alpha Release + +### Patch Changes + +- Fix `contentStrategy` bug where dropdown menu displayed incorrect content type. When + `contentStrategy: 'html-only'` is set, the dropdown now correctly shows "Copy Raw HTML" instead of + "Copy Raw Markdown". The "View Markdown" option remains available when markdown exists, + independent of `contentStrategy` setting. + +- Updated ambient type declarations to remove unused `hasMarkdown` prop from CopyButton component. + +- Updated README documentation to clarify `contentStrategy` behavior and `viewMarkdown` + independence. + +- ec2e25b: Code cleanup and cache optimization: + - Remove dead code (className prop, normalizePathname export, CopyContentData export) + - Optimize cache implementation (replace over-engineered promise cache with minimal in-memory + cache) + - Fix resize re-fetch bug (component no longer re-fetches data when switching between + mobile/desktop views) + - Reduce code size by 47% in useCopyContentData hook + - Changed the location of the CopyButtonContent component. 
The theme now swizzles DocItem/Layout + and conditionally puts the Copy button content component after it or below it + +- e1246b2: Major architecture improvements for better plugin compatibility: + + **Component Changes:** + - Switched from ejecting `DocItem/Layout` to wrapping `DocBreadcrumbs` + - This prevents conflicts with other plugins that customize the layout + - Uses WRAP pattern instead of EJECT for better compatibility + - Changed internal import from `@theme-original` to `@theme-init` following Docusaurus best + practices for theme enhancers + + **Improvements:** + - Fixed type declarations to accurately reflect component props + - Removed unused `className` prop from `CopyPageContent` + - Fixed `DocBreadcrumbs` type declaration for proper wrapping support + - Added `margin-left: auto` to ensure copy button always aligns right in desktop view + - Fixed package publishing configuration + - Added `src/theme` directory to published files for TypeScript swizzling support + - Updated devDependencies for proper type resolution + - Changed `react-icons` from exact version to version range + + **Documentation:** + - Updated README with correct swizzle examples for `DocBreadcrumbs` + - Added explanation of `@theme-init` vs `@theme-original` usage + - Updated swizzle configuration to reflect new safe wrapping pattern + + **Compatibility:** + - Now compatible with plugins like `docusaurus-plugin-openapi-docs` that also customize layouts + - Follows official Docusaurus theme enhancer pattern (similar to + `@docusaurus/theme-live-codeblock`) + - Users can now safely wrap our enhanced breadcrumbs with `@theme-original/DocBreadcrumbs` +## 2.0.0-alpha.2 +### Patch Changes + +- Major architecture improvements for better plugin compatibility: + + **Breaking Changes:** + - Switched from ejecting `DocItem/Layout` to wrapping `DocBreadcrumbs` + - This prevents conflicts with other plugins that customize the layout + - Uses WRAP pattern instead of EJECT for better 
compatibility + - Changed internal import from `@theme-original` to `@theme-init` following Docusaurus best + practices for theme enhancers + + **Improvements:** + - Fixed type declarations to accurately reflect component props + - Removed unused `className` prop from `CopyPageContent` + - Fixed `DocBreadcrumbs` type declaration for proper wrapping support + - Added `margin-left: auto` to ensure copy button always aligns right in desktop view + - Fixed package publishing configuration + - Added `src/theme` directory to published files for TypeScript swizzling support + - Updated devDependencies for proper type resolution + - Changed `react-icons` from exact version to version range + + **Documentation:** + - Updated README with correct swizzle examples for `DocBreadcrumbs` + - Added explanation of `@theme-init` vs `@theme-original` usage + - Updated swizzle configuration to reflect new safe wrapping pattern + + **Compatibility:** + - Now compatible with plugins like `docusaurus-plugin-openapi-docs` that also customize layouts + - Follows official Docusaurus theme enhancer pattern (similar to + `@docusaurus/theme-live-codeblock`) + - Users can now safely wrap our enhanced breadcrumbs with `@theme-original/DocBreadcrumbs` + +## 2.0.0-alpha.1 + +### Patch Changes + +- Code cleanup and cache optimization: + - Remove dead code (className prop, normalizePathname export, CopyContentData export) + - Optimize cache implementation (replace over-engineered promise cache with minimal in-memory + cache) + - Fix resize re-fetch bug (component no longer re-fetches data when switching between + mobile/desktop views) + - Reduce code size by 47% in useCopyContentData hook + +## 2.0.0-alpha.0 + +### Major Changes + +- Alpha Release diff --git a/packages/docusaurus-plugin-llms-txt/README.md b/packages/docusaurus-plugin-llms-txt/README.md index 4e840b1..23b3c40 100644 --- a/packages/docusaurus-plugin-llms-txt/README.md +++ b/packages/docusaurus-plugin-llms-txt/README.md @@ -1,5 +1,9 @@ 
# @signalwire/docusaurus-plugin-llms-txt +> **📣 Version 2.0 Documentation** This documentation is for version 2.0, which includes breaking +> API changes. If you're using version 1.x, please refer to the +> [v1.2.2 documentation on npm](https://www.npmjs.com/package/@signalwire/docusaurus-plugin-llms-txt/v/1.2.2). + A Docusaurus plugin that transforms your documentation into AI-friendly formats. It automatically converts your site's rendered HTML pages into clean markdown files and generates an `llms.txt` index file, making your documentation easily consumable by Large Language Models while preserving the @@ -46,7 +50,44 @@ markdown optimized for AI consumption. ### Section Organization Content is organized into logical sections that help AI systems understand documentation structure. -Sections can have custom names, descriptions, and route patterns for precise content grouping. +You can define sections in two ways: + +#### Manual Sections + +Define sections explicitly with custom names, descriptions, and route patterns: + +```typescript +sections: [ + { + id: 'api-docs', + name: 'API Reference', + routes: [{ route: '/api/**' }], + }, +]; +``` + +#### Auto-Generated Sections + +For routes not matching any manual section, the plugin auto-generates sections based on URL path +segments. Use `autoSectionDepth` to control which path level becomes top-level sections: + +**With `autoSectionDepth: 1`** (group by first segment): + +- `/blog/post-1.md` → "Blog" section +- `/blog/post-2.md` → "Blog" section +- `/docs/intro.md` → "Docs" section + +**With `autoSectionDepth: 2`** (group by second segment): + +- `/docs/advanced/plugin.md` → "Advanced" section +- `/docs/tutorial-basics/intro.md` → "Tutorial Basics" section +- `/blog/post-1.md` → "Post 1" section (falls back to depth 1) + +Routes shallower than `autoSectionDepth` automatically fall back to their actual depth, ensuring all +content is included. 
+ +Documents within each section are sorted by **path hierarchy** (depth-first, then lexicographic), +ensuring related content stays grouped together (e.g., `/api/methods/*` before `/api/guides/*`). ### Content Processing Pipeline @@ -58,6 +99,33 @@ HTML → Content Extraction (CSS selectors) → HTML Processing (rehype) → Mar Use glob patterns like `/docs/**` or `/api/*` to filter and organize content. Routes determine both what gets processed and how it's organized in sections. +### Default Excluded Routes + +The plugin automatically excludes common Docusaurus-generated pages from processing. These defaults +apply to all three `excludeRoutes` options (`markdown`, `llmsTxt`, and +`ui.copyPageContent.display`): + +- `/search` - Search page +- `/404.html` - 404 error page +- `/tags` - Global tags index +- `/tags/**` - Individual tag pages +- `/blog/tags` - Blog tags index +- `/blog/tags/**` - Individual blog tag pages +- `/blog/archive` - Blog archive page +- `/blog/authors` - Blog authors index +- `/blog/authors/**` - Individual author pages + +You can add your own patterns to any `excludeRoutes` array, which will be merged with these +defaults: + +```typescript +{ + markdown: { + excludeRoutes: ['/admin/**', '/internal/**'], // Merged with defaults + }, +} +``` + ## Installation ```bash @@ -115,54 +183,68 @@ const config: Config = { [ '@signalwire/docusaurus-plugin-llms-txt', { - // Generation options - generate: { - enableMarkdownFiles: true, - enableLlmsFullTxt: true, + // Markdown file generation options + markdown: { + enableFiles: true, relativePaths: true, - }, - - // Content inclusion - include: { includeBlog: true, includePages: true, + includeDocs: true, + includeVersionedDocs: true, excludeRoutes: ['/admin/**', '/internal/**'], }, - // Content organization - structure: { + // llms.txt index file configuration + llmsTxt: { + enableLlmsFullTxt: true, + includeBlog: true, + includePages: true, + includeDocs: true, + excludeRoutes: ['/admin/**'], + + // Site 
metadata + siteTitle: 'My Documentation', + siteDescription: 'Comprehensive documentation for developers', + + // Auto-section organization + autoSectionDepth: 2, // Group by 2nd path segment (/docs/api/* → "Api" section) + autoSectionPosition: 10, // Auto-sections appear after positioned manual sections + + // Manual section organization sections: [ { id: 'getting-started', name: 'Getting Started', + description: 'Quick start guides and tutorials', position: 1, routes: [{ route: '/docs/intro/**' }], }, { id: 'api-reference', name: 'API Reference', + description: 'Complete API documentation', position: 2, routes: [{ route: '/docs/api/**' }], - }, - ], - siteTitle: 'My Documentation', - siteDescription: 'Comprehensive documentation for developers', - }, - - // File attachments - processing: { - attachments: [ - { - source: './api/openapi.yaml', - title: 'OpenAPI Specification', - sectionId: 'api-reference', + attachments: [ + { + source: './api/openapi.yaml', + title: 'OpenAPI Specification', + description: 'Complete API specification in OpenAPI 3.0 format', + }, + ], }, ], }, // UI features (requires theme package) ui: { - copyPageContent: true, + copyPageContent: { + buttonLabel: 'Copy Page', + display: { + docs: true, + excludeRoutes: ['/admin/**'], + }, + }, }, } satisfies PluginOptions, ], @@ -174,268 +256,225 @@ export default config; ## API Reference -### Main Configuration Options - -| Property | Type | Required | Default | Description | -| ---------------- | ---------------------------------------------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `generate` | [`GenerateOptions`](#generateoptions) | ❌ | `{}` | Controls what output files are generated (markdown files, llms-full.txt) and how paths are formatted in the output. 
| -| `include` | [`IncludeOptions`](#includeoptions) | ❌ | `{}` | Specifies which types of content from your Docusaurus site (docs, blog, pages) should be processed and included in the output. | -| `structure` | [`StructureOptions`](#structureoptions) | ❌ | `{}` | Defines how your documentation is organized into logical sections in the llms.txt output, including site metadata, section definitions, and optional links. | -| `processing` | [`ProcessingOptions`](#processingoptions) | ❌ | `{}` | Configuration for how HTML content is processed, including markdown conversion settings, content extraction rules, and attachment handling. | -| `ui` | [`UiOptions`](#uioptions) | ❌ | `{}` | Settings for UI features that integrate with your Docusaurus theme, such as the copy content button that appears on documentation pages. | -| `runOnPostBuild` | `boolean` | ❌ | `true` | Whether the plugin automatically runs during Docusaurus's postBuild lifecycle phase. Set to false if you want to manually trigger generation via the CLI command instead. | -| `onSectionError` | `'ignore'` \| `'log'` \| `'warn'` \| `'throw'` | ❌ | `'warn'` | How the plugin handles errors when processing section definitions (e.g., invalid section IDs, route conflicts). 'throw' stops the build with an error, 'warn' shows warnings in console, 'log' silently logs to debug, 'ignore' skips silently. | -| `onRouteError` | `'ignore'` \| `'log'` \| `'warn'` \| `'throw'` | ❌ | `'warn'` | How the plugin handles errors when processing individual routes/pages (e.g., HTML parsing failures, missing content). Controls whether a single page failure should stop the entire build or just skip that page. | -| `logLevel` | `0` \| `1` \| `2` \| `3` | ❌ | `1` | Verbosity of console output during processing. 0 = silent (no output), 1 = normal (important messages only), 2 = verbose (detailed progress), 3 = debug (everything including cache operations). Higher levels help troubleshoot configuration issues. 
| - -### GenerateOptions - -Controls what files the plugin generates and how it formats output paths. - -| Property | Type | Required | Default | Description | -| --------------------- | --------- | -------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `enableMarkdownFiles` | `boolean` | ❌ | `true` | When enabled, generates individual .md files for each processed page alongside the llms.txt index. These files contain the full markdown content of each page and are saved to the build directory. Useful for direct file access, git tracking of content changes, or when you want to serve markdown files directly. | -| `enableLlmsFullTxt` | `boolean` | ❌ | `false` | Generates an additional llms-full.txt file that contains site title/description followed by organized sections with full content. Includes the complete processed markdown content of each document within their sections. Uses hierarchical organization without separate index/content sections. | -| `relativePaths` | `boolean` | ❌ | `true` | Determines how paths are formatted in the llms.txt file. When true, uses relative paths (e.g., ./docs/intro.md) that work for local file access. When false, uses absolute URLs (e.g., https://yoursite.com/docs/intro) for web access. Relative paths are typically better for local development and testing, while absolute URLs are needed when the llms.txt will be fetched over HTTP. | - -### IncludeOptions - -Specifies which content types from your Docusaurus site should be processed and included in the -generated output. 
- -| Property | Type | Required | Default | Description | -| ----------------------- | ---------- | -------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `includeBlog` | `boolean` | ❌ | `false` | When enabled, processes and includes all blog posts from your Docusaurus blog plugin in the generated llms.txt and markdown files. Blog posts are automatically organized into a 'Blog' section and sorted by date. Useful when your blog contains technical content, tutorials, or announcements that would be valuable for LLM consumption. | -| `includePages` | `boolean` | ❌ | `false` | When enabled, includes standalone pages (non-docs, non-blog pages) in the output. These are typically pages created in the `src/pages` directory like landing pages, about pages, or custom pages. Each page is processed and organized based on its URL structure. | -| `includeDocs` | `boolean` | ❌ | `true` | Controls whether documentation pages (from the docs plugin/directory) are processed and included. You might set this to false if you only want to generate llms.txt for your blog content, or if you're selectively processing specific doc versions. | -| `includeVersionedDocs` | `boolean` | ❌ | `true` | When your documentation uses Docusaurus versioning, this controls whether older versions are included in the output. Set to false to only process the current/latest version, significantly reducing output size for sites with many versions. Each version appears as a separate section in the output. 
| -| `includeGeneratedIndex` | `boolean` | ❌ | `true` | Includes auto-generated category index pages that Docusaurus creates for sidebar categories with `link.type: 'generated-index'`. These pages provide useful navigation context and overview information that helps LLMs understand the structure of your documentation. | -| `excludeRoutes` | `string[]` | ❌ | `[]` | Array of glob patterns for routes to exclude from processing, regardless of other include settings. Patterns are matched against the page route (not the file path). Useful for excluding admin pages, test pages, drafts, or pages with sensitive content. Example: `['/admin/**', '/**/test-*', '/docs/internal/**']`. Excluded routes are completely skipped during processing. | - -### StructureOptions - -Defines how your content is organized into logical sections in the llms.txt output, along with site -metadata. - -| Property | Type | Required | Default | Description | -| -------------------- | ------------------------------------------- | -------- | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `siteTitle` | `string` | ❌ | `undefined` | The title displayed in the llms.txt file header. This helps LLMs understand what documentation they're reading. Can be set to an empty string to omit the title entirely. If not specified, uses the site title from Docusaurus config. | -| `siteDescription` | `string` | ❌ | `undefined` | A brief description of your documentation site that appears in the llms.txt header, providing context to LLMs about the content's purpose, scope, and target audience. Can be set to an empty string to omit the description. 
| -| `enableDescriptions` | `boolean` | ❌ | `true` | When enabled, includes page descriptions and section descriptions throughout the generated llms.txt file, providing richer context for each link. Disable to create a more compact index without descriptions. | -| `sections` | [`SectionDefinition[]`](#sectiondefinition) | ❌ | `[]` | Array of section definitions that organize your content into logical groups in the llms.txt output. Each section becomes a heading with its pages listed below. Without sections, content is auto-organized by URL structure (e.g., /api/\* becomes 'Api' section). Sections provide better control over organization, custom ordering, and can include descriptions for context. | -| `optionalLinks` | [`OptionalLink[]`](#optionallink) | ❌ | `[]` | External URLs to include in the llms.txt file, useful for linking to external documentation, APIs, or resources that complement your docs but aren't part of your Docusaurus site. These links appear in an 'Optional' section at the end of llms.txt. | - -### ProcessingOptions - -Controls how HTML content is extracted and converted to markdown, plus additional content processing -features. - -| Property | Type | Required | Default | Description | -| ---------------------------- | ------------------------------------- | -------- | ------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `routeRules` | [`RouteRule[]`](#routerule) | ❌ | `[]` | Global rules for customizing content extraction for specific routes. 
**Use this for processing customization, not section assignment.** These rules can override content extraction selectors or modify processing behavior for groups of pages. Rules are applied in order and the first matching rule wins. Useful for handling special page types or applying consistent processing to page groups. | -| `contentSelectors` | `string[]` | ❌ | `['.theme-doc-markdown', 'main .container .col', 'main .theme-doc-wrapper', 'article', 'main .container', 'main']` | CSS selectors that identify the main content area in your HTML pages. The plugin extracts and converts content only from elements matching these selectors. Default selectors handle standard Docusaurus themes. Override when using custom themes or when you need to extract specific page sections. | -| `attachments` | [`AttachmentFile[]`](#attachmentfile) | ❌ | `[]` | Local files to include in the output alongside your documentation. Supports text files like OpenAPI specs, JSON schemas, configuration files, or markdown guides. Files are read, optionally formatted, and can be assigned to specific sections. The content is included in both llms.txt (as links) and optionally in llms-full.txt (full content). | -| `remarkStringify` | `RemarkStringifyOptions` | ❌ | `{}` | Configuration options for how markdown is formatted in the output. Controls aspects like bullet markers (-, _, +), emphasis markers (_, \_), code fence style, line width, and other markdown formatting preferences. Uses [remark-stringify options](https://github.com/remarkjs/remark/tree/main/packages/remark-stringify#options). | -| `remarkGfm` | `boolean` \| `RemarkGfmOptions` | ❌ | `true` | Enables GitHub Flavored Markdown features including tables, strikethrough, task lists, and literal URLs. When true, uses default GFM settings. Can be configured with specific options like `{singleTilde: false}` to customize strikethrough behavior. Set to false only if your content doesn't use GFM features. 
| -| `rehypeProcessTables` | `boolean` | ❌ | `true` | When enabled, HTML tables are converted to markdown table format for better readability in markdown files. When disabled, table HTML is preserved as-is in the output. Disable if you have complex tables that don't convert well to markdown or if you prefer to preserve exact HTML structure. | -| `beforeDefaultRehypePlugins` | `PluginInput[]` | ❌ | `[]` | Custom rehype plugins to run BEFORE the default HTML processing pipeline. Use for preprocessing HTML, adding custom attributes, or modifying HTML structure before conversion. Plugins receive the HTML AST and can transform it. Format: `[plugin]` or `[plugin, options]` or `[plugin, options, settings]` following unified.js conventions. Advanced feature for extending HTML processing capabilities. | -| `rehypePlugins` | `PluginInput[]` | ❌ | `[]` | Custom rehype plugins that REPLACE the entire default HTML processing pipeline. Use with caution as it overrides all built-in HTML processing including table conversion and link processing. Format: `[plugin]` or `[plugin, options]` or `[plugin, options, settings]` following unified.js conventions. Only use when you need complete control over HTML transformation. | -| `beforeDefaultRemarkPlugins` | `PluginInput[]` | ❌ | `[]` | Custom remark plugins to run BEFORE the default markdown processing pipeline. Use for adding custom markdown transformations, syntax extensions, or preprocessing markdown before final formatting. Format: `[plugin]` or `[plugin, options]` or `[plugin, options, settings]` following unified.js conventions. Plugins work with the markdown AST. | -| `remarkPlugins` | `PluginInput[]` | ❌ | `[]` | Custom remark plugins that REPLACE the entire default markdown processing pipeline. Overrides all built-in markdown processing including GFM support and formatting. Format: `[plugin]` or `[plugin, options]` or `[plugin, options, settings]` following unified.js conventions. 
Only use when you need complete control over markdown transformation. | +The plugin configuration is organized into three main areas: **markdown file generation**, +**llms.txt index creation**, and **UI features**. + +### Top-Level Options + +These options control plugin behavior and error handling. + +| Property | Type | Required | Default | Description | +| ---------------- | ---------------------------------------------- | -------- | -------- | -------------------------------------------------------------------------------------------------------- | +| `markdown` | [`MarkdownOptions`](#markdownoptions) | ❌ | `{}` | Generate individual .md files for each page. See [MarkdownOptions](#markdownoptions) below. | +| `llmsTxt` | [`LlmsTxtOptions`](#llmstxtoptions) | ❌ | `{}` | Generate llms.txt index file with organized content. See [LlmsTxtOptions](#llmstxtoptions) below. | +| `ui` | [`UiOptions`](#uioptions) | ❌ | `{}` | Enable UI features like copy buttons. See [UiOptions](#uioptions) below. | +| `runOnPostBuild` | `boolean` | ❌ | `true` | Automatically run during build. Set to `false` to manually trigger via CLI. | +| `onSectionError` | `'ignore'` \| `'log'` \| `'warn'` \| `'throw'` | ❌ | `'warn'` | How to handle section configuration errors (invalid IDs, route conflicts). | +| `onRouteError` | `'ignore'` \| `'log'` \| `'warn'` \| `'throw'` | ❌ | `'warn'` | How to handle page processing errors (HTML parsing failures). `'warn'` skips failed pages and continues. | +| `logLevel` | `0` \| `1` \| `2` \| `3` | ❌ | `1` | Console output verbosity. `0`=silent, `1`=normal, `2`=verbose, `3`=debug. | + +--- + +### MarkdownOptions + +Generate individual .md files for each page. 
+ +| Property | Type | Required | Default | Description | +| ---------------------------- | --------------------------- | -------- | ----------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `enableFiles` | `boolean` | ❌ | `true` | Generate .md files. Disable to skip file generation entirely. | +| `relativePaths` | `boolean` | ❌ | `true` | Use relative paths (`./docs/intro.md`) vs absolute URLs (`https://site.com/docs/intro`). | +| `includeDocs` | `boolean` | ❌ | `true` | Include documentation pages. | +| `includeVersionedDocs` | `boolean` | ❌ | `true` | Include older doc versions. Disable to only process current version. | +| `includeBlog` | `boolean` | ❌ | `false` | Include blog posts. | +| `includePages` | `boolean` | ❌ | `false` | Include standalone pages from `src/pages/`. | +| `includeGeneratedIndex` | `boolean` | ❌ | `true` | Include auto-generated category index pages. | +| `excludeRoutes` | `string[]` | ❌ | See [default excludes](#default-excluded-routes) | Glob patterns to exclude routes from markdown generation. Defaults include common Docusaurus pages like `/search`, `/blog/tags/**`, `/blog/archive`, etc. Add your own patterns like `['/admin/**', '/internal/**']`. | +| `contentSelectors` | `string[]` | ❌ | `['.theme-doc-markdown', 'main .container .col', 'main .theme-doc-wrapper', 'article', 'main']` | CSS selectors to find main content. First match wins. | +| `routeRules` | [`RouteRule[]`](#routerule) | ❌ | `[]` | Override selectors for specific routes. See [RouteRule](#routerule). | +| `remarkStringify` | `object` | ❌ | `{}` | Markdown formatting options. See [remark-stringify](https://github.com/remarkjs/remark/tree/main/packages/remark-stringify#options). 
| +| `remarkGfm` | `boolean \| object` | ❌ | `true` | Enable GitHub Flavored Markdown (tables, strikethrough, task lists). | +| `rehypeProcessTables` | `boolean` | ❌ | `true` | Convert HTML tables to markdown. Disable for complex tables. | +| `beforeDefaultRehypePlugins` | `PluginInput[]` | ❌ | `[]` | Custom rehype plugins to run BEFORE defaults. | +| `rehypePlugins` | `PluginInput[]` | ❌ | `[]` | Custom rehype plugins that REPLACE defaults. Use with caution. | +| `beforeDefaultRemarkPlugins` | `PluginInput[]` | ❌ | `[]` | Custom remark plugins to run BEFORE defaults. | +| `remarkPlugins` | `PluginInput[]` | ❌ | `[]` | Custom remark plugins that REPLACE defaults. Use with caution. | + +--- + +### LlmsTxtOptions + +Generate and configure the llms.txt index file. + +| Property | Type | Required | Default | Description | +| ----------------------- | ------------------------------------------- | -------- | ------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `enableLlmsFullTxt` | `boolean` | ❌ | `false` | Generate llms-full.txt with complete page content (not just links). | +| `includeDocs` | `boolean` | ❌ | `true` | Include documentation pages. | +| `includeVersionedDocs` | `boolean` | ❌ | `false` | ⚠️ Include older doc versions. **Default is `false`** (different from markdown). | +| `includeBlog` | `boolean` | ❌ | `false` | Include blog posts. | +| `includePages` | `boolean` | ❌ | `false` | Include standalone pages from `src/pages/`. | +| `includeGeneratedIndex` | `boolean` | ❌ | `true` | Include auto-generated category index pages. | +| `excludeRoutes` | `string[]` | ❌ | See [default excludes](#default-excluded-routes) | Glob patterns to exclude routes from llms.txt. 
Defaults include `/search`, `/blog/tags/**`, etc. Add your own like `['/admin/**']`. | +| `sections` | [`SectionDefinition[]`](#sectiondefinition) | ❌ | `[]` | Organize content into named sections. See [SectionDefinition](#sectiondefinition). | +| `autoSectionDepth` | `1 \| 2 \| 3 \| 4 \| 5 \| 6` | ❌ | `1` | Path depth for auto-generated sections. `1`=group by first segment (`/blog/*` → "Blog"), `2`=group by second segment (`/docs/advanced/*` → "Advanced"). Routes shallower than this depth fall back to their actual depth. Only affects auto-generated sections; manual sections are unaffected. | +| `autoSectionPosition` | `number` | ❌ | `undefined` | Position for auto-generated sections. `undefined`=after positioned sections, number=sort with positioned sections. | +| `siteTitle` | `string` | ❌ | `''` | Title for llms.txt header. Falls back to Docusaurus config if not set. | +| `siteDescription` | `string` | ❌ | `''` | Description for llms.txt header. | +| `enableDescriptions` | `boolean` | ❌ | `true` | Include page and section descriptions. Disable for a more compact index. | +| `attachments` | [`AttachmentFile[]`](#attachmentfile) | ❌ | `[]` | Include files like OpenAPI specs, schemas. Appear in 'Attachments' section. | +| `optionalLinks` | [`OptionalLink[]`](#optionallink) | ❌ | `[]` | External links (APIs, forums). Appear in 'Optional' section. | + +--- ### UiOptions -Settings for UI features that integrate with your Docusaurus theme. 
- -| Property | Type | Required | Default | Description | -| ----------------- | ---------------------------------------------------------------- | -------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `copyPageContent` | `boolean` \| [`CopyPageContentOptions`](#copypagecontentoptions) | ❌ | `false` | Enables a copy button on each documentation page that allows users to copy the page content in various formats or open AI interfaces with the page URL. When true, uses default settings with all formats enabled. When configured as an object, allows customization of button text and available actions. The button integrates seamlessly with Docusaurus themes and appears in the doc page header. See [Copy Page Content Feature](./docs/features/copy-page-content.md) for setup details and current limitations. | - -### Complex Types - -#### SectionDefinition - -Defines a logical section in your documentation for organizing content in llms.txt. 
- -| Property | Type | Required | Default | Description | -| ------------- | --------------------------------- | -------- | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `id` | `string` | ✅ | - | Unique identifier for the section in kebab-case format (e.g., 'api-reference', 'getting-started'). Must match pattern `/^[a-z0-9-]+$/` (lowercase letters, numbers, and hyphens only). This ID is used internally for routing, attachment assignment, and link organization. Must be unique across all sections including subsections. | -| `name` | `string` | ✅ | - | Display name shown as the section heading in llms.txt (e.g., 'API Reference', 'Getting Started'). This is what LLMs and users will see as the section title. Can contain spaces and special characters. | -| `description` | `string` | ❌ | `undefined` | Optional description that appears under the section heading in the llms.txt output, providing context about what content is in this section. Formatted as a blockquote (with >) to distinguish it from regular content. Helps LLMs understand the purpose and scope of the section. | -| `position` | `number` | ❌ | `undefined` | Controls the sort order of sections in the output. Lower numbers appear first (position 1 comes before position 2). Sections without a position are sorted alphabetically after all positioned sections. Useful for ensuring important sections like 'Getting Started' appear at the top. | -| `routes` | [`SectionRoute[]`](#sectionroute) | ❌ | `[]` | Array of route patterns that determine which pages belong to this section. Uses glob patterns like '/api/\*\*' to match multiple routes. 
Pages matching these routes are assigned to this section, overriding the default URL-based auto-assignment. First matching section wins when routes overlap. | -| `subsections` | `SectionDefinition[]` | ❌ | `[]` | Nested sections that appear as subsections in the output hierarchy. Useful for creating multi-level organization like API > Authentication > OAuth. Subsections inherit context from parent sections and can have their own routes and descriptions. Maximum recommended nesting is 3 levels for readability. | - -#### SectionRoute - -Configuration for route patterns within a section definition. **Use this to assign specific routes -to sections.** - -| Property | Type | Required | Default | Description | -| ------------------ | ---------- | -------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `route` | `string` | ✅ | - | Glob pattern that matches page routes to include in this section (e.g., '/api/**', '/docs/advanced/_'). The pattern is matched against the page's URL path, not the file system path. Supports standard glob syntax including _ (single level) and ** (multiple levels). **Primary purpose: Section assignment.** | -| `contentSelectors` | `string[]` | ❌ | `undefined` | Optional CSS selectors that override the global content extraction selectors for pages matching this route. Useful when certain pages in a section have different HTML structure. Example: `['.api-docs-content', 'article.api']` for API documentation with custom layout. | +Enable UI features on your documentation pages. -#### RouteRule - -Global rules for customizing content extraction for specific routes. 
**Use this for processing -customization, not section assignment.** - -| Property | Type | Required | Default | Description | -| ------------------ | ---------- | -------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `route` | `string` | ✅ | - | Glob pattern that matches page routes this rule applies to (e.g., '/api/**', '/blog/2024/**'). All pages matching this pattern will have this rule's content extraction settings applied. Pattern matching happens during HTML processing. **Primary purpose: Content extraction customization.** | -| `contentSelectors` | `string[]` | ❌ | `undefined` | Custom CSS selectors for extracting content from pages matching this route pattern. Overrides the global contentSelectors setting. Use when specific pages or page groups have different HTML structure or need special content extraction logic. **Note: RouteRules do NOT assign pages to sections.** | - -#### AttachmentFile - -Configuration for including external files in the llms.txt output. +| Property | Type | Required | Default | Description | +| ----------------- | ----------------------------------------------------------------- | -------- | ------- | -------------------------------------------------------------------------------------------------------------- | +| `copyPageContent` | `boolean \| ` [`CopyPageContentOptions`](#copypagecontentoptions) | ❌ | `false` | Add copy button to doc pages. Use `true` for defaults or object for customization. 
**Requires theme package.** | -| Property | Type | Required | Default | Description | -| ------------------ | --------- | -------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `source` | `string` | ✅ | - | Path to the local file to include, relative to your Docusaurus site directory (e.g., './specs/openapi.yaml', './schemas/config.json'). The file must exist at build time. Supports any text-based file format. Binary files are not supported. | -| `title` | `string` | ✅ | - | Display title for the attachment shown in llms.txt. This becomes the link text that LLMs will see (e.g., 'OpenAPI Specification', 'Database Schema'). Should be descriptive enough to indicate the file's purpose without needing to read it. | -| `description` | `string` | ❌ | `undefined` | Optional description providing additional context about the attachment's purpose, format, or usage. Appears as indented text under the title in llms.txt. Helps LLMs understand when and why they might need to reference this file. | -| `sectionId` | `string` | ❌ | `undefined` | ID of the section where this attachment should appear. Must match pattern `/^[a-z0-9-]+$/` if specified. If specified, the attachment is placed within that section's content. If not specified, attachments appear in a dedicated 'Attachments' section or follow URL-based organization. | -| `includeInFullTxt` | `boolean` | ❌ | `true` | Whether to include the full content of this attachment in the llms-full.txt file. Set to false for very large files that might bloat the output or for files that are better accessed separately. The attachment link always appears in llms.txt regardless of this setting. 
| - -#### OptionalLink +--- -External URLs to include in the llms.txt index for referencing external resources. +### Complex Types Reference -| Property | Type | Required | Default | Description | -| ------------- | -------- | -------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `title` | `string` | ✅ | - | Display text for the link in llms.txt. This is what LLMs and users will see as the clickable/referenced text. Should clearly indicate what resource the link points to (e.g., 'React Documentation', 'API Status Page'). | -| `url` | `string` | ✅ | - | The complete external URL to link to (e.g., 'https://reactjs.org/docs'). Must be a valid HTTP or HTTPS URL. The plugin doesn't validate that the URL is reachable, only that it's properly formatted. | -| `description` | `string` | ❌ | `undefined` | Optional description that provides context about what's at this URL and why it's relevant. Appears as indented text under the link in the llms.txt output. Helps LLMs understand whether to reference this external resource for specific types of questions. | +These types are used in the configuration options above. -#### CopyPageContentOptions - -Configuration for the copy page content button feature when using object configuration. - -| Property | Type | Required | Default | Description | -| --------------------------- | --------------------- | -------- | ------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `buttonLabel` | `string` | ❌ | `'Copy Page'` | Custom label text for the copy button that appears in the doc header. 
Keep it short as it appears alongside a dropdown arrow. Examples: 'Copy Page', 'Export', 'Share'. | -| `actions` | `object` | ❌ | See below | Configuration for available copy and share actions in the dropdown menu. | -| `actions.markdown` | `boolean` | ❌ | `true` | Whether to include the "Copy as Markdown" option in the dropdown menu. When selected, copies the page content in clean markdown format, useful for pasting into markdown editors, GitHub issues, or documentation tools. | -| `actions.ai` | `object` | ❌ | See below | Configuration for AI integration options in the dropdown menu. | -| `actions.ai.chatGPT` | `boolean` \| `object` | ❌ | `true` | ChatGPT integration. When `true`, uses default prompt. When an object, can specify custom `prompt` string. Opens ChatGPT web interface with the documentation URL and prompt. | -| `actions.ai.chatGPT.prompt` | `string` | ❌ | `'Analyze this documentation:'` | Custom prompt text to include when opening ChatGPT. Only used when `chatGPT` is configured as an object. The documentation URL is automatically appended after the prompt. | -| `actions.ai.claude` | `boolean` \| `object` | ❌ | `true` | Claude integration. When `true`, uses default prompt. When an object, can specify custom `prompt` string. Opens Claude web interface with the documentation URL and prompt. | -| `actions.ai.claude.prompt` | `string` | ❌ | `'Analyze this documentation:'` | Custom prompt text to include when opening Claude. Only used when `claude` is configured as an object. The documentation URL is automatically appended after the prompt. | +#### SectionDefinition -## Troubleshooting +Organize content into logical sections in llms.txt. 
-### Common Issues +```typescript +{ + id: 'api-docs', // Unique kebab-case ID + name: 'API Documentation', // Display name + description: 'Complete API docs', // Optional context + position: 1, // Sort order (lower = earlier) + routes: [{ route: '/api/**' }], // Which pages belong here + subsections: [], // Nested sections + attachments: [], // Section-specific files + optionalLinks: [] // Section-specific external links +} +``` -**"No cached routes found"** - Run `npm run build` first; the plugin needs route cache from build -**Empty or minimal content** - Check `contentSelectors` match your theme; use `logLevel: 3` for -debugging **Sections not organizing correctly** - Verify route patterns don't overlap; first -matching section wins **Missing pages** - Check `excludeRoutes` patterns and ensure correct -`include` options are set **Build performance issues** - Use `excludeRoutes` to skip large sections; -disable `enableLlmsFullTxt` if not needed +| Property | Type | Required | Description | +| --------------- | ------------------------------------- | -------- | ----------------------------------------------------------------------------------------- | +| `id` | `string` | ✅ | Unique identifier (lowercase, numbers, hyphens only). Must be unique across all sections. | +| `name` | `string` | ✅ | Display name shown in llms.txt. | +| `description` | `string` | ❌ | Optional description shown under heading. | +| `position` | `number` | ❌ | Sort order. Lower numbers appear first. | +| `routes` | [`SectionRoute[]`](#sectionroute) | ❌ | Glob patterns to match pages to this section. | +| `subsections` | `SectionDefinition[]` | ❌ | Nested sections (max 3 levels recommended). | +| `attachments` | [`AttachmentFile[]`](#attachmentfile) | ❌ | Files specific to this section. | +| `optionalLinks` | [`OptionalLink[]`](#optionallink) | ❌ | External links specific to this section. 
| -## Advanced Usage +#### SectionRoute -### Custom Remark/Rehype Plugins +Assign routes to sections using glob patterns. ```typescript -import remarkCustomPlugin from './my-remark-plugin'; - -const pluginConfig: PluginOptions = { - processing: { - beforeDefaultRemarkPlugins: [[remarkCustomPlugin, { option: 'value' }]], - }, -}; +{ + route: '/api/**', // Match all /api/* routes + contentSelectors: ['.api-content'] // Optional: custom selectors for these pages +} ``` -### Section Hierarchies +| Property | Type | Required | Description | +| ------------------ | ---------- | -------- | ---------------------------------------------------------- | +| `route` | `string` | ✅ | Glob pattern (`*` = single level, `**` = multiple levels). | +| `contentSelectors` | `string[]` | ❌ | Override content extraction for these routes. | -```typescript -import type { PluginOptions } from '@signalwire/docusaurus-plugin-llms-txt/public'; +#### RouteRule -const pluginConfig: PluginOptions = { - structure: { - sections: [ - { - id: 'api', - name: 'API Reference', - subsections: [ - { id: 'rest', name: 'REST API', routes: [{ route: '/api/rest/**' }] }, - { id: 'graphql', name: 'GraphQL', routes: [{ route: '/api/graphql/**' }] }, - ], - }, - ], - }, -}; +Customize content extraction for specific routes (separate from section assignment). + +```typescript +{ + route: '/api/**', + contentSelectors: ['.api-content', 'article'] +} ``` -### Performance Optimization +| Property | Type | Required | Description | +| ------------------ | ---------- | -------- | ------------------------------------- | +| `route` | `string` | ✅ | Glob pattern matching routes. | +| `contentSelectors` | `string[]` | ❌ | CSS selectors for content extraction. 
| -- Use `excludeRoutes` to skip unnecessary pages -- Set `includeVersionedDocs: false` to process only current version -- Enable `logLevel: 0` for production builds to reduce console output -- Use route-specific `contentSelectors` for pages with different structures +**Note:** Use `RouteRule` (in `markdown.routeRules`) for processing customization. Use +`SectionRoute` (in `llmsTxt.sections[].routes`) for section assignment. -## CLI Commands +#### AttachmentFile -### Generate Command +Include external text files in your output. -```bash -npx docusaurus llms-txt [siteDir] +```typescript +{ + source: './specs/openapi.yaml', + title: 'API Specification', + description: 'Complete OpenAPI 3.0 spec', + fileName: 'api-spec', // Custom output filename (prevents collisions) + includeInFullTxt: true +} ``` -Generates `llms.txt` and markdown files using cached routes from a previous build. +| Property | Type | Default | Description | +| ------------------ | --------- | ------- | ----------------------------------------------------------------------------------------------------------------------- | +| `source` | `string` | - | File path relative to site root. | +| `title` | `string` | - | Display name in llms.txt. | +| `description` | `string` | - | Optional context about the file. | +| `fileName` | `string` | - | Custom output filename (without extension). If not provided, uses source filename. Auto-numbered if collision detected. | +| `includeInFullTxt` | `boolean` | `true` | Include full content in llms-full.txt. | -**Prerequisites:** You must run `npm run build` first to create the route cache. +#### OptionalLink -### Clean Command +Link to external resources. 
-```bash -npx docusaurus llms-txt-clean [siteDir] [options] +```typescript +{ + title: 'API Status Page', + url: 'https://status.example.com', + description: 'Real-time API status' +} ``` -Removes all generated files: +| Property | Type | Required | Description | +| ------------- | -------- | -------- | ---------------------------------- | +| `title` | `string` | ✅ | Link text shown in llms.txt. | +| `url` | `string` | ✅ | External URL (must be HTTP/HTTPS). | +| `description` | `string` | ❌ | Optional context about the link. | -- Deletes `build/llms.txt`, `build/llms-full.txt`, and all generated markdown files -- Use `--clear-cache` to also clear the `.docusaurus/llms-txt-plugin` cache directory -- Useful for forcing full regeneration or cleaning up after plugin removal - -## Copy Page Content Feature - -Add a copy button to your documentation pages with AI integration: +#### CopyPageContentOptions -```bash -# Install both packages -npm install @signalwire/docusaurus-plugin-llms-txt @signalwire/docusaurus-theme-llms-txt -``` +Configure the copy button feature (requires theme package). 
```typescript -import type { Config } from '@docusaurus/types'; -import type { PluginOptions } from '@signalwire/docusaurus-plugin-llms-txt/public'; - -const config: Config = { - themes: ['@signalwire/docusaurus-theme-llms-txt'], - plugins: [ - [ - '@signalwire/docusaurus-plugin-llms-txt', - { - ui: { - copyPageContent: true, // Enable with defaults - }, - } satisfies PluginOptions, - ], - ], -}; - -export default config; +{ + buttonLabel: 'Copy Page', + display: { + docs: true, + excludeRoutes: ['/admin/**'] + }, + contentStrategy: 'prefer-markdown', + actions: { + viewMarkdown: true, + ai: { + chatGPT: true, + claude: { prompt: 'Help me understand this:' } + } + } +} ``` -**Important Notes:** - -- Currently only supports documentation pages (not blog or custom pages) -- Requires both the plugin AND theme package to be installed -- Button appears in the doc page header with dropdown for multiple actions - -## License +| Property | Type | Required | Default | Description | +| ----------------------- | ---------------------------------- | -------- | ------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `buttonLabel` | `string` | ❌ | `'Copy Page'` | Button text. | +| `display` | `object` | ❌ | `{}` | Control where button appears. | +| `display.docs` | `boolean` | ❌ | `true` | Show on docs pages. | +| `display.excludeRoutes` | `string[]` | ❌ | See [default excludes](#default-excluded-routes) | Hide copy button on specific routes. Defaults include `/search`, `/blog/tags/**`, etc. | +| `contentStrategy` | `'prefer-markdown' \| 'html-only'` | ❌ | `'prefer-markdown'` | Controls what content is copied. `'prefer-markdown'` copies markdown if available, falls back to HTML. `'html-only'` always copies HTML. 
Dropdown menu item shows "Copy Raw Markdown" or "Copy Raw HTML" accordingly. | +| `actions` | `object` | ❌ | `{}` | Available actions in dropdown. | +| `actions.viewMarkdown` | `boolean` | ❌ | `true` | Show "View Markdown" option in dropdown when markdown file exists. Independent of `contentStrategy`. | +| `actions.ai` | `object` | ❌ | `{}` | AI integration options. | +| `actions.ai.chatGPT` | `boolean \| { prompt?: string }` | ❌ | `true` | ChatGPT integration. Default prompt: "Analyze this documentation:" | +| `actions.ai.claude` | `boolean \| { prompt?: string }` | ❌ | `true` | Claude integration. Default prompt: "Analyze this documentation:" | MIT © [SignalWire](https://github.com/signalwire) diff --git a/packages/docusaurus-plugin-llms-txt/package.json b/packages/docusaurus-plugin-llms-txt/package.json index 11424cd..305305d 100644 --- a/packages/docusaurus-plugin-llms-txt/package.json +++ b/packages/docusaurus-plugin-llms-txt/package.json @@ -1,6 +1,6 @@ { "name": "@signalwire/docusaurus-plugin-llms-txt", - "version": "2.0.0-alpha.0", + "version": "2.0.0-alpha.6", "type": "module", "description": "Generate Markdown versions of Docusaurus HTML pages and an llms.txt index file", "main": "./lib/src/index.js", diff --git a/packages/docusaurus-plugin-llms-txt/src/cache/cache-filter.ts b/packages/docusaurus-plugin-llms-txt/src/cache/cache-filter.ts index a9a3df9..9b92a90 100644 --- a/packages/docusaurus-plugin-llms-txt/src/cache/cache-filter.ts +++ b/packages/docusaurus-plugin-llms-txt/src/cache/cache-filter.ts @@ -5,22 +5,22 @@ * LICENSE file in the root directory of this source tree. 
*/ -import { getIncludeConfig } from '../config'; -import { CONTENT_TYPES } from '../constants'; +import { getLlmsTxtIncludeConfig } from '../config'; +import { CONTENT_TYPES, DEFAULT_EXCLUDE_ROUTES } from '../constants'; import { createExclusionMatcher } from '../discovery/exclusion-matcher'; +import type { IncludeFilterConfig } from '../discovery/content-classifier'; import type { CachedRouteInfo, PluginOptions, Logger } from '../types'; /** - * Filter cached routes based on current plugin configuration - * This provides unified filtering logic for both build and CLI contexts + * Filter cached routes based on include configuration + * @internal */ -export function filterCachedRoutesForConfig( +export function filterCachedRoutesByConfig( cachedRoutes: readonly CachedRouteInfo[], - config: PluginOptions, + includeConfig: IncludeFilterConfig, logger?: Logger ): CachedRouteInfo[] { - const includeConfig = getIncludeConfig(config); const isExcludedByPattern = createExclusionMatcher( includeConfig.excludeRoutes ); @@ -101,6 +101,66 @@ export function filterCachedRoutesForConfig( return filteredRoutes; } +/** + * Filter cached routes for indexing (llms.txt generation) + * Uses indexing configuration + */ +export function filterCachedRoutesForIndexing( + cachedRoutes: readonly CachedRouteInfo[], + config: PluginOptions, + logger?: Logger +): CachedRouteInfo[] { + const includeConfig = getLlmsTxtIncludeConfig(config); + return filterCachedRoutesByConfig(cachedRoutes, includeConfig, logger); +} + +/** + * Filter cached routes for processing (union of generate and indexing) + * Uses the union of both configs to ensure we have everything needed + */ +export function filterCachedRoutesForProcessing( + cachedRoutes: readonly CachedRouteInfo[], + config: PluginOptions, + logger?: Logger +): CachedRouteInfo[] { + const markdown = config.markdown ?? {}; + const llmsTxt = config.llmsTxt ?? 
{}; + + // Union of both configs + const unionConfig: IncludeFilterConfig = { + includeDocs: + (markdown.includeDocs ?? true) || (llmsTxt.includeDocs ?? true), + includeVersionedDocs: + (markdown.includeVersionedDocs ?? true) || + (llmsTxt.includeVersionedDocs ?? false), + includeBlog: + (markdown.includeBlog ?? false) || (llmsTxt.includeBlog ?? false), + includePages: + (markdown.includePages ?? false) || (llmsTxt.includePages ?? false), + includeGeneratedIndex: + (markdown.includeGeneratedIndex ?? true) || + (llmsTxt.includeGeneratedIndex ?? true), + excludeRoutes: [ + ...DEFAULT_EXCLUDE_ROUTES, + ...(markdown.excludeRoutes ?? []), + ...(llmsTxt.excludeRoutes ?? []), + ], + }; + + return filterCachedRoutesByConfig(cachedRoutes, unionConfig, logger); +} + +/** + * @deprecated Use filterCachedRoutesForIndexing or filterCachedRoutesForProcessing + */ +export function filterCachedRoutesForConfig( + cachedRoutes: readonly CachedRouteInfo[], + config: PluginOptions, + logger?: Logger +): CachedRouteInfo[] { + return filterCachedRoutesForIndexing(cachedRoutes, config, logger); +} + /** * Check if cache-based filtering would produce different results than * current cache @@ -115,7 +175,7 @@ export function wouldFilteringChangeCachedRoutes( filteredCount: number; changeReason?: string; } { - const filteredRoutes = filterCachedRoutesForConfig(cachedRoutes, config); + const filteredRoutes = filterCachedRoutesForIndexing(cachedRoutes, config); const wouldChange = filteredRoutes.length !== cachedRoutes.length; let changeReason: string | undefined; diff --git a/packages/docusaurus-plugin-llms-txt/src/cache/cache-validation.ts b/packages/docusaurus-plugin-llms-txt/src/cache/cache-validation.ts index e70a9fe..d829a7e 100644 --- a/packages/docusaurus-plugin-llms-txt/src/cache/cache-validation.ts +++ b/packages/docusaurus-plugin-llms-txt/src/cache/cache-validation.ts @@ -10,7 +10,7 @@ import path from 'path'; import { md5Hash } from '@docusaurus/utils'; import fs from 'fs-extra'; 
-import { getGenerateConfig } from '../config'; +import { getMarkdownConfig } from '../config'; import type { CachedRouteInfo, @@ -93,9 +93,9 @@ export async function markdownFileStateMatches( currentConfig: PluginOptions, directories: DirectoryConfig ): Promise { - const generateConfig = getGenerateConfig(currentConfig); + const generateConfig = getMarkdownConfig(currentConfig); - if (!generateConfig.enableMarkdownFiles) { + if (!generateConfig.enableFiles) { // If markdown files are disabled, we don't care about their state return { isValid: true }; } @@ -172,23 +172,19 @@ export async function hashFile(filePath: string): Promise { export function calcConfigHash(options: Partial): string { // Collect all options that affect output generation (not just filtering) const hashableOptions = { - // Output generation options (affect file creation) - generate: options.generate, + // Markdown file generation and processing options (affect file + // creation and transformation) + markdown: options.markdown, - // Structure options (affect content organization and headers) - structure: options.structure, - - // Processing options (affect content transformation) - processing: options.processing, + // llms.txt index options (affect what goes in llms.txt, structure, + // and attachments) + llmsTxt: options.llmsTxt, // UI options (affect output features) ui: options.ui, // Top-level runtime options that affect generation onSectionError: options.onSectionError, - - // Note: Filtering options (include.*) are excluded since they only affect - // which routes are processed, not how individual files are generated }; // Remove undefined values for stable hashing diff --git a/packages/docusaurus-plugin-llms-txt/src/cache/cache.ts b/packages/docusaurus-plugin-llms-txt/src/cache/cache.ts index 8d64a3f..67e9dd2 100644 --- a/packages/docusaurus-plugin-llms-txt/src/cache/cache.ts +++ b/packages/docusaurus-plugin-llms-txt/src/cache/cache.ts @@ -11,6 +11,8 @@ import packageJson from 
'../../package.json'; import { CACHE_FILENAME } from '../constants'; import { CacheIO } from './cache-io'; import { isCachedRouteValid, calcConfigHash } from './cache-validation'; +import { getMarkdownConfig } from '../config'; +import { getEffectiveConfigForRoute } from '../config/route-rules'; import { classifyRoute } from '../discovery/content-classifier'; import { routePathToHtmlPath } from '../discovery/route-filter'; import { PathManager, htmlPathToMdPath } from '../filesystem/paths'; @@ -31,6 +33,7 @@ export class CacheManager { private pathManager: PathManager; private cacheIO: CacheIO; private siteConfig?: { baseUrl: string; trailingSlash?: boolean }; + private config: PluginOptions; constructor( siteDir: string, @@ -42,6 +45,7 @@ export class CacheManager { ) { this.pathManager = new PathManager(siteDir, config, outDir); this.siteConfig = siteConfig; + this.config = config; const cacheDir = path.join(generatedFilesDir, 'docusaurus-plugin-llms-txt'); const cachePath = path.join(cacheDir, CACHE_FILENAME); this.cacheIO = new CacheIO(cachePath, logger); @@ -121,10 +125,24 @@ export class CacheManager { isGeneratedIndex, }; + // Resolve content selectors for this route + const effectiveConfig = getEffectiveConfigForRoute( + route.path, + this.config + ); + + // Get content selectors from effective config (route-specific or + // base config). If effectiveConfig has explicit contentSelectors, + // use them. Otherwise, use markdown config contentSelectors. + const markdownConfig = getMarkdownConfig(this.config); + const contentSelectors = + effectiveConfig.contentSelectors ?? 
markdownConfig.contentSelectors; + return { ...baseInfo, ...pluginInfo, ...metadata, + contentSelectors, } satisfies CachedRouteInfo; }); @@ -136,7 +154,7 @@ export class CacheManager { cachedRoute: CachedRouteInfo, doc: DocInfo, hash: string, - enableMarkdownFiles: boolean + enableFiles: boolean ): CachedRouteInfo { const baseUpdate = { ...cachedRoute, @@ -145,9 +163,9 @@ export class CacheManager { description: doc.description, }; - if (enableMarkdownFiles && doc.markdownFile) { + if (enableFiles && doc.markdownFile) { return { ...baseUpdate, markdownFile: doc.markdownFile }; - } else if (enableMarkdownFiles && cachedRoute.htmlPath) { + } else if (enableFiles && cachedRoute.htmlPath) { const mdPath = htmlPathToMdPath( cachedRoute.htmlPath, this.pathManager.directories.mdOutDir diff --git a/packages/docusaurus-plugin-llms-txt/src/cli/command.ts b/packages/docusaurus-plugin-llms-txt/src/cli/command.ts index 7c3c403..90a1ce7 100644 --- a/packages/docusaurus-plugin-llms-txt/src/cli/command.ts +++ b/packages/docusaurus-plugin-llms-txt/src/cli/command.ts @@ -9,7 +9,7 @@ import path from 'path'; import fs from 'fs-extra'; import { CacheManager } from '../cache/cache'; -import { getConfig, getProcessingConfig, getGenerateConfig } from '../config'; +import { getConfig, getMarkdownConfig } from '../config'; import { LLMS_TXT_FILENAME, LLMS_FULL_TXT_FILENAME, @@ -35,8 +35,7 @@ async function runCliConversion( context: LoadContext ): Promise { const config = getConfig(options); - const processingConfig = getProcessingConfig(config); - const generateConfig = getGenerateConfig(config); + const markdownConfig = getMarkdownConfig(config); const log = createPluginLogger(config); try { @@ -51,8 +50,8 @@ async function runCliConversion( siteConfig: context.siteConfig, outDir: context.outDir, logger: log, - contentSelectors: processingConfig.contentSelectors, - relativePaths: generateConfig.relativePaths, + contentSelectors: markdownConfig.contentSelectors, + relativePaths: 
markdownConfig.relativePaths, } ); diff --git a/packages/docusaurus-plugin-llms-txt/src/config/gfm-resolver.ts b/packages/docusaurus-plugin-llms-txt/src/config/gfm-resolver.ts index 4458899..680dc32 100644 --- a/packages/docusaurus-plugin-llms-txt/src/config/gfm-resolver.ts +++ b/packages/docusaurus-plugin-llms-txt/src/config/gfm-resolver.ts @@ -7,58 +7,55 @@ import { DEFAULT_GFM } from '../constants'; -import type { PluginOptions, ProcessingOptions } from '../types'; +import type { PluginOptions, MarkdownOptions } from '../types'; /** * Resolve GFM configuration with defaults */ -export function resolveGfmConfig( - processing: ProcessingOptions -): ProcessingOptions { - if (!processing.remarkGfm) { - return processing; +export function resolveGfmConfig(markdown: MarkdownOptions): MarkdownOptions { + if (!markdown.remarkGfm) { + return markdown; } - if (processing.remarkGfm === true) { + if (markdown.remarkGfm === true) { return { - ...processing, + ...markdown, remarkGfm: DEFAULT_GFM, }; } - if (typeof processing.remarkGfm === 'object') { + if (typeof markdown.remarkGfm === 'object') { return { - ...processing, + ...markdown, remarkGfm: { ...DEFAULT_GFM, - ...processing.remarkGfm, + ...markdown.remarkGfm, }, }; } - return processing; + return markdown; } /** * Apply GFM configuration to plugin options */ export function applyGfmConfiguration(options: PluginOptions): PluginOptions { - const processing = options.processing ?? {}; + const markdown = options.markdown ?? 
{}; if ( !( - processing.remarkGfm === true || - (typeof processing.remarkGfm === 'object' && - processing.remarkGfm !== null) + markdown.remarkGfm === true || + (typeof markdown.remarkGfm === 'object' && markdown.remarkGfm !== null) ) ) { return options; } - const resolvedProcessing = resolveGfmConfig(processing); + const resolvedMarkdown = resolveGfmConfig(markdown); return { ...options, - processing: resolvedProcessing, + markdown: resolvedMarkdown, }; } diff --git a/packages/docusaurus-plugin-llms-txt/src/config/index.ts b/packages/docusaurus-plugin-llms-txt/src/config/index.ts index e023e24..59b3ab6 100644 --- a/packages/docusaurus-plugin-llms-txt/src/config/index.ts +++ b/packages/docusaurus-plugin-llms-txt/src/config/index.ts @@ -4,7 +4,7 @@ * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ -import { VALIDATION_MESSAGES } from '../constants'; +import { DEFAULT_EXCLUDE_ROUTES, VALIDATION_MESSAGES } from '../constants'; import { createConfigError } from '../errors'; import { pluginOptionsSchema } from '../types'; import { ensureLeadingSlash } from '../utils'; @@ -15,10 +15,8 @@ import { validateSections } from './section-validator'; import type { PluginOptions, EffectiveConfig, - GenerateOptions, - IncludeOptions, - StructureOptions, - ProcessingOptions, + MarkdownOptions, + LlmsTxtOptions, UiOptions, } from '../types'; @@ -39,12 +37,14 @@ export function getConfig(options: Partial): PluginOptions { const validated = validationResult.value; - // Validate section configuration using new structure - if (validated.structure?.sections) { + // Validate section configuration (now in llmsTxt config) + if (validated.llmsTxt?.sections) { + // Note: validateSections will collect attachments from both + // global llmsTxt.attachments and section-specific attachments validateSections( - validated.structure.sections, - validated.processing?.routeRules, - validated.processing?.attachments + 
validated.llmsTxt.sections, + validated.markdown?.routeRules, + validated.llmsTxt?.attachments ); } @@ -67,74 +67,122 @@ export function getConfig(options: Partial): PluginOptions { export { validateUserInputs } from './security-validator'; /** - * Get output generation configuration with defaults applied + * Get markdown generation configuration with defaults applied * @internal */ -export function getGenerateConfig( +export function getMarkdownConfig( config: PluginOptions -): Required { - const generate = config.generate ?? {}; +): Required { + const markdown = config.markdown ?? {}; return { - enableMarkdownFiles: generate.enableMarkdownFiles ?? true, - enableLlmsFullTxt: generate.enableLlmsFullTxt ?? false, - relativePaths: generate.relativePaths ?? true, + enableFiles: markdown.enableFiles ?? true, + relativePaths: markdown.relativePaths ?? true, + includeDocs: markdown.includeDocs ?? true, + includeVersionedDocs: markdown.includeVersionedDocs ?? true, + includeBlog: markdown.includeBlog ?? false, + includePages: markdown.includePages ?? false, + includeGeneratedIndex: markdown.includeGeneratedIndex ?? true, + excludeRoutes: [ + ...DEFAULT_EXCLUDE_ROUTES, + ...(markdown.excludeRoutes ?? []), + ], + // Content extraction and processing + contentSelectors: markdown.contentSelectors ?? [], + routeRules: markdown.routeRules ?? [], + // Markdown processing options + remarkStringify: markdown.remarkStringify ?? {}, + remarkGfm: markdown.remarkGfm ?? true, + rehypeProcessTables: markdown.rehypeProcessTables ?? true, + // Unified plugin system + beforeDefaultRehypePlugins: markdown.beforeDefaultRehypePlugins ?? [], + rehypePlugins: markdown.rehypePlugins ?? [], + beforeDefaultRemarkPlugins: markdown.beforeDefaultRemarkPlugins ?? [], + remarkPlugins: markdown.remarkPlugins ?? 
[], }; } /** - * Get content inclusion configuration with defaults applied + * Get filtering config for markdown generation * @internal */ -export function getIncludeConfig( - config: PluginOptions -): Required { - const include = config.include ?? {}; +export function getMarkdownIncludeConfig(config: PluginOptions): { + includeDocs: boolean; + includeVersionedDocs: boolean; + includeBlog: boolean; + includePages: boolean; + includeGeneratedIndex: boolean; + excludeRoutes: readonly string[]; +} { + const markdown = config.markdown ?? {}; return { - includeBlog: include.includeBlog ?? false, - includePages: include.includePages ?? false, - includeDocs: include.includeDocs ?? true, - includeVersionedDocs: include.includeVersionedDocs ?? true, - includeGeneratedIndex: include.includeGeneratedIndex ?? true, - excludeRoutes: include.excludeRoutes ?? [], + includeDocs: markdown.includeDocs ?? true, + includeVersionedDocs: markdown.includeVersionedDocs ?? true, + includeBlog: markdown.includeBlog ?? false, + includePages: markdown.includePages ?? false, + includeGeneratedIndex: markdown.includeGeneratedIndex ?? true, + excludeRoutes: [ + ...DEFAULT_EXCLUDE_ROUTES, + ...(markdown.excludeRoutes ?? []), + ], }; } /** - * Get content structure configuration with defaults applied + * Get llms.txt configuration with defaults applied * @internal */ -export function getStructureConfig( +export function getLlmsTxtConfig( config: PluginOptions -): Required { - const structure = config.structure ?? {}; +): Omit, 'autoSectionPosition'> & + Pick { + const llmsTxt = config.llmsTxt ?? {}; return { - sections: structure.sections ?? [], - siteTitle: structure.siteTitle ?? '', - siteDescription: structure.siteDescription ?? '', - enableDescriptions: structure.enableDescriptions ?? true, - optionalLinks: structure.optionalLinks ?? [], + enableLlmsFullTxt: llmsTxt.enableLlmsFullTxt ?? false, + includeDocs: llmsTxt.includeDocs ?? true, + includeVersionedDocs: llmsTxt.includeVersionedDocs ?? 
false, + includeBlog: llmsTxt.includeBlog ?? false, + includePages: llmsTxt.includePages ?? false, + includeGeneratedIndex: llmsTxt.includeGeneratedIndex ?? true, + excludeRoutes: [ + ...DEFAULT_EXCLUDE_ROUTES, + ...(llmsTxt.excludeRoutes ?? []), + ], + // Structure and organization + sections: llmsTxt.sections ?? [], + siteTitle: llmsTxt.siteTitle ?? '', + siteDescription: llmsTxt.siteDescription ?? '', + enableDescriptions: llmsTxt.enableDescriptions ?? true, + autoSectionDepth: llmsTxt.autoSectionDepth ?? 1, + autoSectionPosition: llmsTxt.autoSectionPosition, + optionalLinks: llmsTxt.optionalLinks ?? [], + // Attachments + attachments: llmsTxt.attachments ?? [], }; } /** - * Get content processing configuration with defaults applied + * Get filtering config for llms.txt indexing * @internal */ -export function getProcessingConfig( - config: PluginOptions -): Required { - const processing = config.processing ?? {}; +export function getLlmsTxtIncludeConfig(config: PluginOptions): { + includeDocs: boolean; + includeVersionedDocs: boolean; + includeBlog: boolean; + includePages: boolean; + includeGeneratedIndex: boolean; + excludeRoutes: readonly string[]; +} { + const llmsTxt = config.llmsTxt ?? {}; return { - contentSelectors: processing.contentSelectors ?? [], - routeRules: processing.routeRules ?? [], - attachments: processing.attachments ?? [], - remarkStringify: processing.remarkStringify ?? {}, - remarkGfm: processing.remarkGfm ?? true, - rehypeProcessTables: processing.rehypeProcessTables ?? true, - beforeDefaultRehypePlugins: processing.beforeDefaultRehypePlugins ?? [], - rehypePlugins: processing.rehypePlugins ?? [], - beforeDefaultRemarkPlugins: processing.beforeDefaultRemarkPlugins ?? [], - remarkPlugins: processing.remarkPlugins ?? [], + includeDocs: llmsTxt.includeDocs ?? true, + includeVersionedDocs: llmsTxt.includeVersionedDocs ?? false, + includeBlog: llmsTxt.includeBlog ?? false, + includePages: llmsTxt.includePages ?? 
false, + includeGeneratedIndex: llmsTxt.includeGeneratedIndex ?? true, + excludeRoutes: [ + ...DEFAULT_EXCLUDE_ROUTES, + ...(llmsTxt.excludeRoutes ?? []), + ], }; } @@ -149,6 +197,61 @@ export function getUiConfig(config: PluginOptions): Required { }; } +/** + * Collect all attachments from config (global + section-specific) with + * their sectionIds + * @internal + */ +export function collectAllAttachments( + config: PluginOptions +): Array { + const llmsTxt = config.llmsTxt ?? {}; + const attachmentsWithSections: Array< + import('../types').AttachmentFile & { sectionId: string } + > = []; + + // Helper to recursively collect from sections + function collectFromSections( + sections: readonly import('../types').SectionDefinition[] | undefined + ): void { + if (!sections) { + return; + } + + for (const section of sections) { + // Add section-specific attachments + if (section.attachments) { + for (const attachment of section.attachments) { + attachmentsWithSections.push({ + ...attachment, + sectionId: section.id, + }); + } + } + + // Recursively collect from subsections + if (section.subsections) { + collectFromSections(section.subsections); + } + } + } + + // Collect from all sections (including subsections) + collectFromSections(llmsTxt.sections); + + // Add global attachments (they get their own auto-generated section) + if (llmsTxt.attachments) { + for (const attachment of llmsTxt.attachments) { + attachmentsWithSections.push({ + ...attachment, + sectionId: 'attachments', // Auto-generated section ID for global attachments + }); + } + } + + return attachmentsWithSections; +} + /** * Gets config effective for a specific route, applying any matching route rules * Uses new section-based route resolution with precedence logic diff --git a/packages/docusaurus-plugin-llms-txt/src/config/route-rules.ts b/packages/docusaurus-plugin-llms-txt/src/config/route-rules.ts index a549244..9d11cd5 100644 --- a/packages/docusaurus-plugin-llms-txt/src/config/route-rules.ts +++ 
b/packages/docusaurus-plugin-llms-txt/src/config/route-rules.ts @@ -8,7 +8,7 @@ import { createMatcher } from '@docusaurus/utils'; import { ensureLeadingSlash, generateSectionId } from '../utils'; -import { getStructureConfig, getProcessingConfig } from './index'; +import { getLlmsTxtConfig, getMarkdownConfig } from './index'; import type { RouteRule, @@ -107,11 +107,11 @@ export function resolveRouteConfiguration( baseConfig: PluginOptions ): EffectiveConfig { // Get all config groups using the new structure - const structureConfig = getStructureConfig(baseConfig); - const processingConfig = getProcessingConfig(baseConfig); + const llmsTxtConfig = getLlmsTxtConfig(baseConfig); + const markdownConfig = getMarkdownConfig(baseConfig); // 1. Check section routes first (most specific) - const sectionMatch = findMatchingSectionRoute(path, structureConfig.sections); + const sectionMatch = findMatchingSectionRoute(path, llmsTxtConfig.sections); if (sectionMatch) { return { ...baseConfig, @@ -126,7 +126,7 @@ export function resolveRouteConfiguration( // 2. 
Check global rules (less specific) const globalMatch = findMostSpecificGlobalRule( path, - processingConfig.routeRules + markdownConfig.routeRules ); if (globalMatch) { return { diff --git a/packages/docusaurus-plugin-llms-txt/src/config/section-validator.ts b/packages/docusaurus-plugin-llms-txt/src/config/section-validator.ts index 8b8205d..acc3b22 100644 --- a/packages/docusaurus-plugin-llms-txt/src/config/section-validator.ts +++ b/packages/docusaurus-plugin-llms-txt/src/config/section-validator.ts @@ -59,8 +59,8 @@ function validateNoDuplicateIds(sections: readonly SectionDefinition[]): void { */ function validateSectionReferences( sections: readonly SectionDefinition[], - routeRules?: readonly RouteRule[], - attachments?: readonly AttachmentFile[] + _routeRules?: readonly RouteRule[], + _attachments?: readonly AttachmentFile[] ): void { const validSectionIds = new Set(collectAllSectionIds(sections)); const invalidReferences: string[] = []; @@ -68,14 +68,9 @@ function validateSectionReferences( // Global route rules no longer reference sections directly // Section route validation is handled separately - // Check attachment references - attachments?.forEach((attachment) => { - if (attachment.sectionId && !validSectionIds.has(attachment.sectionId)) { - invalidReferences.push( - `Attachment '${attachment.source}' references non-existent section '${attachment.sectionId}'` - ); - } - }); + // Note: Global attachments (passed as parameter) no longer have + // sectionId. They will be assigned to an auto-generated section. + // Section-specific attachments are validated within sections. 
if (invalidReferences.length > 0) { throw new Error( diff --git a/packages/docusaurus-plugin-llms-txt/src/constants.ts b/packages/docusaurus-plugin-llms-txt/src/constants.ts index 2bb2e80..da17d17 100644 --- a/packages/docusaurus-plugin-llms-txt/src/constants.ts +++ b/packages/docusaurus-plugin-llms-txt/src/constants.ts @@ -173,6 +173,23 @@ export const ROOT_ROUTE_PATH = '/' as const; /** @internal */ export const INDEX_ROUTE_PATH = '/index.md' as const; +/** + * Default routes to exclude from processing across all features + * These are common Docusaurus-generated pages that typically don't need + * markdown files, shouldn't appear in llms.txt, or show a copy button + */ +export const DEFAULT_EXCLUDE_ROUTES = [ + '/search', // Search page + '/404.html', // 404 page + '/tags', // Global tags index + '/tags/**', // Individual tag pages + '/blog/tags', // Blog tags index + '/blog/tags/**', // Individual blog tag pages + '/blog/archive', // Blog archive + '/blog/authors', // Blog authors index + '/blog/authors/**', // Individual author pages +] as const; + // Default content selectors - public for user reference export const DEFAULT_CONTENT_SELECTORS = [ '.theme-doc-markdown', diff --git a/packages/docusaurus-plugin-llms-txt/src/copy-button/json-generator.ts b/packages/docusaurus-plugin-llms-txt/src/copy-button/json-generator.ts index 3de9f57..5effd52 100644 --- a/packages/docusaurus-plugin-llms-txt/src/copy-button/json-generator.ts +++ b/packages/docusaurus-plugin-llms-txt/src/copy-button/json-generator.ts @@ -9,20 +9,54 @@ import * as path from 'path'; import * as fs from 'fs-extra'; -import type { CachedRouteInfo, Logger } from '../types'; +import { + DEFAULT_CONTENT_SELECTORS, + DEFAULT_EXCLUDE_ROUTES, +} from '../constants'; +import { createExclusionMatcher } from '../discovery/exclusion-matcher'; + +import type { CachedRouteInfo, Logger, PluginOptions } from '../types'; export async function generateCopyContentJson( processedRoutes: CachedRouteInfo[], filePath: 
string, - logger: Logger + logger: Logger, + config: PluginOptions ): Promise { try { - // Build minimal data structure: route path → boolean - const copyContentData: Record = {}; + // Get UI config for copy button display settings + const uiConfig = + typeof config.ui?.copyPageContent === 'object' + ? config.ui.copyPageContent + : {}; + const excludeRoutes = [ + ...DEFAULT_EXCLUDE_ROUTES, + ...(uiConfig.display?.excludeRoutes ?? []), + ]; + + // Create exclusion matcher for server-side filtering + const isExcluded = createExclusionMatcher(excludeRoutes); + + // Build data structure: route path → route info with content selectors + const copyContentData: Record< + string, + { + shouldDisplay: boolean; + hasMarkdown: boolean; + contentSelectors: readonly string[]; + } + > = {}; for (const route of processedRoutes) { - // Only routes with markdownFile have content available for copying - copyContentData[route.path] = Boolean(route.markdownFile); + copyContentData[route.path] = { + // Check if route should display button (based on excludeRoutes) + shouldDisplay: !isExcluded(route.path), + // Routes with markdownFile have markdown content available + hasMarkdown: Boolean(route.markdownFile), + // Always include content selectors for HTML fallback extraction + // Use route-specific selectors if defined, otherwise use defaults + contentSelectors: route.contentSelectors ?? DEFAULT_CONTENT_SELECTORS, + }; } await fs.writeFile(filePath, JSON.stringify(copyContentData, null, 2)); diff --git a/packages/docusaurus-plugin-llms-txt/src/discovery/content-classifier.ts b/packages/docusaurus-plugin-llms-txt/src/discovery/content-classifier.ts index 693e66b..b0b9c72 100644 --- a/packages/docusaurus-plugin-llms-txt/src/discovery/content-classifier.ts +++ b/packages/docusaurus-plugin-llms-txt/src/discovery/content-classifier.ts @@ -5,7 +5,6 @@ * LICENSE file in the root directory of this source tree. 
*/ -import { getIncludeConfig } from '../config'; import { DOCUSAURUS_BLOG_PLUGIN, DOCUSAURUS_PAGES_PLUGIN, @@ -16,6 +15,18 @@ import { import type { PluginOptions } from '../types'; import type { PluginRouteConfig } from '@docusaurus/types'; +/** + * Generic include configuration for filtering + */ +export interface IncludeFilterConfig { + includeDocs: boolean; + includeVersionedDocs: boolean; + includeBlog: boolean; + includePages: boolean; + includeGeneratedIndex: boolean; + excludeRoutes: readonly string[]; +} + /** * Classify a route by its plugin type, with fallback heuristics for routes * without plugin info @@ -70,29 +81,28 @@ function classifyRouteByHeuristics(route: PluginRouteConfig): ContentType { } /** - * Determines if a route should be processed based on plugin configuration + * Determines if a route should be included based on filter configuration * @internal */ -export function shouldProcessRoute( +export function shouldIncludeRoute( route: PluginRouteConfig, - options: PluginOptions + filterConfig: IncludeFilterConfig ): boolean { - const includeConfig = getIncludeConfig(options); const routeType = classifyRoute(route); // First check if this content type should be included let shouldIncludeType = false; switch (routeType) { case CONTENT_TYPES.BLOG: - shouldIncludeType = includeConfig.includeBlog; + shouldIncludeType = filterConfig.includeBlog; break; case CONTENT_TYPES.PAGES: - shouldIncludeType = includeConfig.includePages; + shouldIncludeType = filterConfig.includePages; break; case CONTENT_TYPES.DOCS: case CONTENT_TYPES.UNKNOWN: default: - shouldIncludeType = includeConfig.includeDocs; + shouldIncludeType = filterConfig.includeDocs; break; } @@ -103,7 +113,7 @@ export function shouldProcessRoute( // For docs routes, check versioned docs filtering if ( (routeType === CONTENT_TYPES.DOCS || routeType === CONTENT_TYPES.UNKNOWN) && - includeConfig.includeVersionedDocs === false + filterConfig.includeVersionedDocs === false ) { // Check if this is a 
versioned docs route (not current version) const isVersionedRoute = @@ -120,7 +130,7 @@ export function shouldProcessRoute( } // Check if this is a generated category index page - if (includeConfig.includeGeneratedIndex === false) { + if (filterConfig.includeGeneratedIndex === false) { // Generated index pages have a categoryGeneratedIndex prop if (route.props?.categoryGeneratedIndex !== undefined) { return false; // Skip generated index pages when includeGeneratedIndex is false @@ -129,3 +139,38 @@ export function shouldProcessRoute( return true; } + +/** + * Determines if a route should be processed based on plugin configuration + * This uses the union of generate and indexing configs to ensure we process + * everything that might be needed for either use case + * @internal + * @deprecated Use shouldIncludeRoute with specific filter config instead + */ +export function shouldProcessRoute( + route: PluginRouteConfig, + options: PluginOptions +): boolean { + // For processing, we need to include routes that are in EITHER + // markdown OR llmsTxt + const markdown = options.markdown ?? {}; + const llmsTxt = options.llmsTxt ?? {}; + + const unionConfig: IncludeFilterConfig = { + includeDocs: + (markdown.includeDocs ?? true) || (llmsTxt.includeDocs ?? true), + includeVersionedDocs: + (markdown.includeVersionedDocs ?? true) || + (llmsTxt.includeVersionedDocs ?? false), + includeBlog: + (markdown.includeBlog ?? false) || (llmsTxt.includeBlog ?? false), + includePages: + (markdown.includePages ?? false) || (llmsTxt.includePages ?? false), + includeGeneratedIndex: + (markdown.includeGeneratedIndex ?? true) || + (llmsTxt.includeGeneratedIndex ?? 
true), + excludeRoutes: [], // Exclusions handled separately + }; + + return shouldIncludeRoute(route, unionConfig); +} diff --git a/packages/docusaurus-plugin-llms-txt/src/generation/full-index-builder.ts b/packages/docusaurus-plugin-llms-txt/src/generation/full-index-builder.ts index 22e4d00..2c54cf5 100644 --- a/packages/docusaurus-plugin-llms-txt/src/generation/full-index-builder.ts +++ b/packages/docusaurus-plugin-llms-txt/src/generation/full-index-builder.ts @@ -9,7 +9,7 @@ import path from 'path'; import fs from 'fs-extra'; -import { getStructureConfig, getProcessingConfig } from '../config'; +import { getLlmsTxtConfig, getMarkdownConfig } from '../config'; import { buildUnifiedDocumentTree } from './index-builder'; import { processMarkdownForFullContent, @@ -44,11 +44,11 @@ export async function buildLlmsFullTxtContent( (doc) => doc.routePath === '/' || doc.routePath === '/index' ); - const structureConfig = getStructureConfig(config); - const processingConfig = getProcessingConfig(config); + const llmsTxtConfig = getLlmsTxtConfig(config); + const markdownConfig = getMarkdownConfig(config); const documentTitle = - structureConfig.siteTitle || + llmsTxtConfig.siteTitle || siteConfig.title || rootDoc?.title || 'Documentation'; @@ -56,13 +56,13 @@ export async function buildLlmsFullTxtContent( let content = `# ${documentTitle}\n\n`; // Add description if available - const description = structureConfig.siteDescription || rootDoc?.description; + const description = llmsTxtConfig.siteDescription || rootDoc?.description; if (description) { content += `> ${description}\n\n`; } - // Get remarkStringify options from processing config - const remarkStringifyOptions = processingConfig.remarkStringify; + // Get remarkStringify options from markdown config + const remarkStringifyOptions = markdownConfig.remarkStringify; // Build unified document tree for category organization const { tree } = buildUnifiedDocumentTree(docs, config, attachments); diff --git 
a/packages/docusaurus-plugin-llms-txt/src/generation/index-builder.ts b/packages/docusaurus-plugin-llms-txt/src/generation/index-builder.ts index 8daf6d2..27ca731 100644 --- a/packages/docusaurus-plugin-llms-txt/src/generation/index-builder.ts +++ b/packages/docusaurus-plugin-llms-txt/src/generation/index-builder.ts @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -import { getStructureConfig, getGenerateConfig } from '../config'; +import { getLlmsTxtConfig, getMarkdownConfig } from '../config'; import { ROOT_ROUTE_PATH, INDEX_ROUTE_PATH, @@ -36,17 +36,18 @@ export function buildLlmsTxtContent( ); // Get configuration groups - const structureConfig = getStructureConfig(config); - const generateConfig = getGenerateConfig(config); + const llmsTxtConfig = getLlmsTxtConfig(config); + const markdownConfig = getMarkdownConfig(config); // Generate configuration values const documentTitle = - structureConfig.siteTitle || + llmsTxtConfig.siteTitle || siteConfig.title || rootDoc?.title || DEFAULT_SITE_TITLE; - const enableDescriptions = structureConfig.enableDescriptions; - const useRelativePaths = generateConfig.relativePaths; + const enableDescriptions = llmsTxtConfig.enableDescriptions; + const autoSectionDepth = llmsTxtConfig.autoSectionDepth ?? 1; + const useRelativePaths = markdownConfig.relativePaths; const siteUrl = siteConfig.url + (siteConfig.baseUrl !== '/' ? 
siteConfig.baseUrl : ''); @@ -55,7 +56,7 @@ export function buildLlmsTxtContent( // Add description if enabled and available if (enableDescriptions) { - const description = structureConfig.siteDescription || rootDoc?.description; + const description = llmsTxtConfig.siteDescription || rootDoc?.description; if (description) { content += `> ${description}\n\n`; } @@ -65,7 +66,7 @@ export function buildLlmsTxtContent( if (rootDoc) { const formatOptions: Parameters[1] = { relativePaths: useRelativePaths, - enableMarkdownFiles: generateConfig.enableMarkdownFiles, + enableFiles: markdownConfig.enableFiles, }; if (rootDoc.markdownFile) { @@ -81,22 +82,23 @@ export function buildLlmsTxtContent( } // Add main content (filter out root pages to prevent duplication) + // Pass autoSectionDepth to calculate heading levels based on route depth content += renderTreeAsMarkdown( tree, - 2, + autoSectionDepth, true, siteUrl, useRelativePaths, - generateConfig.enableMarkdownFiles, + markdownConfig.enableFiles, enableDescriptions ); // Add ALL optional links to Optional section // Optional links should never appear in other categories, regardless of // categoryId - if (structureConfig.optionalLinks?.length) { + if (llmsTxtConfig.optionalLinks?.length) { content += `\n## Optional\n`; - for (const link of structureConfig.optionalLinks) { + for (const link of llmsTxtConfig.optionalLinks) { const descPart = enableDescriptions && link.description ? 
`: ${link.description}` : ''; content += `- [${link.title}](${link.url})${descPart}\n`; diff --git a/packages/docusaurus-plugin-llms-txt/src/generation/output-generator.ts b/packages/docusaurus-plugin-llms-txt/src/generation/output-generator.ts index b2a40e8..00ef9b6 100644 --- a/packages/docusaurus-plugin-llms-txt/src/generation/output-generator.ts +++ b/packages/docusaurus-plugin-llms-txt/src/generation/output-generator.ts @@ -7,18 +7,27 @@ import path from 'path'; -import { getGenerateConfig } from '../config'; +import { getLlmsTxtIncludeConfig, getLlmsTxtConfig } from '../config'; import { - LLMS_TXT_FILENAME, + CONTENT_TYPES, LLMS_FULL_TXT_FILENAME, + LLMS_TXT_FILENAME, PROCESSING_MESSAGES, } from '../constants'; import { buildLlmsFullTxtContent } from './full-index-builder'; import { buildLlmsTxtContent, buildUnifiedDocumentTree } from './index-builder'; import { saveMarkdownFile } from './markdown-writer'; +import { createExclusionMatcher } from '../discovery/exclusion-matcher'; import type { ProcessedAttachment } from '../processing/attachment-processor'; -import type { DocInfo, PluginOptions, Logger, DirectoryConfig } from '../types'; +import type { + CachedRouteInfo, + CacheSchema, + DirectoryConfig, + DocInfo, + Logger, + PluginOptions, +} from '../types'; /** * Output generation result @@ -29,6 +38,90 @@ export interface OutputResult { readonly contentLength: number; } +/** + * Filter docs based on indexing configuration + * This determines what appears in the main llms.txt file + * @internal + */ +function filterDocsForIndexing( + docs: DocInfo[], + config: PluginOptions, + cache: CacheSchema, + logger: Logger +): DocInfo[] { + const indexingConfig = getLlmsTxtIncludeConfig(config); + const isExcluded = createExclusionMatcher(indexingConfig.excludeRoutes); + + // Build lookup from cache + const routeLookup = new Map(); + for (const route of cache.routes) { + routeLookup.set(route.path, route); + } + + const filtered = docs.filter((doc) => { + // Check 
exclusion patterns + if (isExcluded(doc.routePath)) { + logger.debug(`Excluding from llms.txt (pattern): ${doc.routePath}`); + return false; + } + + // Get cached route info for metadata + const cachedRoute = routeLookup.get(doc.routePath); + if (!cachedRoute) { + // If no cached route info, include by default (shouldn't happen) + return true; + } + + // Check content type + switch (cachedRoute.contentType) { + case CONTENT_TYPES.BLOG: + if (!indexingConfig.includeBlog) { + logger.debug(`Excluding from llms.txt (blog): ${doc.routePath}`); + return false; + } + break; + case CONTENT_TYPES.PAGES: + if (!indexingConfig.includePages) { + logger.debug(`Excluding from llms.txt (pages): ${doc.routePath}`); + return false; + } + break; + case CONTENT_TYPES.DOCS: + case CONTENT_TYPES.UNKNOWN: + default: + if (!indexingConfig.includeDocs) { + logger.debug(`Excluding from llms.txt (docs): ${doc.routePath}`); + return false; + } + break; + } + + // Check versioned docs + if (cachedRoute.isVersioned && !indexingConfig.includeVersionedDocs) { + logger.debug(`Excluding from llms.txt (versioned): ${doc.routePath}`); + return false; + } + + // Check generated index + if (cachedRoute.isGeneratedIndex && !indexingConfig.includeGeneratedIndex) { + logger.debug( + `Excluding from llms.txt (generated index): ${doc.routePath}` + ); + return false; + } + + return true; + }); + + if (filtered.length < docs.length) { + logger.info( + `Filtered for llms.txt: ${filtered.length}/${docs.length} docs included` + ); + } + + return filtered; +} + /** * Generate and save output files */ @@ -38,7 +131,8 @@ export async function generateOutputFiles( siteConfig: { title?: string; url: string; baseUrl: string }, directories: DirectoryConfig, logger: Logger, - attachments?: readonly ProcessedAttachment[] + attachments?: readonly ProcessedAttachment[], + cache?: CacheSchema ): Promise { if (docs.length === 0) { logger.info(PROCESSING_MESSAGES.NO_DOCUMENTS); @@ -48,12 +142,18 @@ export async function 
generateOutputFiles( }; } - // Build the unified tree first (used by llms.txt) - buildUnifiedDocumentTree(docs, config, attachments); + // Filter docs for indexing (llms.txt) if cache is available + const docsForIndexing = + cache && cache.routes.length > 0 + ? filterDocsForIndexing(docs, config, cache, logger) + : docs; + + // Build the unified tree first (used by llms.txt) with filtered docs + buildUnifiedDocumentTree(docsForIndexing, config, attachments); - // Build llms.txt content using the tree + // Build llms.txt content using the tree with filtered docs const llmsTxtContent = buildLlmsTxtContent( - docs, + docsForIndexing, config, siteConfig, attachments @@ -68,17 +168,17 @@ export async function generateOutputFiles( await saveMarkdownFile(llmsTxtPath, llmsTxtContent); logger.debug(`Successfully saved llms.txt`); - const totalItems = docs.length + (attachments?.length ?? 0); + const totalItems = docsForIndexing.length + (attachments?.length ?? 0); logger.info( - `Generated llms.txt with ${docs.length} documents${attachments?.length ? ` and ${attachments.length} attachments` : ''}` + `Generated llms.txt with ${docsForIndexing.length} documents${attachments?.length ? 
` and ${attachments.length} attachments` : ''}` ); let llmsFullTxtPath: string | undefined; let totalContentLength = llmsTxtContent.length; // Generate llms-full.txt if enabled - const generateConfig = getGenerateConfig(config); - if (generateConfig.enableLlmsFullTxt) { + const llmsTxtConfig = getLlmsTxtConfig(config); + if (llmsTxtConfig.enableLlmsFullTxt) { const llmsFullTxtContent = await buildLlmsFullTxtContent( docs, config, diff --git a/packages/docusaurus-plugin-llms-txt/src/index.ts b/packages/docusaurus-plugin-llms-txt/src/index.ts index 58918ce..a063245 100644 --- a/packages/docusaurus-plugin-llms-txt/src/index.ts +++ b/packages/docusaurus-plugin-llms-txt/src/index.ts @@ -4,16 +4,16 @@ * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ -import { flattenRoutes } from '@docusaurus/utils'; +import { flattenRoutes, normalizeUrl } from '@docusaurus/utils'; import * as fs from 'fs-extra'; import { registerLlmsTxt, registerLlmsTxtClean } from './cli/command'; import { getConfig, - getProcessingConfig, - getGenerateConfig, + getMarkdownConfig, getUiConfig, validateUserInputs, + collectAllAttachments, } from './config'; import { ERROR_MESSAGES, PLUGIN_NAME } from './constants'; import { generateCopyContentJson } from './copy-button/json-generator'; @@ -222,7 +222,10 @@ export default function llmsTxtPlugin( // Only add URL if copy content is enabled if (uiConfig.copyPageContent !== false) { - globalData.copyContentDataUrl = `/assets/llms-txt/copy-content-data.${buildTimestamp}.json?v=${Date.now()}`; + // Construct data URL with baseUrl for proper routing + const dataPath = `/assets/llms-txt/copy-content-data.${buildTimestamp}.json`; + const dataUrl = normalizeUrl([context.siteConfig.baseUrl, dataPath]); + globalData.copyContentDataUrl = `${dataUrl}?v=${Date.now()}`; } setGlobalData(globalData); @@ -283,18 +286,16 @@ export default function llmsTxtPlugin( ); // Get configuration groups - 
const processingConfig = getProcessingConfig(config); - const generateConfig = getGenerateConfig(config); + const markdownConfig = getMarkdownConfig(config); - // Process attachments if configured before orchestrating processing + // Process attachments if configured before orchestrating processing. + // Collect all attachments (global + section-specific) with sectionIds let processedAttachments: ProcessedAttachment[] | undefined; - if ( - processingConfig.attachments && - processingConfig.attachments.length > 0 - ) { + const allAttachments = collectAllAttachments(config); + if (allAttachments.length > 0) { const attachmentProcessor = new AttachmentProcessor(log); processedAttachments = await attachmentProcessor.processAttachments( - processingConfig.attachments, + allAttachments, siteDir, outDir ); @@ -311,8 +312,8 @@ export default function llmsTxtPlugin( siteConfig, outDir, logger: log, - contentSelectors: processingConfig.contentSelectors, - relativePaths: generateConfig.relativePaths, + contentSelectors: markdownConfig.contentSelectors, + relativePaths: markdownConfig.relativePaths, }, enhancedCachedRoutes, processedAttachments // Pass attachments for integration @@ -335,7 +336,8 @@ export default function llmsTxtPlugin( await generateCopyContentJson( [...updatedCache.routes], copyDataPath, - log + log, + config ); } diff --git a/packages/docusaurus-plugin-llms-txt/src/orchestrator.ts b/packages/docusaurus-plugin-llms-txt/src/orchestrator.ts index b255bb6..d8e6aac 100644 --- a/packages/docusaurus-plugin-llms-txt/src/orchestrator.ts +++ b/packages/docusaurus-plugin-llms-txt/src/orchestrator.ts @@ -106,7 +106,8 @@ export async function orchestrateProcessing( siteConfig, directories, logger, - processedAttachments // Pass attachments for integration into tree + processedAttachments, // Pass attachments for integration into tree + finalCache // Pass cache for indexing filtering ); return { diff --git 
a/packages/docusaurus-plugin-llms-txt/src/organization/tree-builder.ts b/packages/docusaurus-plugin-llms-txt/src/organization/tree-builder.ts index bb6342d..8f34d9d 100644 --- a/packages/docusaurus-plugin-llms-txt/src/organization/tree-builder.ts +++ b/packages/docusaurus-plugin-llms-txt/src/organization/tree-builder.ts @@ -5,11 +5,11 @@ * LICENSE file in the root directory of this source tree. */ -import { getEffectiveConfigForRoute, getStructureConfig } from '../config'; +import { getEffectiveConfigForRoute, getLlmsTxtConfig } from '../config'; import { findQualityIssues } from '../config/section-validator'; import { TREE_ROOT_NAME, INDEX_IDENTIFIER } from '../constants'; import { handleSectionError } from '../errors/section-errors'; -import { ensureLeadingSlash } from '../utils'; +import { ensureLeadingSlash, sortByPosition } from '../utils'; import type { DocInfo, @@ -28,13 +28,16 @@ export function buildDocumentTree( logger?: Logger ): TreeNode { // Get configuration groups - const structureConfig = getStructureConfig(globalConfig); + const llmsTxtConfig = getLlmsTxtConfig(globalConfig); // 1. Create section definitions map for quick lookup - const sectionsMap = buildSectionsMap(structureConfig.sections); + const sectionsMap = buildSectionsMap(llmsTxtConfig.sections); - // 2. Group documents by section (auto-assign if not specified) + // 2. Group documents by section + // For manual sections: group by section ID + // For auto-generated sections: group by path segment at autoSectionDepth const sectionGroups = new Map(); + const autoSectionDepth = llmsTxtConfig.autoSectionDepth ?? 
1; for (const doc of docs) { const route = doc.routePath.replace(/\.md$/, ''); @@ -51,28 +54,49 @@ export function buildDocumentTree( } const effectiveConfig = getEffectiveConfigForRoute(routePath, globalConfig); - const sectionId = effectiveConfig.section; + const baseSectionId = effectiveConfig.section; + const isManualSection = sectionsMap.has(baseSectionId); - if (!sectionGroups.has(sectionId)) { - sectionGroups.set(sectionId, []); + let finalSectionId: string; + + if (isManualSection) { + // Manual sections: use the section ID as-is + finalSectionId = baseSectionId; + } else { + // Auto-generated sections: determine section based on autoSectionDepth + // If route is shallower, fall back to the actual depth + const effectiveDepth = Math.min(autoSectionDepth, segments.length); + const segmentIndex = effectiveDepth - 1; + finalSectionId = segments[segmentIndex]!; + } + + if (!sectionGroups.has(finalSectionId)) { + sectionGroups.set(finalSectionId, []); } - sectionGroups.get(sectionId)!.push(doc); + sectionGroups.get(finalSectionId)!.push(doc); } - // 3. Create processed sections with hierarchy + // 3. Create processed sections const processedSections = new Map(); for (const [sectionId, sectionDocs] of sectionGroups) { + const isManualSection = sectionsMap.has(sectionId); const sectionDef = - sectionsMap.get(sectionId) || createAutoSection(sectionId); - const processedSection = createProcessedSection(sectionDef, sectionDocs); + sectionsMap.get(sectionId) || + createAutoSection(sectionId, llmsTxtConfig.autoSectionPosition); + const processedSection = createProcessedSection( + sectionDef, + sectionDocs, + !isManualSection, // isAutoGenerated + autoSectionDepth + ); processedSections.set(sectionId, processedSection); } // 4. Handle quality issues (empty sections, etc.) 
if (logger) { const qualityIssues = findQualityIssues( - structureConfig.sections, + llmsTxtConfig.sections, processedSections, globalConfig.onSectionError || 'warn' ); @@ -123,6 +147,8 @@ interface ProcessedSection { position?: number; docs: DocInfo[]; parentId?: string; + isAutoGenerated: boolean; + autoSectionDepth: number; } /** @@ -153,7 +179,10 @@ function buildSectionsMap( /** * Create auto-generated section definition */ -function createAutoSection(sectionId: string): SectionDefinition { +function createAutoSection( + sectionId: string, + position?: number +): SectionDefinition { // Auto-create section with ID as name (title-cased) const name = sectionId .split('-') @@ -163,6 +192,7 @@ function createAutoSection(sectionId: string): SectionDefinition { return { id: sectionId, name, + ...(position !== undefined && { position }), }; } @@ -171,15 +201,19 @@ function createAutoSection(sectionId: string): SectionDefinition { */ function createProcessedSection( sectionDef: SectionDefinition & { parentId?: string }, - docs: DocInfo[] + docs: DocInfo[], + isAutoGenerated: boolean, + autoSectionDepth: number ): ProcessedSection { return { id: sectionDef.id, name: sectionDef.name, description: sectionDef.description, position: sectionDef.position, - docs: sortDocsByTitle(docs), + docs: sortDocsByPath(docs), parentId: sectionDef.parentId, + isAutoGenerated, + autoSectionDepth, }; } @@ -215,13 +249,13 @@ function buildHierarchicalTree( globalConfig: PluginOptions ): TreeNode { // Get structure configuration - const structureConfig = getStructureConfig(globalConfig); + const llmsTxtConfig = getLlmsTxtConfig(globalConfig); // Create root node const root: TreeNode = { id: 'root', - name: structureConfig.siteTitle || TREE_ROOT_NAME, - description: structureConfig.siteDescription || '', + name: llmsTxtConfig.siteTitle || TREE_ROOT_NAME, + description: llmsTxtConfig.siteDescription || '', relPath: '', docs: [], subCategories: [], @@ -243,62 +277,69 @@ function 
buildHierarchicalTree( } // Build tree nodes - function createTreeNode(section: ProcessedSection): TreeNode { - const subsections = subsectionsByParent.get(section.id) || []; + function createTreeNode( + section: ProcessedSection, + parentPath: string = '' + ): TreeNode { + const manualSubsections = subsectionsByParent.get(section.id) || []; + + // Build relPath to indicate hierarchy depth + // For top-level sections: just the section ID + // For subsections: parent/section-id + const relPath = parentPath ? `${parentPath}/${section.id}` : section.id; + + // Auto-generated sections are already at the correct level + // (created from path segments at autoSectionDepth) + // Manual sections may have subsections defined via parentId + const subCategories = sortByPosition( + manualSubsections.map((sub) => createTreeNode(sub, relPath)) + ); return { id: section.id, name: section.name, description: section.description, - relPath: section.id, + position: section.position, + relPath, docs: section.docs, - subCategories: sortByPosition(subsections.map(createTreeNode)), + subCategories, }; } // Add top-level sections to root const mutableRoot = root as TreeNode & { subCategories: TreeNode[] }; mutableRoot.subCategories = sortByPosition( - topLevelSections.map(createTreeNode) + topLevelSections.map((section) => createTreeNode(section)) ); return root; } /** - * Sort items by position + alphabetical (Docusaurus-style) + * Sort documents by path hierarchy for better organization + * Groups by path prefix, then by depth within prefix, then lexicographic */ -function sortByPosition( - items: T[] -): T[] { - return items.sort((a, b) => { - // 1. Items with position come before items without position - const aHasPosition = a.position !== undefined; - const bHasPosition = b.position !== undefined; - - if (aHasPosition && !bHasPosition) { - return -1; - } - if (!aHasPosition && bHasPosition) { - return 1; - } - - // 2. 
If both have positions, sort by position numerically - if (aHasPosition && bHasPosition) { - const positionDiff = a.position! - b.position!; - if (positionDiff !== 0) { - return positionDiff; +function sortDocsByPath(docs: DocInfo[]): DocInfo[] { + return docs.sort((a, b) => { + // Get normalized paths without .md extension + const pathA = a.routePath.replace(/\.md$/, '').split('/').filter(Boolean); + const pathB = b.routePath.replace(/\.md$/, '').split('/').filter(Boolean); + + // Compare path segments from left to right + const minLength = Math.min(pathA.length, pathB.length); + + for (let i = 0; i < minLength; i += 1) { + const segmentA = pathA[i]; + const segmentB = pathB[i]; + if (segmentA && segmentB) { + const comparison = segmentA.localeCompare(segmentB); + if (comparison !== 0) { + return comparison; + } } } - // 3. If same position (or both undefined), sort alphabetically - return a.name.localeCompare(b.name); + // If all common segments match, shorter path comes first + return pathA.length - pathB.length; }); } - -/** - * Sort docs alphabetically by title - */ -function sortDocsByTitle(docs: DocInfo[]): DocInfo[] { - return docs.sort((a, b) => a.title.localeCompare(b.title)); -} diff --git a/packages/docusaurus-plugin-llms-txt/src/organization/tree-renderer.ts b/packages/docusaurus-plugin-llms-txt/src/organization/tree-renderer.ts index 96d48e2..dad6388 100644 --- a/packages/docusaurus-plugin-llms-txt/src/organization/tree-renderer.ts +++ b/packages/docusaurus-plugin-llms-txt/src/organization/tree-renderer.ts @@ -7,7 +7,6 @@ import { createSlugger } from '@docusaurus/utils'; -import { DEFAULT_MARKDOWN_HEADER_LEVEL } from '../constants'; import { formatUrl } from '../utils/url'; import type { TreeNode, DocInfo } from '../types'; @@ -27,23 +26,35 @@ function areSimilarTitles(a: string, b: string): boolean { */ export function renderTreeAsMarkdown( node: TreeNode, - level: number = DEFAULT_MARKDOWN_HEADER_LEVEL, + autoSectionDepth: number = 1, isRoot: 
boolean = false, baseUrl: string = '', useRelativePaths: boolean = true, - enableMarkdownFiles: boolean = true, + enableFiles: boolean = true, enableDescriptions: boolean = true ): string { let md = ''; + // Calculate heading level based on tree hierarchy depth + // Top-level sections (direct children of root) are H2 + // Their subsections (if defined) are H3, H4, etc. + const calculateHeadingLevel = (relPath: string): number => { + // Count path segments to determine tree hierarchy depth + // Single segment = top-level section (H2) + // Two segments = subsection (H3), etc. + const segments = relPath.split('/').filter(Boolean); + const hierarchyDepth = segments.length; + // Top-level sections get H2 (1 + 1), subsections get H3 (2 + 1), etc. + return Math.min(hierarchyDepth + 1, 6); + }; + // Handle section heading and description if (!isRoot && node.name) { const shouldHeader = !node.indexDoc || !areSimilarTitles(node.name, node.indexDoc.title); if (shouldHeader) { - // Cap at H6 to respect markdown heading limits - const cappedLevel = Math.min(level, 6); - md += `${'#'.repeat(cappedLevel)} ${node.name}\n\n`; + const headingLevel = calculateHeadingLevel(node.relPath); + md += `${'#'.repeat(headingLevel)} ${node.name}\n\n`; // Prefer section description over index doc description if (enableDescriptions && node.description) { @@ -59,7 +70,7 @@ export function renderTreeAsMarkdown( if (node.indexDoc && !isRoot) { const formatOptions: Parameters[1] = { relativePaths: useRelativePaths, - enableMarkdownFiles, + enableFiles, }; if (node.indexDoc.markdownFile) { @@ -83,7 +94,7 @@ export function renderTreeAsMarkdown( node.docs.forEach((d: DocInfo) => { const formatOptions: Parameters[1] = { relativePaths: useRelativePaths, - enableMarkdownFiles, + enableFiles, }; if (d.markdownFile) { @@ -100,15 +111,13 @@ export function renderTreeAsMarkdown( // Process subcategories (already ordered by tree builder) if (node.subCategories.length) { node.subCategories.forEach((sub: 
TreeNode) => { - // Cap at H6 to respect markdown heading limits - const nextLevel = Math.min(isRoot ? level : level + 1, 6); md += `\n${renderTreeAsMarkdown( sub, - nextLevel, + autoSectionDepth, false, baseUrl, useRelativePaths, - enableMarkdownFiles, + enableFiles, enableDescriptions )}`; }); diff --git a/packages/docusaurus-plugin-llms-txt/src/plugin-llms-txt.d.ts b/packages/docusaurus-plugin-llms-txt/src/plugin-llms-txt.d.ts index 7c60c87..ab0f109 100644 --- a/packages/docusaurus-plugin-llms-txt/src/plugin-llms-txt.d.ts +++ b/packages/docusaurus-plugin-llms-txt/src/plugin-llms-txt.d.ts @@ -25,12 +25,22 @@ declare module '@theme/CopyPageContent' { import type { ReactNode } from 'react'; export interface Props { - readonly className?: string; + readonly isMobile?: boolean; } export default function CopyPageContent(props: Props): ReactNode; } +// DocBreadcrumbs - wrapper that adds CopyPageContent button next to breadcrumbs +// This is a wrapped component that extends the default Docusaurus breadcrumbs +declare module '@theme/DocBreadcrumbs' { + import type { ReactNode } from 'react'; + + export type Props = Record; + + export default function DocBreadcrumbs(props: Props): ReactNode; +} + // CopyButton subcomponent declare module '@theme/CopyPageContent/CopyButton' { import type { ReactNode } from 'react'; @@ -54,6 +64,8 @@ declare module '@theme/CopyPageContent/DropdownMenu' { readonly isOpen: boolean; readonly finalConfig: any; readonly onAction: (action: string) => void; + readonly isMobile?: boolean; + readonly hasMarkdown?: boolean; } export default function DropdownMenu(props: Props): ReactNode; @@ -76,6 +88,8 @@ declare module '@theme/CopyPageContent/DropdownMenu/MenuItem' { declare module '@theme/CopyPageContent/Icons' { export { default as ChevronIcon } from '@theme/CopyPageContent/Icons/ChevronIcon'; export { default as MarkdownIcon } from '@theme/CopyPageContent/Icons/MarkdownIcon'; + export { default as HtmlIcon } from 
'@theme/CopyPageContent/Icons/HtmlIcon'; + export { default as CopyIcon } from '@theme/CopyPageContent/Icons/CopyIcon'; export { default as ChatGPTIcon } from '@theme/CopyPageContent/Icons/ChatGPTIcon'; export { default as ClaudeIcon } from '@theme/CopyPageContent/Icons/ClaudeIcon'; } @@ -93,23 +107,22 @@ declare module '@theme/CopyPageContent/Icons/MarkdownIcon' { export default function MarkdownIcon(): ReactNode; } -declare module '@theme/CopyPageContent/Icons/ChatGPTIcon' { +declare module '@theme/CopyPageContent/Icons/HtmlIcon' { import type { ReactNode } from 'react'; - export default function ChatGPTIcon(): ReactNode; + export default function HtmlIcon(): ReactNode; } -declare module '@theme/CopyPageContent/Icons/ClaudeIcon' { +declare module '@theme/CopyPageContent/Icons/CopyIcon' { import type { ReactNode } from 'react'; - export default function ClaudeIcon(): ReactNode; + export default function CopyIcon(): ReactNode; } -// DocItem Content component (existing) -declare module '@theme/DocItem/Content' { +declare module '@theme/CopyPageContent/Icons/ChatGPTIcon' { import type { ReactNode } from 'react'; - import type { WrapperProps } from '@docusaurus/types'; - import type ContentType from '@theme-init/DocItem/Content'; - - export type Props = WrapperProps; + export default function ChatGPTIcon(): ReactNode; +} - export default function DocItemContent(props: Props): ReactNode; +declare module '@theme/CopyPageContent/Icons/ClaudeIcon' { + import type { ReactNode } from 'react'; + export default function ClaudeIcon(): ReactNode; } diff --git a/packages/docusaurus-plugin-llms-txt/src/processing/attachment-processor.ts b/packages/docusaurus-plugin-llms-txt/src/processing/attachment-processor.ts index 0fdc5bf..73cb7d3 100644 --- a/packages/docusaurus-plugin-llms-txt/src/processing/attachment-processor.ts +++ b/packages/docusaurus-plugin-llms-txt/src/processing/attachment-processor.ts @@ -9,8 +9,6 @@ import * as path from 'path'; import * as fs from 'fs-extra'; 
-import { generateSectionId } from '../utils'; - import type { Logger } from '../types'; import type { AttachmentFile } from '../types/public'; @@ -41,7 +39,7 @@ export class AttachmentProcessor { * Process attachment files and copy them to the output directory */ async processAttachments( - attachments: readonly AttachmentFile[], + attachments: readonly (AttachmentFile & { sectionId: string })[], siteDir: string, outDir: string ): Promise { @@ -56,6 +54,7 @@ export class AttachmentProcessor { await fs.ensureDir(attachmentsDir); const processed: ProcessedAttachment[] = []; + const usedFileNames = new Set(); for (const attachment of attachments) { try { @@ -70,12 +69,38 @@ export class AttachmentProcessor { // Read file content as-is const content = await fs.readFile(sourcePath, 'utf-8'); - // Generate output filename (always .md) - const baseName = path.basename( - attachment.source, - path.extname(attachment.source) - ); - const outputFileName = `${baseName}.md`; + // Determine base filename + let baseName: string; + if (attachment.fileName) { + // Use custom fileName if provided + baseName = attachment.fileName; + } else { + // Extract from source path + baseName = path.basename( + attachment.source, + path.extname(attachment.source) + ); + } + + // Handle filename collisions by auto-numbering + let outputFileName = `${baseName}.md`; + let counter = 2; + while (usedFileNames.has(outputFileName)) { + outputFileName = `${baseName}-${counter}.md`; + counter += 1; + } + + // Warn about auto-numbered files (collision detected) + if (counter > 2) { + this.logger.warn( + `Filename collision detected for "${baseName}.md". ` + + `Using "${outputFileName}" instead. 
` + + `Consider setting a custom "fileName" for attachment: ${attachment.title}` + ); + } + + usedFileNames.add(outputFileName); + const outputPath = path.join(attachmentsDir, outputFileName); // Write content to .md file (no processing, just raw content) @@ -85,9 +110,8 @@ export class AttachmentProcessor { `Copied attachment ${attachment.source} to ${outputFileName}` ); - // Use sectionId directly (auto-assign if not provided) - const sectionId = - attachment.sectionId || generateSectionId('attachments'); + // Use sectionId from attachment (provided by collectAllAttachments) + const sectionId = attachment.sectionId; // Create metadata for llms.txt and llms-full.txt processed.push({ diff --git a/packages/docusaurus-plugin-llms-txt/src/processing/processing-coordinator.ts b/packages/docusaurus-plugin-llms-txt/src/processing/processing-coordinator.ts index 32dbed9..8e79f92 100644 --- a/packages/docusaurus-plugin-llms-txt/src/processing/processing-coordinator.ts +++ b/packages/docusaurus-plugin-llms-txt/src/processing/processing-coordinator.ts @@ -6,7 +6,7 @@ */ import { processDocuments } from './route-processor'; -import { filterCachedRoutesForConfig } from '../cache/cache-filter'; +import { filterCachedRoutesForProcessing } from '../cache/cache-filter'; import type { CacheManager } from '../cache/cache'; import type { @@ -59,7 +59,8 @@ export async function coordinateProcessing( if (isCliContext && cache.routes.length > 0) { // CLI context: filter cached routes based on current config - const filteredCachedRoutes = filterCachedRoutesForConfig( + // (union of generate+indexing) + const filteredCachedRoutes = filterCachedRoutesForProcessing( cache.routes, config, logger @@ -81,8 +82,9 @@ export async function coordinateProcessing( logger.warn(`Excluded ${excludedCount} routes by current config`); } } else if (!isCliContext && cache.routes.length > 0) { - // Build context: filter both live routes and cache to ensure consistency - const filteredCachedRoutes = 
filterCachedRoutesForConfig( + // Build context: filter both live routes and cache to ensure + // consistency (union of generate+indexing) + const filteredCachedRoutes = filterCachedRoutesForProcessing( cache.routes, config, logger @@ -143,6 +145,15 @@ export async function coordinateProcessing( if (processedPaths.has(route.path)) { // Find the updated route from processing const updatedRoute = cachedRoutes.find((r) => r.path === route.path); + // Preserve contentSelectors from the original route if not in + // updated route + if ( + updatedRoute && + !updatedRoute.contentSelectors && + route.contentSelectors + ) { + return { ...updatedRoute, contentSelectors: route.contentSelectors }; + } return updatedRoute ?? route; } return route; diff --git a/packages/docusaurus-plugin-llms-txt/src/processing/route-processor.ts b/packages/docusaurus-plugin-llms-txt/src/processing/route-processor.ts index 08cc3fa..aefb08f 100644 --- a/packages/docusaurus-plugin-llms-txt/src/processing/route-processor.ts +++ b/packages/docusaurus-plugin-llms-txt/src/processing/route-processor.ts @@ -12,7 +12,7 @@ import pMap from 'p-map'; import { CacheManager } from '../cache/cache'; import { validateCliContext } from '../cache/cache-strategy'; import { hashFile } from '../cache/cache-validation'; -import { getEffectiveConfigForRoute, getGenerateConfig } from '../config'; +import { getEffectiveConfigForRoute, getMarkdownConfig } from '../config'; import { ERROR_MESSAGES } from '../constants'; import { getErrorMessage } from '../errors'; import { analyzeProcessingContext } from './processing-context'; @@ -72,7 +72,7 @@ async function processSingleRoute( if (doc) { const hash = await hashFile(fullHtmlPath); - const generateConfig = getGenerateConfig(config); + const generateConfig = getMarkdownConfig(config); // Note: This is a temporary CacheManager just for the update method // We don't have siteConfig here, but it's not needed for @@ -83,7 +83,7 @@ async function processSingleRoute( 
cachedRoute, doc, hash, - generateConfig.enableMarkdownFiles + generateConfig.enableFiles ); logger.debug(`Processed route: ${route.path}`); @@ -126,8 +126,16 @@ async function processRoutesStream( outDir, siteConfig ); + // Check if existing cached routes have contentSelectors field + // If not, we need to regenerate them to include the new field + const needsRegeneration = + existingCachedRoutes && + existingCachedRoutes.some((route) => !route.contentSelectors); + const cachedRoutes = - existingCachedRoutes ?? cacheManager.createCachedRouteInfo(routes); + existingCachedRoutes && !needsRegeneration + ? existingCachedRoutes + : cacheManager.createCachedRouteInfo(routes); const directories = { docsDir, mdOutDir }; // Create route lookup table for link resolution diff --git a/packages/docusaurus-plugin-llms-txt/src/public/index.ts b/packages/docusaurus-plugin-llms-txt/src/public/index.ts index d20b744..8018b78 100644 --- a/packages/docusaurus-plugin-llms-txt/src/public/index.ts +++ b/packages/docusaurus-plugin-llms-txt/src/public/index.ts @@ -20,10 +20,8 @@ export { default, validateOptions } from '../index'; // Export public types export type { PluginOptions, - GenerateOptions, - IncludeOptions, - StructureOptions, - ProcessingOptions, + MarkdownOptions, + LlmsTxtOptions, UiOptions, SectionDefinition, RouteRule, diff --git a/packages/docusaurus-plugin-llms-txt/src/transformation/html-file-processor.ts b/packages/docusaurus-plugin-llms-txt/src/transformation/html-file-processor.ts index 1742e72..ca212ed 100644 --- a/packages/docusaurus-plugin-llms-txt/src/transformation/html-file-processor.ts +++ b/packages/docusaurus-plugin-llms-txt/src/transformation/html-file-processor.ts @@ -10,9 +10,9 @@ import path from 'path'; import fs from 'fs-extra'; import { - getProcessingConfig, - getGenerateConfig, - getIncludeConfig, + getMarkdownConfig, + getMarkdownIncludeConfig, + getLlmsTxtConfig, } from '../config'; import { ERROR_MESSAGES } from '../constants'; import { @@ -54,9 
+54,9 @@ export async function processHtmlFileWithContext( try { const html = await fs.readFile(fullHtmlPath, 'utf8'); - const processingConfig = getProcessingConfig(config); - const generateConfig = getGenerateConfig(config); - const contentSelectors = processingConfig.contentSelectors; + const markdownConfig = getMarkdownConfig(config); + const llmsTxtConfig = getLlmsTxtConfig(config); + const contentSelectors = markdownConfig.contentSelectors; let title: string; let description: string; @@ -64,28 +64,25 @@ export async function processHtmlFileWithContext( // Process content if markdown files are enabled OR if llms-full.txt is // enabled - if ( - generateConfig.enableMarkdownFiles || - generateConfig.enableLlmsFullTxt - ) { + if (markdownConfig.enableFiles || llmsTxtConfig.enableLlmsFullTxt) { // Full processing for individual markdown files const conversionOptions: MarkdownConversionOptions = { - remarkStringify: processingConfig.remarkStringify, - remarkGfm: processingConfig.remarkGfm, - rehypeProcessTables: processingConfig.rehypeProcessTables, + remarkStringify: markdownConfig.remarkStringify, + remarkGfm: markdownConfig.remarkGfm, + rehypeProcessTables: markdownConfig.rehypeProcessTables, rehypeProcessLinks: true, baseUrl: siteUrl, - relativePaths: generateConfig.relativePaths, - enableMarkdownFiles: generateConfig.enableMarkdownFiles, - excludeRoutes: getIncludeConfig(config).excludeRoutes, + relativePaths: markdownConfig.relativePaths, + enableFiles: markdownConfig.enableFiles, + excludeRoutes: getMarkdownIncludeConfig(config).excludeRoutes, fullConfig: config, logger, routeLookup, // Pass simplified plugin arrays to the conversion pipeline - beforeDefaultRehypePlugins: processingConfig.beforeDefaultRehypePlugins, - rehypePlugins: processingConfig.rehypePlugins, - beforeDefaultRemarkPlugins: processingConfig.beforeDefaultRemarkPlugins, - remarkPlugins: processingConfig.remarkPlugins, + beforeDefaultRehypePlugins: markdownConfig.beforeDefaultRehypePlugins, 
+ rehypePlugins: markdownConfig.rehypePlugins, + beforeDefaultRemarkPlugins: markdownConfig.beforeDefaultRemarkPlugins, + remarkPlugins: markdownConfig.remarkPlugins, }; const result = convertHtmlToMarkdown( @@ -107,8 +104,8 @@ export async function processHtmlFileWithContext( ); } - // Save markdown files if enableMarkdownFiles is true - if (generateConfig.enableMarkdownFiles) { + // Save markdown files if enableFiles is true + if (markdownConfig.enableFiles) { logger.debug(`Saving markdown: ${routePath}`); const mdPath = htmlPathToMdPath(relHtmlPath, mdOutDir); await saveMarkdownFile(mdPath, markdown); @@ -124,7 +121,7 @@ export async function processHtmlFileWithContext( markdownFile: relativeMdPath, }; } else { - // enableLlmsFullTxt is true but enableMarkdownFiles is false + // enableLlmsFullTxt is true but enableFiles is false // Return content in memory for llms-full.txt generation return { routePath, diff --git a/packages/docusaurus-plugin-llms-txt/src/transformation/plugins/plugin-registry.ts b/packages/docusaurus-plugin-llms-txt/src/transformation/plugins/plugin-registry.ts index 55e59d2..960062d 100644 --- a/packages/docusaurus-plugin-llms-txt/src/transformation/plugins/plugin-registry.ts +++ b/packages/docusaurus-plugin-llms-txt/src/transformation/plugins/plugin-registry.ts @@ -87,7 +87,7 @@ export class PluginRegistry { const linkOptions: RehypeLinksOptions = { baseUrl: options.baseUrl ?? '', relativePaths: options.relativePaths !== false, - enableMarkdownFiles: options.enableMarkdownFiles !== false, + enableFiles: options.enableFiles !== false, excludeRoutes: options.excludeRoutes ?? 
[], fullConfig: options.fullConfig, routeLookup: options.routeLookup, diff --git a/packages/docusaurus-plugin-llms-txt/src/transformation/plugins/rehype-links.ts b/packages/docusaurus-plugin-llms-txt/src/transformation/plugins/rehype-links.ts index e569e43..ecc6e6a 100644 --- a/packages/docusaurus-plugin-llms-txt/src/transformation/plugins/rehype-links.ts +++ b/packages/docusaurus-plugin-llms-txt/src/transformation/plugins/rehype-links.ts @@ -113,10 +113,10 @@ function isExcludedLink(href: string, options: RehypeLinksOptions): boolean { * Check if link transformation should be skipped entirely */ function shouldSkipLinkTransformation(options: RehypeLinksOptions): boolean { - const { enableMarkdownFiles = true, relativePaths = true } = options; + const { enableFiles = true, relativePaths = true } = options; // If relative paths are enabled and markdown files are disabled, // skip processing as links don't need transformation - return relativePaths && !enableMarkdownFiles; + return relativePaths && !enableFiles; } /** @@ -134,7 +134,7 @@ function getExcludedLinkOptions( if (!relativePaths) { return { ...options, - enableMarkdownFiles: false, // Force no .md extension for excluded links + enableFiles: false, // Force no .md extension for excluded links }; } @@ -149,11 +149,7 @@ function transformInternalLink( href: string, options: RehypeLinksOptions ): string { - const { - enableMarkdownFiles = true, - relativePaths = true, - baseUrl = '', - } = options; + const { enableFiles = true, relativePaths = true, baseUrl = '' } = options; // Parse the URL to handle query params and hash fragments properly const parsed = parseLocalURLPath(href); @@ -171,11 +167,10 @@ function transformInternalLink( // Ensure it starts with / for absolute path from site root pathname = ensureLeadingSlash(pathname); - // Remove any existing file extensions and trailing slashes for consistent - // processing + // Remove any existing file extensions for consistent processing pathname = 
pathname.replace(HTML_OR_MD_EXTENSION_REGEX, ''); - // Remove trailing slashes (except for root path) + // Remove trailing slashes before route resolution (except for root path) if (pathname !== '/' && pathname.endsWith('/')) { pathname = pathname.slice(0, -1); } @@ -184,9 +179,10 @@ function transformInternalLink( const resolvedPathname = resolvePathname(pathname, options.routeLookup); // Use our URL formatting utility for the pathname + // formatUrl will handle trailing slashes when adding .md extension const transformedPathname = formatUrl( resolvedPathname, - { relativePaths, enableMarkdownFiles }, + { relativePaths, enableFiles }, baseUrl ); @@ -237,10 +233,10 @@ function processAnchorElement( * Rehype plugin that transforms internal links based on plugin configuration. * * This plugin automatically determines when to run: - * - If relativePaths=true AND enableMarkdownFiles=false → plugin disabled + * - If relativePaths=true AND enableFiles=false → plugin disabled * - If relativePaths=false → prepend baseUrl to internal links - * - If enableMarkdownFiles=true → append .md to internal links - * - If relativePaths=false AND enableMarkdownFiles=true → do both + * - If enableFiles=true → append .md to internal links + * - If relativePaths=false AND enableFiles=true → do both * * Special handling for excluded links: * - If relativePaths=false → excluded links get baseUrl but NO .md extension diff --git a/packages/docusaurus-plugin-llms-txt/src/types/core.ts b/packages/docusaurus-plugin-llms-txt/src/types/core.ts index f603502..bcf4c83 100644 --- a/packages/docusaurus-plugin-llms-txt/src/types/core.ts +++ b/packages/docusaurus-plugin-llms-txt/src/types/core.ts @@ -31,6 +31,7 @@ export interface CachedRouteInfo { readonly contentType: ContentType; readonly isVersioned?: boolean; // true for non-latest versions (isLast=false), false for latest (isLast=true) readonly isGeneratedIndex?: boolean; + readonly contentSelectors: readonly string[]; // Content selectors for 
HTML extraction (always present) } /** @@ -79,6 +80,7 @@ export interface TreeNode { readonly subCategories: readonly TreeNode[]; readonly title?: string; readonly description?: string; + readonly position?: number; readonly indexDoc?: DocInfo; } @@ -133,7 +135,7 @@ export interface ProcessingResult { export interface RehypeLinksOptions { readonly baseUrl?: string; readonly relativePaths?: boolean; - readonly enableMarkdownFiles?: boolean; + readonly enableFiles?: boolean; readonly excludeRoutes?: readonly string[]; readonly fullConfig?: PluginOptions; readonly routeLookup?: Map; @@ -146,7 +148,7 @@ export interface MarkdownConversionOptions { readonly contentSelectors?: readonly string[]; readonly relativePaths?: boolean; readonly baseUrl?: string; - readonly enableMarkdownFiles?: boolean; + readonly enableFiles?: boolean; readonly excludeRoutes?: readonly string[]; readonly fullConfig?: PluginOptions; readonly rehypeProcessTables?: boolean; diff --git a/packages/docusaurus-plugin-llms-txt/src/types/index.ts b/packages/docusaurus-plugin-llms-txt/src/types/index.ts index 59a882a..afab0fe 100644 --- a/packages/docusaurus-plugin-llms-txt/src/types/index.ts +++ b/packages/docusaurus-plugin-llms-txt/src/types/index.ts @@ -11,15 +11,14 @@ export type { PluginOptions, - GenerateOptions, - IncludeOptions, - StructureOptions, - ProcessingOptions, + MarkdownOptions, + LlmsTxtOptions, UiOptions, RouteRule, SectionRoute, OptionalLink, SectionDefinition, + AttachmentFile, Logger, PluginError, PluginConfigError, diff --git a/packages/docusaurus-plugin-llms-txt/src/types/public.ts b/packages/docusaurus-plugin-llms-txt/src/types/public.ts index ccf1699..32fa4e2 100644 --- a/packages/docusaurus-plugin-llms-txt/src/types/public.ts +++ b/packages/docusaurus-plugin-llms-txt/src/types/public.ts @@ -7,7 +7,11 @@ import { Joi } from '@docusaurus/utils-validation'; -import { DEFAULT_CONTENT_SELECTORS, PLUGIN_NAME } from '../constants'; +import { + DEFAULT_CONTENT_SELECTORS, + 
DEFAULT_EXCLUDE_ROUTES, + PLUGIN_NAME, +} from '../constants'; import type { ReportingSeverity } from '@docusaurus/types'; import type { Options as RemarkGfmOptions } from 'remark-gfm'; @@ -44,6 +48,10 @@ export interface SectionDefinition { readonly routes?: readonly SectionRoute[]; /** User-defined nested subsections (IDs must be globally unique) */ readonly subsections?: readonly SectionDefinition[]; + /** Attachments specific to this section */ + readonly attachments?: readonly AttachmentFile[]; + /** Optional links specific to this section */ + readonly optionalLinks?: readonly OptionalLink[]; } /** @@ -79,61 +87,40 @@ export type PluginInput = | [Plugin, unknown?, Settings?]; /** - * Output generation configuration + * Output generation configuration - controls what .md files are created + */ +/** + * Markdown file generation configuration + * Controls which routes get .md files generated (used by copy button feature) */ -export interface GenerateOptions { +export interface MarkdownOptions { /** Whether to generate individual markdown files (default: true) */ - readonly enableMarkdownFiles?: boolean; - /** Generate llms-full.txt with complete content (default: false) */ - readonly enableLlmsFullTxt?: boolean; + readonly enableFiles?: boolean; /** Whether to use relative paths in links (default: true) */ readonly relativePaths?: boolean; -} -/** - * Content inclusion/filtering configuration - */ -export interface IncludeOptions { - /** Include blog posts (default: false) */ - readonly includeBlog?: boolean; - /** Include pages (default: false) */ - readonly includePages?: boolean; - /** Include docs (default: true) */ + // What routes get .md files generated + /** Include docs in markdown generation (default: true) */ readonly includeDocs?: boolean; - /** Include versioned docs in addition to current version (default: true) */ + /** Include versioned docs in markdown generation (default: true) */ readonly includeVersionedDocs?: boolean; - /** Include generated 
category index pages (default: true) */ + /** Include blog posts in markdown generation (default: false) */ + readonly includeBlog?: boolean; + /** Include standalone pages in markdown generation (default: false) */ + readonly includePages?: boolean; + /** + * Include generated category index pages in markdown generation + * (default: true) + */ readonly includeGeneratedIndex?: boolean; - /** Glob patterns to exclude from processing */ + /** Exclude specific routes from markdown generation */ readonly excludeRoutes?: readonly string[]; -} - -/** - * Content structure and organization configuration - */ -export interface StructureOptions { - /** Section definitions for organizing content */ - readonly sections?: readonly SectionDefinition[]; - /** Site title for llms.txt header */ - readonly siteTitle?: string; - /** Site description for llms.txt header */ - readonly siteDescription?: string; - /** Whether to include descriptions in llms.txt links (default: true) */ - readonly enableDescriptions?: boolean; - /** Additional links to include in llms.txt */ - readonly optionalLinks?: readonly OptionalLink[]; -} -/** - * Content processing and transformation configuration - */ -export interface ProcessingOptions { + // Content extraction and processing /** CSS selectors for content extraction */ readonly contentSelectors?: readonly string[]; /** Route-specific processing rules */ readonly routeRules?: readonly RouteRule[]; - /** Raw files to attach and include in llms.txt/llms-full.txt */ - readonly attachments?: readonly AttachmentFile[]; // Markdown processing options /** Remark stringify options for markdown generation */ @@ -154,6 +141,58 @@ export interface ProcessingOptions { readonly remarkPlugins?: readonly PluginInput[]; } +/** + * llms.txt index file configuration + * Controls what content appears in the main /llms.txt file + */ +export interface LlmsTxtOptions { + /** Generate llms-full.txt with complete content (default: false) */ + readonly 
enableLlmsFullTxt?: boolean; + + // Content filtering - what goes in llms.txt + /** Include docs in main llms.txt (default: true) */ + readonly includeDocs?: boolean; + /** Include versioned docs in main llms.txt (default: false) */ + readonly includeVersionedDocs?: boolean; + /** Include blog posts in main llms.txt (default: false) */ + readonly includeBlog?: boolean; + /** Include standalone pages in main llms.txt (default: false) */ + readonly includePages?: boolean; + /** Include generated category index pages in main llms.txt (default: true) */ + readonly includeGeneratedIndex?: boolean; + /** Exclude specific routes from main llms.txt */ + readonly excludeRoutes?: readonly string[]; + + // Structure and organization + /** Section definitions for organizing content */ + readonly sections?: readonly SectionDefinition[]; + /** Site title for llms.txt header */ + readonly siteTitle?: string; + /** Site description for llms.txt header */ + readonly siteDescription?: string; + /** Whether to include descriptions in llms.txt links (default: true) */ + readonly enableDescriptions?: boolean; + /** + * Automatically determine section heading depth based on route depth + * (default: 1) + * - 1: depth-1 routes get H2, depth-2 routes get H3, etc. + * - 2: depth-1 routes get H3, depth-2 routes get H4, etc. 
+ */ + readonly autoSectionDepth?: 1 | 2 | 3 | 4 | 5 | 6; + /** + * Position value assigned to auto-generated sections + * (default: undefined) + * - undefined: auto-sections appear after positioned sections + * - number: auto-sections sort with other positioned sections + */ + readonly autoSectionPosition?: number; + /** Additional links to include in llms.txt */ + readonly optionalLinks?: readonly OptionalLink[]; + + // Additional content + /** Raw files to attach and include in llms.txt/llms-full.txt */ + readonly attachments?: readonly AttachmentFile[]; +} /** * User interface features configuration */ @@ -168,10 +207,24 @@ export interface UiOptions { export interface CopyPageContentOptions { /** Custom button label (default: 'Copy Page') */ readonly buttonLabel?: string; - /** Available copy and share actions */ + /** Control where the copy button is displayed */ + readonly display?: { + /** Show on docs pages (default: true) */ + readonly docs?: boolean; + /** Exclude specific routes by path pattern */ + readonly excludeRoutes?: readonly string[]; + }; + /** + * Strategy for what content to copy (default: 'prefer-markdown') + */ + readonly contentStrategy?: 'prefer-markdown' | 'html-only'; + /** Available actions in the dropdown menu */ readonly actions?: { - /** Enable markdown copy option (default: true) */ - readonly markdown?: boolean; + /** + * Enable view markdown option (default: true) - only shows when + * markdown is available + */ + readonly viewMarkdown?: boolean; /** AI integration options */ readonly ai?: { /** ChatGPT integration - true enables with default prompt */ @@ -200,10 +253,15 @@ export interface AttachmentFile { readonly title: string; /** Optional description */ readonly description?: string; - /** Section ID to place the attachment under */ - readonly sectionId?: string; - /** Whether to include this attachment's content in llms-full.txt - * (default: true) */ + /** + * Custom output filename (without extension) + * If not provided, 
uses source filename. Auto-numbered if collision. + */ + readonly fileName?: string; + /** + * Whether to include this attachment's content in llms-full.txt + * (default: true) + */ readonly includeInFullTxt?: boolean; } @@ -225,14 +283,10 @@ export interface PluginOptions { readonly runOnPostBuild?: boolean; // Grouped configuration options - /** Output generation configuration */ - readonly generate?: GenerateOptions; - /** Content inclusion/filtering configuration */ - readonly include?: IncludeOptions; - /** Content structure and organization configuration */ - readonly structure?: StructureOptions; - /** Content processing and transformation configuration */ - readonly processing?: ProcessingOptions; + /** Markdown file generation - controls .md file creation and processing */ + readonly markdown?: MarkdownOptions; + /** llms.txt index file - controls what goes in main /llms.txt */ + readonly llmsTxt?: LlmsTxtOptions; /** User interface features configuration */ readonly ui?: UiOptions; } @@ -300,25 +354,56 @@ export const pluginOptionsSchema = Joi.object({ logLevel: Joi.number().integer().min(0).max(3).default(1), runOnPostBuild: Joi.boolean().default(true), - // Output generation configuration - generate: Joi.object({ - enableMarkdownFiles: Joi.boolean().default(true), - enableLlmsFullTxt: Joi.boolean().default(false), + // Markdown file generation - controls .md file creation and processing + markdown: Joi.object({ + enableFiles: Joi.boolean().default(true), relativePaths: Joi.boolean().default(true), - }).default({}), - - // Content inclusion/filtering configuration - include: Joi.object({ - includeBlog: Joi.boolean().default(false), - includePages: Joi.boolean().default(false), includeDocs: Joi.boolean().default(true), includeVersionedDocs: Joi.boolean().default(true), + includeBlog: Joi.boolean().default(false), + includePages: Joi.boolean().default(false), includeGeneratedIndex: Joi.boolean().default(true), - excludeRoutes: 
Joi.array().items(Joi.string()).default([]), + excludeRoutes: Joi.array() + .items(Joi.string()) + .default([...DEFAULT_EXCLUDE_ROUTES]), + // Content extraction and processing + contentSelectors: Joi.array() + .items(Joi.string()) + .min(1) + .default([...DEFAULT_CONTENT_SELECTORS]), + routeRules: Joi.array() + .items( + Joi.object({ + route: Joi.string().required(), + contentSelectors: Joi.array().items(Joi.string()), + }) + ) + .default([]), + // Markdown processing options + remarkStringify: Joi.object().unknown(true).default({}), + remarkGfm: Joi.alternatives() + .try(Joi.boolean(), Joi.object().unknown(true)) + .default(true), + rehypeProcessTables: Joi.boolean().default(true), + // Unified plugin system + beforeDefaultRehypePlugins: Joi.array().items(Joi.any()).default([]), + rehypePlugins: Joi.array().items(Joi.any()).default([]), + beforeDefaultRemarkPlugins: Joi.array().items(Joi.any()).default([]), + remarkPlugins: Joi.array().items(Joi.any()).default([]), }).default({}), - // Content structure and organization configuration - structure: Joi.object({ + // llms.txt index file - controls what goes in main /llms.txt + llmsTxt: Joi.object({ + enableLlmsFullTxt: Joi.boolean().default(false), + includeDocs: Joi.boolean().default(true), + includeVersionedDocs: Joi.boolean().default(false), + includeBlog: Joi.boolean().default(false), + includePages: Joi.boolean().default(false), + includeGeneratedIndex: Joi.boolean().default(true), + excludeRoutes: Joi.array() + .items(Joi.string()) + .default([...DEFAULT_EXCLUDE_ROUTES]), + // Structure and organization sections: Joi.array().items( Joi.object({ id: Joi.string() @@ -334,11 +419,35 @@ export const pluginOptionsSchema = Joi.object({ }) ), subsections: Joi.array().items(Joi.link('#sectionDefinition')), + // Section-specific attachments + attachments: Joi.array() + .items( + Joi.object({ + source: Joi.string().required(), + title: Joi.string().required(), + description: Joi.string(), + fileName: Joi.string(), + 
includeInFullTxt: Joi.boolean().default(true), + }) + ) + .default([]), + // Section-specific optional links + optionalLinks: Joi.array() + .items( + Joi.object({ + title: Joi.string().required(), + url: Joi.string().required(), + description: Joi.string(), + }) + ) + .default([]), }).id('sectionDefinition') ), siteTitle: Joi.string().allow(''), siteDescription: Joi.string().allow(''), enableDescriptions: Joi.boolean().default(true), + autoSectionDepth: Joi.number().valid(1, 2, 3, 4, 5, 6).default(1), + autoSectionPosition: Joi.number(), optionalLinks: Joi.array() .items( Joi.object({ @@ -348,74 +457,18 @@ export const pluginOptionsSchema = Joi.object({ }) ) .default([]), - }).default({}), - - // Content processing and transformation configuration - processing: Joi.object({ - contentSelectors: Joi.array() - .items(Joi.string()) - .min(1) - .default([...DEFAULT_CONTENT_SELECTORS]), - routeRules: Joi.array() - .items( - Joi.object({ - route: Joi.string().required(), - contentSelectors: Joi.array().items(Joi.string()), - }) - ) - .default([]), + // Global attachments (will be placed in their own section) attachments: Joi.array() .items( Joi.object({ source: Joi.string().required(), title: Joi.string().required(), description: Joi.string(), - sectionId: Joi.string().pattern(/^[a-z0-9-]+$/), + fileName: Joi.string(), includeInFullTxt: Joi.boolean().default(true), }) ) .default([]), - - // Markdown processing options - remarkStringify: Joi.object().unknown(true).default({}), - remarkGfm: Joi.alternatives() - .try(Joi.boolean(), Joi.object().unknown(true)) - .default(true), - rehypeProcessTables: Joi.boolean().default(true), - - // Unified plugin system (standard unified.js formats) - beforeDefaultRehypePlugins: Joi.array() - .items( - Joi.alternatives().try( - Joi.function(), - Joi.array().items(Joi.function(), Joi.any(), Joi.any()).min(1).max(3) - ) - ) - .default([]), - rehypePlugins: Joi.array() - .items( - Joi.alternatives().try( - Joi.function(), - 
Joi.array().items(Joi.function(), Joi.any(), Joi.any()).min(1).max(3) - ) - ) - .default([]), - beforeDefaultRemarkPlugins: Joi.array() - .items( - Joi.alternatives().try( - Joi.function(), - Joi.array().items(Joi.function(), Joi.any(), Joi.any()).min(1).max(3) - ) - ) - .default([]), - remarkPlugins: Joi.array() - .items( - Joi.alternatives().try( - Joi.function(), - Joi.array().items(Joi.function(), Joi.any(), Joi.any()).min(1).max(3) - ) - ) - .default([]), }).default({}), // User interface features configuration @@ -425,8 +478,17 @@ export const pluginOptionsSchema = Joi.object({ Joi.boolean(), Joi.object({ buttonLabel: Joi.string().default('Copy Page'), + display: Joi.object({ + docs: Joi.boolean().default(true), + excludeRoutes: Joi.array() + .items(Joi.string()) + .default([...DEFAULT_EXCLUDE_ROUTES]), + }).default({}), + contentStrategy: Joi.string() + .valid('prefer-markdown', 'html-only') + .default('prefer-markdown'), actions: Joi.object({ - markdown: Joi.boolean().default(true), + viewMarkdown: Joi.boolean().default(true), ai: Joi.object({ chatGPT: Joi.alternatives() .try( diff --git a/packages/docusaurus-plugin-llms-txt/src/utils/url.ts b/packages/docusaurus-plugin-llms-txt/src/utils/url.ts index 41482b6..17f7ad9 100644 --- a/packages/docusaurus-plugin-llms-txt/src/utils/url.ts +++ b/packages/docusaurus-plugin-llms-txt/src/utils/url.ts @@ -43,10 +43,19 @@ export function stripBaseUrl(routePath: string, baseUrl: string): string { // This ensures proper leading/trailing slash handling const normalizedBase = normalizeUrl([baseUrl]); + // Extract pathname from baseUrl if it's a full URL + let basePathname = normalizedBase; + try { + const url = new URL(normalizedBase); + basePathname = url.pathname; + } catch { + // Not a full URL, use as-is + } + // Remove trailing slash for comparison - const baseForComparison = normalizedBase.replace(/\/$/, ''); + const baseForComparison = basePathname.replace(/\/$/, ''); - // If route starts with baseUrl, remove it + 
// If route starts with baseUrl pathname, remove it if (routePath.startsWith(baseForComparison)) { const stripped = routePath.slice(baseForComparison.length); // Ensure the result starts with / or is empty for root @@ -71,33 +80,60 @@ export function stripBaseUrl(routePath: string, baseUrl: string): string { export function formatUrl( routePath: string, options: { - enableMarkdownFiles?: boolean; + enableFiles?: boolean; relativePaths?: boolean; markdownFile?: string; }, baseUrl = '' ): string { - const { - enableMarkdownFiles = true, - relativePaths = true, - markdownFile, - } = options; + const { enableFiles = true, relativePaths = true, markdownFile } = options; // Ensure route path starts with / let targetPath = ensureLeadingSlash(routePath); // Use markdown file path if available and enabled - if (enableMarkdownFiles && markdownFile) { + if (enableFiles && markdownFile) { // Ensure markdown file path starts with / for consistency targetPath = ensureLeadingSlash(markdownFile); - } else if (enableMarkdownFiles) { + } else if (enableFiles) { + // Remove trailing slash before adding .md extension to prevent /.md + const pathForExtension = + targetPath.endsWith('/') && targetPath !== '/' + ? targetPath.slice(0, -1) + : targetPath; // Add .md extension to route path - targetPath = targetPath === '/' ? INDEX_MD : `${targetPath}.md`; + targetPath = pathForExtension === '/' ? 
INDEX_MD : `${pathForExtension}.md`; } // Handle absolute vs relative paths if (relativePaths === false && baseUrl) { - return normalizeUrl([baseUrl, targetPath]); + // Strip baseUrl if it's already in the path to prevent duplication + const pathWithoutBase = stripBaseUrl(targetPath, baseUrl); + return normalizeUrl([baseUrl, pathWithoutBase]); + } + + // For relative paths with baseUrl, ensure baseUrl path is included + if (relativePaths === true && baseUrl) { + // Extract pathname from baseUrl if it's a full URL + let basePathname = baseUrl; + try { + const url = new URL(baseUrl); + basePathname = url.pathname; + } catch { + // Not a full URL, use as-is + } + + // Normalize baseUrl pathname + basePathname = normalizeUrl([basePathname]); + const baseForComparison = basePathname.replace(/\/$/, ''); + + // If targetPath doesn't start with baseUrl pathname, prepend it + if ( + baseForComparison !== '/' && + !targetPath.startsWith(baseForComparison) + ) { + return normalizeUrl([basePathname, targetPath]); + } } // For relative paths, ensure we preserve the leading slash diff --git a/packages/docusaurus-theme-llms-txt/CHANGELOG.md b/packages/docusaurus-theme-llms-txt/CHANGELOG.md index 75233a2..8c78e92 100644 --- a/packages/docusaurus-theme-llms-txt/CHANGELOG.md +++ b/packages/docusaurus-theme-llms-txt/CHANGELOG.md @@ -1,5 +1,117 @@ # @signalwire/docusaurus-theme-llms-txt +## 1.0.0-alpha.6 + +### Patch Changes + +- Organize links by path now in llms-txt +- 85c2631: Fixed attachments filename bug + +## 1.0.0-alpha.4 + +### Major Changes + +- 75c2b75: Alpha Release + +### Patch Changes + +- Fix `contentStrategy` bug in dropdown menu and copy button: + - Dropdown menu now respects `contentStrategy` setting, showing "Copy Raw HTML" when set to + `'html-only'` instead of incorrectly showing "Copy Raw Markdown" + - Dropdown menu icon changes dynamically (Markdown icon vs HTML icon) based on what will be copied + - "View Markdown" option remains available when markdown file 
exists, independent of + `contentStrategy` (allows viewing source even when copying HTML) + - Removed unused `hasMarkdown` prop from CopyButton component (only needed in DropdownMenu) + +- ec2e25b: Code cleanup and cache optimization: + - Remove dead code (className prop, normalizePathname export, CopyContentData export) + - Optimize cache implementation (replace over-engineered promise cache with minimal in-memory + cache) + - Fix resize re-fetch bug (component no longer re-fetches data when switching between + mobile/desktop views) + - Reduce code size by 47% in useCopyContentData hook + - Changed the location of the CopyButtonContent component. The theme now swizzles DocItem/Layout + and conditionally puts the Copy button content component after it or below it + +- ef3f363: fix styling issue +- e1246b2: Major architecture improvements for better plugin compatibility: + + **Component Changes:** + - Switched from ejecting `DocItem/Layout` to wrapping `DocBreadcrumbs` + - This prevents conflicts with other plugins that customize the layout + - Uses WRAP pattern instead of EJECT for better compatibility + - Changed internal import from `@theme-original` to `@theme-init` following Docusaurus best + practices for theme enhancers + + **Improvements:** + - Fixed type declarations to accurately reflect component props + - Removed unused `className` prop from `CopyPageContent` + - Fixed `DocBreadcrumbs` type declaration for proper wrapping support + - Added `margin-left: auto` to ensure copy button always aligns right in desktop view + - Fixed package publishing configuration + - Added `src/theme` directory to published files for TypeScript swizzling support + - Updated devDependencies for proper type resolution + - Changed `react-icons` from exact version to version range + + **Documentation:** + - Updated README with correct swizzle examples for `DocBreadcrumbs` + - Added explanation of `@theme-init` vs `@theme-original` usage + - Updated swizzle configuration to 
reflect new safe wrapping pattern + + **Compatibility:** + - Now compatible with plugins like `docusaurus-plugin-openapi-docs` that also customize layouts + - Follows official Docusaurus theme enhancer pattern (similar to + `@docusaurus/theme-live-codeblock`) + - Users can now safely wrap our enhanced breadcrumbs with `@theme-original/DocBreadcrumbs` + +## 1.0.0-alpha.3 + +### Patch Changes + +- Major architecture improvements for better plugin compatibility: + + **Breaking Changes:** + - Switched from ejecting `DocItem/Layout` to wrapping `DocBreadcrumbs` + - This prevents conflicts with other plugins that customize the layout + - Uses WRAP pattern instead of EJECT for better compatibility + - Changed internal import from `@theme-original` to `@theme-init` following Docusaurus best + practices for theme enhancers + + **Improvements:** + - Fixed type declarations to accurately reflect component props + - Removed unused `className` prop from `CopyPageContent` + - Fixed `DocBreadcrumbs` type declaration for proper wrapping support + - Added `margin-left: auto` to ensure copy button always aligns right in desktop view + - Fixed package publishing configuration + - Added `src/theme` directory to published files for TypeScript swizzling support + - Updated devDependencies for proper type resolution + - Changed `react-icons` from exact version to version range + + **Documentation:** + - Updated README with correct swizzle examples for `DocBreadcrumbs` + - Added explanation of `@theme-init` vs `@theme-original` usage + - Updated swizzle configuration to reflect new safe wrapping pattern + + **Compatibility:** + - Now compatible with plugins like `docusaurus-plugin-openapi-docs` that also customize layouts + - Follows official Docusaurus theme enhancer pattern (similar to + `@docusaurus/theme-live-codeblock`) + - Users can now safely wrap our enhanced breadcrumbs with `@theme-original/DocBreadcrumbs` + +## 1.0.0-alpha.2 + +### Patch Changes + +- Code cleanup and cache 
optimization: + - Remove dead code (className prop, normalizePathname export, CopyContentData export) + - Optimize cache implementation (replace over-engineered promise cache with minimal in-memory + cache) + - Fix resize re-fetch bug (component no longer re-fetches data when switching between + mobile/desktop views) + - Reduce code size by 47% in useCopyContentData hook + - Changed location of component to appear after breadcrumbs + - Theme now swizzles doc layout instead of DocItem/Content + ## 1.0.0-alpha.1 ### Patch Changes diff --git a/packages/docusaurus-theme-llms-txt/README.md b/packages/docusaurus-theme-llms-txt/README.md index 42ce1cc..edceffc 100644 --- a/packages/docusaurus-theme-llms-txt/README.md +++ b/packages/docusaurus-theme-llms-txt/README.md @@ -202,49 +202,48 @@ customize components: #### Safe Swizzling (Recommended) ```bash -# Swizzle the main copy button (safe) -npx docusaurus swizzle @signalwire/docusaurus-theme-llms-txt CopyPageButton +# Wrap the breadcrumbs to customize button placement (safe) +npx docusaurus swizzle @signalwire/docusaurus-theme-llms-txt DocBreadcrumbs --wrap -# Wrap the copy button for additional functionality (safe) +# Wrap the copy button for additional functionality (safe - not commonly needed) npx docusaurus swizzle @signalwire/docusaurus-theme-llms-txt CopyPageButton --wrap ``` -#### Content Wrapper Swizzling - -```bash -# Wrap DocItem/Content to modify layout (safe) -npx docusaurus swizzle @signalwire/docusaurus-theme-llms-txt DocItem/Content --wrap - -# Note: Ejecting DocItem/Content is unsafe and may break plugin integration -``` +**Note:** This plugin uses a WRAP pattern on `DocBreadcrumbs` instead of ejecting layout components. +This prevents conflicts with other plugins that might also modify the documentation layout. 
### Custom Component Implementation After swizzling, you can customize the components: ```tsx -// src/theme/CopyPageButton/index.tsx +// src/theme/DocBreadcrumbs/index.tsx import React from 'react'; -import OriginalCopyPageButton from '@theme-original/CopyPageButton'; +// Use @theme-original to wrap our enhanced breadcrumbs +import DocBreadcrumbsEnhanced from '@theme-original/DocBreadcrumbs'; -export default function CopyPageButton(props) { +export default function DocBreadcrumbs(props) { return ( -
- - AI Ready +
+ + Custom Enhancement
); } ``` +**Note:** This plugin uses `@theme-init/DocBreadcrumbs` internally to wrap the base Docusaurus +breadcrumbs. As a user, you should use `@theme-original/DocBreadcrumbs` to wrap our enhanced +version. + ### TypeScript Support Full TypeScript definitions are included: ```tsx -import type { Props as CopyPageButtonProps } from '@theme/CopyPageButton'; +import type { Props as DocBreadcrumbsProps } from '@theme/DocBreadcrumbs'; -const CustomButton: React.FC = ({ className }) => { +const CustomBreadcrumbs: React.FC = (props) => { // Your implementation }; ``` diff --git a/packages/docusaurus-theme-llms-txt/package.json b/packages/docusaurus-theme-llms-txt/package.json index adbd868..c44dccb 100644 --- a/packages/docusaurus-theme-llms-txt/package.json +++ b/packages/docusaurus-theme-llms-txt/package.json @@ -1,6 +1,6 @@ { "name": "@signalwire/docusaurus-theme-llms-txt", - "version": "1.0.0-alpha.1", + "version": "1.0.0-alpha.8", "description": "Docusaurus theme components for llms-txt plugin including CopyPageButton", "main": "./lib/index.js", "types": "src/theme-llms-txt.d.ts", @@ -12,6 +12,7 @@ }, "files": [ "lib", + "src/theme", "src/theme-llms-txt.d.ts", "README.md", "CHANGELOG.md" @@ -46,10 +47,13 @@ "@docusaurus/core": "^3.0.0", "@docusaurus/theme-common": "^3.0.0", "clsx": "^2.0.0", - "react-icons": "5.5.0" + "react-icons": "^5.5.0" }, "devDependencies": { + "@docusaurus/module-type-aliases": "^3.0.0", + "@docusaurus/plugin-content-docs": "^3.0.0", "@docusaurus/types": "^3.0.0", + "@signalwire/docusaurus-plugin-llms-txt": "2.0.0-alpha.6", "@types/node": "^22.15.19", "@types/react": "^19.1.13", "@types/react-dom": "^19.1.9", diff --git a/packages/docusaurus-theme-llms-txt/src/getSwizzleConfig.ts b/packages/docusaurus-theme-llms-txt/src/getSwizzleConfig.ts index b2f7ec1..4aabe12 100644 --- a/packages/docusaurus-theme-llms-txt/src/getSwizzleConfig.ts +++ b/packages/docusaurus-theme-llms-txt/src/getSwizzleConfig.ts @@ -9,12 +9,18 @@ import type { SwizzleConfig 
} from '@docusaurus/types'; /** * Swizzle config for theme components - * Based on Docusaurus patterns from: - * https://github.com/facebook/docusaurus/blob/main/packages/docusaurus-theme-classic/src/getSwizzleConfig.ts */ export default function getSwizzleConfig(): SwizzleConfig { return { components: { + DocBreadcrumbs: { + actions: { + eject: 'unsafe', + wrap: 'safe', + }, + description: + 'Breadcrumbs wrapper that positions the CopyPageContent button next to breadcrumbs. Wrapping is safe and recommended. Ejecting is unsafe as it may conflict with other plugins.', + }, CopyPageContent: { actions: { eject: 'unsafe', @@ -87,14 +93,6 @@ export default function getSwizzleConfig(): SwizzleConfig { description: 'Markdown file icon component. Safe to replace with custom icon.', }, - 'DocItem/Content': { - actions: { - eject: 'safe', - wrap: 'safe', - }, - description: - 'Documentation content wrapper that integrates the copy page button with Docusaurus content. Safe to customize for layout modifications and additional content integration.', - }, }, }; } diff --git a/packages/docusaurus-theme-llms-txt/src/hooks/useCopyActions.ts b/packages/docusaurus-theme-llms-txt/src/hooks/useCopyActions.ts index b90ff74..f76b665 100644 --- a/packages/docusaurus-theme-llms-txt/src/hooks/useCopyActions.ts +++ b/packages/docusaurus-theme-llms-txt/src/hooks/useCopyActions.ts @@ -7,19 +7,48 @@ import { useState } from 'react'; import { useLocation } from '@docusaurus/router'; +import useBaseUrl from '@docusaurus/useBaseUrl'; import { - constructMarkdownUrl, constructFullUrl, + constructMarkdownUrl, type SiteConfig, } from '../utils/copyButton'; import type { ResolvedCopyPageContentOptions } from './useCopyButtonConfig'; +/** + * Extract content from current DOM using CSS selectors + * Returns HTML content from the first matching element + */ +function extractContentFromDom(selectors: readonly string[]): string | null { + // Try each selector in order + for (const selector of selectors) { 
+ const element = document.querySelector(selector); + if (element) { + // Return the HTML content of the first matching element + return element.innerHTML || null; + } + } + + // Fallback: try to find main content areas + const fallbackSelectors = ['main', '.main-wrapper', '#__docusaurus']; + for (const selector of fallbackSelectors) { + const element = document.querySelector(selector); + if (element) { + return element.innerHTML || null; + } + } + + return null; +} + export default function useCopyActions( finalConfig: ResolvedCopyPageContentOptions, siteConfig: SiteConfig | undefined, - setIsOpen: (isOpen: boolean) => void + setIsOpen: (isOpen: boolean) => void, + hasMarkdown?: boolean, + contentSelectors?: readonly string[] ): { copyStatus: 'idle' | 'success' | 'error'; handleAction: (action: string) => Promise; @@ -28,27 +57,55 @@ export default function useCopyActions( 'idle' ); const location = useLocation(); + // Docusaurus strips baseUrl from location.pathname, so we use it directly + // for relative fetches (which are served with baseUrl by the server) const pathname = location.pathname; + // For constructing full absolute URLs, we need to add baseUrl back + const pathnameWithBase = useBaseUrl(pathname); const handleAction = async (action: string) => { setIsOpen(false); if (action === 'copyRaw') { - // Copy raw markdown content using ClipboardItem with Promise + // Copy content using ClipboardItem with Promise // This approach works across all modern browsers and maintains // user gesture context required by Safari try { - const markdownUrl = constructMarkdownUrl(pathname); - - // Create a promise that fetches and returns the content as a Blob - const textPromise = fetch(markdownUrl) - .then((response) => { - if (!response.ok) { - throw new Error('Failed to fetch markdown'); - } - return response.text(); - }) - .then((text) => new Blob([text], { type: 'text/plain' })); + let textPromise: Promise; + + // Check contentStrategy - if html-only, always fetch 
HTML + const shouldFetchMarkdown = + finalConfig.contentStrategy === 'prefer-markdown' && hasMarkdown; + + if (shouldFetchMarkdown) { + // Fetch markdown content directly + // Use pathnameWithBase for proper routing with baseUrl + const markdownUrl = constructMarkdownUrl(pathnameWithBase); + textPromise = fetch(markdownUrl) + .then((response) => { + if (!response.ok) { + throw new Error(`Failed to fetch markdown: ${response.status}`); + } + return response.text(); + }) + .then((text) => new Blob([text], { type: 'text/plain' })); + } else { + // No markdown available - extract content from current DOM + // We're already on the HTML page, so just query the DOM directly + console.debug('No markdown available, extracting from current DOM'); + + // Extract content directly from document using selectors + const extracted = + contentSelectors && contentSelectors.length > 0 + ? extractContentFromDom(contentSelectors) + : document.body.innerHTML; + + textPromise = Promise.resolve( + new Blob([extracted || document.body.innerHTML], { + type: 'text/plain', + }) + ); + } // Create ClipboardItem with the promise const clipboardItem = new ClipboardItem({ @@ -72,19 +129,36 @@ export default function useCopyActions( setCopyStatus('error'); setTimeout(() => setCopyStatus('idle'), 3000); } + } else if (action === 'viewMarkdown') { + // Open markdown file in new tab using relative URL + // Use pathnameWithBase which includes baseUrl for proper routing + const markdownUrl = constructMarkdownUrl(pathnameWithBase); + window.open(markdownUrl, '_blank'); + setCopyStatus('success'); + setTimeout(() => setCopyStatus('idle'), 2000); } else if (action === 'openChatGPT' && siteConfig) { - // Open ChatGPT with content - const fullUrl = constructFullUrl(pathname, siteConfig); + // Open ChatGPT with content and search hints enabled + // Use pathnameWithBase which includes baseUrl for full URL construction + const fullUrl = constructFullUrl( + pathnameWithBase, + siteConfig, + hasMarkdown + ); 
const encodedPrompt = encodeURIComponent( `${finalConfig.chatGPT.prompt} ${fullUrl}` ); - const chatUrl = `https://chatgpt.com/?q=${encodedPrompt}`; + const chatUrl = `https://chatgpt.com/?hints=search&prompt=${encodedPrompt}`; window.open(chatUrl, '_blank'); setCopyStatus('success'); setTimeout(() => setCopyStatus('idle'), 2000); } else if (action === 'openClaude' && siteConfig) { // Open Claude with content - const fullUrl = constructFullUrl(pathname, siteConfig); + // Use pathnameWithBase which includes baseUrl for full URL construction + const fullUrl = constructFullUrl( + pathnameWithBase, + siteConfig, + hasMarkdown + ); const encodedPrompt = encodeURIComponent( `${finalConfig.claude.prompt} ${fullUrl}` ); diff --git a/packages/docusaurus-theme-llms-txt/src/hooks/useCopyButtonConfig.ts b/packages/docusaurus-theme-llms-txt/src/hooks/useCopyButtonConfig.ts index 272c485..2b0e91a 100644 --- a/packages/docusaurus-theme-llms-txt/src/hooks/useCopyButtonConfig.ts +++ b/packages/docusaurus-theme-llms-txt/src/hooks/useCopyButtonConfig.ts @@ -11,7 +11,12 @@ import type { CopyPageContentOptions } from '../types/copyButton'; // Resolved configuration type export interface ResolvedCopyPageContentOptions { buttonLabel: string; - markdown: boolean; + display: { + docs: boolean; + excludeRoutes: readonly string[]; + }; + contentStrategy: 'prefer-markdown' | 'html-only'; + viewMarkdown: boolean; chatGPT: { enabled: boolean; prompt: string }; claude: { enabled: boolean; prompt: string }; } @@ -19,7 +24,12 @@ export interface ResolvedCopyPageContentOptions { // Default configuration const DEFAULT_CONFIG: ResolvedCopyPageContentOptions = { buttonLabel: 'Copy Page', - markdown: true, + display: { + docs: true, + excludeRoutes: [], + }, + contentStrategy: 'prefer-markdown', + viewMarkdown: true, chatGPT: { enabled: true, prompt: 'Analyze this documentation:', @@ -36,10 +46,6 @@ const DEFAULT_CONFIG: ResolvedCopyPageContentOptions = { export default function useCopyButtonConfig( 
pluginConfig: boolean | CopyPageContentOptions | undefined ): ResolvedCopyPageContentOptions { - if (pluginConfig === false) { - throw new Error('Component should not render when disabled'); - } - // Memoize configuration merging to prevent unnecessary recalculations return useMemo(() => { let baseConfig = { ...DEFAULT_CONFIG }; @@ -48,7 +54,16 @@ export default function useCopyButtonConfig( if (pluginConfig && typeof pluginConfig === 'object') { baseConfig = { buttonLabel: pluginConfig.buttonLabel ?? baseConfig.buttonLabel, - markdown: pluginConfig.actions?.markdown ?? baseConfig.markdown, + display: { + docs: pluginConfig.display?.docs ?? baseConfig.display.docs, + excludeRoutes: + pluginConfig.display?.excludeRoutes ?? + baseConfig.display.excludeRoutes, + }, + contentStrategy: + pluginConfig.contentStrategy ?? baseConfig.contentStrategy, + viewMarkdown: + pluginConfig.actions?.viewMarkdown ?? baseConfig.viewMarkdown, chatGPT: { enabled: (() => { if (typeof pluginConfig.actions?.ai?.chatGPT === 'boolean') { diff --git a/packages/docusaurus-theme-llms-txt/src/hooks/useCopyContentData.ts b/packages/docusaurus-theme-llms-txt/src/hooks/useCopyContentData.ts index 6f17589..b98f138 100644 --- a/packages/docusaurus-theme-llms-txt/src/hooks/useCopyContentData.ts +++ b/packages/docusaurus-theme-llms-txt/src/hooks/useCopyContentData.ts @@ -4,115 +4,80 @@ * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
*/ -import { useState, useEffect, useRef, useMemo } from 'react'; +import { useState, useEffect } from 'react'; -// JSON data structure +import ExecutionEnvironment from '@docusaurus/ExecutionEnvironment'; + +// JSON data structure - per-route copy content info interface CopyContentData { - [routePath: string]: boolean; + [routePath: string]: { + shouldDisplay: boolean; + hasMarkdown: boolean; + contentSelectors: readonly string[]; + }; } -interface CacheEntry { - url: string; - data: CopyContentData; - promise?: Promise; -} +// Simple in-memory cache: URL → data +// Prevents re-fetching when component remounts +// (e.g., on window resize mobile ↔ desktop) +const dataCache = new Map(); export default function useCopyContentData(dataUrl: string | undefined): { copyContentData: CopyContentData | null; isLoading: boolean; } { - // Component-managed cache using useRef for persistence across re-renders - // This cache resets when component unmounts or dataUrl changes - const cacheRef = useRef(null); + const [data, setData] = useState(null); const [isLoading, setIsLoading] = useState(true); - // Use useMemo to determine current data state without side effects - const _currentData = useMemo(() => { - if (!dataUrl || typeof window === 'undefined') { - return { data: null, needsFetch: false, shouldLoad: false }; - } - - // Check if we have cached data for the exact same URL - if (cacheRef.current?.url === dataUrl) { - return { - data: cacheRef.current.data, - needsFetch: false, - shouldLoad: false, - }; + useEffect(() => { + // Skip fetching during SSR - prevents hydration mismatch + if (!ExecutionEnvironment.canUseDOM) { + return undefined; } - // If we have a pending promise for the same URL, wait for it - if (cacheRef.current?.promise && cacheRef.current.url === dataUrl) { - return { data: null, needsFetch: false, shouldLoad: true }; + if (!dataUrl) { + setIsLoading(false); + setData(null); + return undefined; } - // Clear old cache since URL has changed and need to 
fetch - return { data: null, needsFetch: true, shouldLoad: true }; - }, [dataUrl]); - - useEffect(() => { - // Update loading state based on current data state - setIsLoading(_currentData.shouldLoad); - - // Early return if no fetch is needed or dataUrl is undefined - if (!_currentData.needsFetch || !dataUrl) { + // Check cache first - instant return if available + const cached = dataCache.get(dataUrl); + if (cached) { + setData(cached); + setIsLoading(false); return undefined; } - // Clear old cache since URL has changed - cacheRef.current = null; - - let isCancelled = false; - - // Create fetch function - const fetchData = async (): Promise => { - const response = await fetch(dataUrl); - if (!response.ok) { - throw new Error( - `Failed to fetch copy content data: ${response.status}` - ); - } - return (await response.json()) as CopyContentData; - }; + // Not in cache, fetch from network + setIsLoading(true); - // Start the fetch and store promise in cache - const promise = fetchData(); - cacheRef.current = { - url: dataUrl, - data: {} as CopyContentData, // Temporary, will be replaced - promise, - }; - - // Handle the promise - void promise - .then((data) => { - if (!isCancelled && cacheRef.current?.url === dataUrl) { - // Update cache with successful result - cacheRef.current = { - url: dataUrl, - data, - }; - setIsLoading(false); + fetch(dataUrl) + .then(async (response) => { + if (!response.ok) { + throw new Error( + `Failed to fetch copy content data: ${response.status}` + ); } - return undefined; + return (await response.json()) as CopyContentData; + }) + .then((fetchedData) => { + // Store in cache for future component mounts + dataCache.set(dataUrl, fetchedData); + setData(fetchedData); + setIsLoading(false); }) .catch((error) => { console.error('Failed to load copy content data:', error); - if (!isCancelled) { - // Clear cache on error - cacheRef.current = null; - setIsLoading(false); - } - return undefined; + setData(null); + setIsLoading(false); }); - 
return () => { - isCancelled = true; - }; - }, [dataUrl, _currentData]); + return undefined; + }, [dataUrl]); return { - copyContentData: _currentData.data || cacheRef.current?.data || null, + copyContentData: data, isLoading, }; } diff --git a/packages/docusaurus-theme-llms-txt/src/theme-llms-txt.d.ts b/packages/docusaurus-theme-llms-txt/src/theme-llms-txt.d.ts index 63fe816..0887432 100644 --- a/packages/docusaurus-theme-llms-txt/src/theme-llms-txt.d.ts +++ b/packages/docusaurus-theme-llms-txt/src/theme-llms-txt.d.ts @@ -14,13 +14,6 @@ * to provide copy page button functionality and other llms-txt related UI components. */ -/* eslint-disable @typescript-eslint/triple-slash-reference */ - -/// -/// -/// -/// - // This file, like all the other ambient declaration files for theme packages, is // needed for TS to understand our `@theme` aliases. The @theme/* module declarations // are provided by the @signalwire/docusaurus-plugin-llms-txt package, following diff --git a/packages/docusaurus-theme-llms-txt/src/theme/CopyPageContent/CopyButton/index.tsx b/packages/docusaurus-theme-llms-txt/src/theme/CopyPageContent/CopyButton/index.tsx index 80fbadb..db923d9 100644 --- a/packages/docusaurus-theme-llms-txt/src/theme/CopyPageContent/CopyButton/index.tsx +++ b/packages/docusaurus-theme-llms-txt/src/theme/CopyPageContent/CopyButton/index.tsx @@ -24,13 +24,15 @@ interface CopyButtonProps { onDropdownToggle: () => void; } -export default function CopyButton({ +function CopyButton({ copyStatus, finalConfig, isOpen, onMainAction, onDropdownToggle, }: CopyButtonProps): React.JSX.Element { + const ariaLabel = copyStatus === 'success' ? 'Copied' : 'Copy page'; + return (
{copyStatus === 'success' ? ( @@ -52,7 +52,7 @@ export default function CopyButton({ )} - {copyStatus === 'success' ? 'Copied!' : finalConfig.buttonLabel} + {copyStatus === 'success' ? 'Copied' : finalConfig.buttonLabel}