From a430284aa21e3ae1f0d5654e55b2ad2852519cc2 Mon Sep 17 00:00:00 2001 From: wwf <yearningwang@iqtogether.com> Date: 星期三, 04 六月 2025 15:17:49 +0800 Subject: [PATCH] 初始化 --- app/(commonLayout)/datasets/template/template.en.mdx | 1152 +++++++++++---------------------------------------------- 1 files changed, 224 insertions(+), 928 deletions(-) diff --git "a/app/\050commonLayout\051/datasets/template/template.en.mdx" "b/app/\050commonLayout\051/datasets/template/template.en.mdx" index 7f28610..ac57e3a 100644 --- "a/app/\050commonLayout\051/datasets/template/template.en.mdx" +++ "b/app/\050commonLayout\051/datasets/template/template.en.mdx" @@ -1,8 +1,3 @@ -{/** - * @typedef Props - * @property {string} apiBaseUrl - */} - import { CodeGroup } from '@/app/components/develop/code.tsx' import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstruction, Paragraph } from '@/app/components/develop/md.tsx' @@ -37,7 +32,7 @@ <Col> This API is based on an existing knowledge and creates a new document through text based on this knowledge. 
- ### Path + ### Params <Properties> <Property name='dataset_id' type='string' key='dataset_id'> Knowledge ID @@ -51,6 +46,44 @@ </Property> <Property name='text' type='string' key='text'> Document content + </Property> + <Property name='doc_type' type='string' key='doc_type'> + Type of document (optional): + - <code>book</code> Book + - <code>web_page</code> Web page + - <code>paper</code> Academic paper/article + - <code>social_media_post</code> Social media post + - <code>wikipedia_entry</code> Wikipedia entry + - <code>personal_document</code> Personal document + - <code>business_document</code> Business document + - <code>im_chat_log</code> Chat log + - <code>synced_from_notion</code> Notion document + - <code>synced_from_github</code> GitHub document + - <code>others</code> Other document types + </Property> + <Property name='doc_metadata' type='object' key='doc_metadata'> + Document metadata (required if doc_type is provided). Fields vary by doc_type: + For <code>book</code>: + - <code>title</code> Book title + - <code>language</code> Book language + - <code>author</code> Book author + - <code>publisher</code> Publisher name + - <code>publication_date</code> Publication date + - <code>isbn</code> ISBN number + - <code>category</code> Book category + + For <code>web_page</code>: + - <code>title</code> Page title + - <code>url</code> Page URL + - <code>language</code> Page language + - <code>publish_date</code> Publish date + - <code>author/publisher</code> Author or publisher + - <code>topic/keywords</code> Topic or keywords + - <code>description</code> Page description + + Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type. 
+ + For doc_type "others", any valid JSON object is accepted </Property> <Property name='indexing_technique' type='string' key='indexing_technique'> Index mode @@ -175,7 +208,7 @@ <Col> This API is based on an existing knowledge and creates a new document through a file based on this knowledge. - ### Path + ### Params <Properties> <Property name='dataset_id' type='string' key='dataset_id'> Knowledge ID @@ -199,6 +232,68 @@ - <code>text_model</code> Text documents are directly embedded; `economy` mode defaults to using this form - <code>hierarchical_model</code> Parent-child mode - <code>qa_model</code> Q&A Mode: Generates Q&A pairs for segmented documents and then embeds the questions + + - <code>doc_type</code> Type of document (optional) + - <code>book</code> Book + Document records a book or publication + - <code>web_page</code> Web page + Document records web page content + - <code>paper</code> Academic paper/article + Document records academic paper or research article + - <code>social_media_post</code> Social media post + Content from social media posts + - <code>wikipedia_entry</code> Wikipedia entry + Content from Wikipedia entries + - <code>personal_document</code> Personal document + Documents related to personal content + - <code>business_document</code> Business document + Documents related to business content + - <code>im_chat_log</code> Chat log + Records of instant messaging chats + - <code>synced_from_notion</code> Notion document + Documents synchronized from Notion + - <code>synced_from_github</code> GitHub document + Documents synchronized from GitHub + - <code>others</code> Other document types + Other document types not listed above + + - <code>doc_metadata</code> Document metadata (required if doc_type is provided) + Fields vary by doc_type: + + For <code>book</code>: + - <code>title</code> Book title + Title of the book + - <code>language</code> Book language + Language of the book + - <code>author</code> Book author + Author of the book + - 
<code>publisher</code> Publisher name + Name of the publishing house + - <code>publication_date</code> Publication date + Date when the book was published + - <code>isbn</code> ISBN number + International Standard Book Number + - <code>category</code> Book category + Category or genre of the book + + For <code>web_page</code>: + - <code>title</code> Page title + Title of the web page + - <code>url</code> Page URL + URL address of the web page + - <code>language</code> Page language + Language of the web page + - <code>publish_date</code> Publish date + Date when the web page was published + - <code>author/publisher</code> Author or publisher + Author or publisher of the web page + - <code>topic/keywords</code> Topic or keywords + Topics or keywords of the web page + - <code>description</code> Page description + Description of the web page content + + Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type. 
+ For doc_type "others", any valid JSON object is accepted - <code>doc_language</code> In Q&A mode, specify the language of the document, for example: <code>English</code>, <code>Chinese</code> @@ -312,9 +407,46 @@ <Property name='description' type='string' key='description'> Knowledge description (optional) </Property> + <Property name='doc_type' type='string' key='doc_type'> + Type of document (optional): + - <code>book</code> Book + - <code>web_page</code> Web page + - <code>paper</code> Academic paper/article + - <code>social_media_post</code> Social media post + - <code>wikipedia_entry</code> Wikipedia entry + - <code>personal_document</code> Personal document + - <code>business_document</code> Business document + - <code>im_chat_log</code> Chat log + - <code>synced_from_notion</code> Notion document + - <code>synced_from_github</code> GitHub document + - <code>others</code> Other document types + </Property> + <Property name='doc_metadata' type='object' key='doc_metadata'> + Document metadata (required if doc_type is provided). Fields vary by doc_type: + For <code>book</code>: + - <code>title</code> Book title + - <code>language</code> Book language + - <code>author</code> Book author + - <code>publisher</code> Publisher name + - <code>publication_date</code> Publication date + - <code>isbn</code> ISBN number + - <code>category</code> Book category + + For <code>web_page</code>: + - <code>title</code> Page title + - <code>url</code> Page URL + - <code>language</code> Page language + - <code>publish_date</code> Publish date + - <code>author/publisher</code> Author or publisher + - <code>topic/keywords</code> Topic or keywords + - <code>description</code> Page description + + Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type. 
+ + For doc_type "others", any valid JSON object is accepted + </Property> <Property name='indexing_technique' type='string' key='indexing_technique'> Index technique (optional) - If this is not set, embedding_model, embedding_model_provider and retrieval_model will be set to null - <code>high_quality</code> High quality - <code>economy</code> Economy </Property> @@ -334,26 +466,6 @@ </Property> <Property name='external_knowledge_id' type='str' key='external_knowledge_id'> External knowledge ID (optional) - </Property> - <Property name='embedding_model' type='str' key='embedding_model'> - Embedding model name (optional) - </Property> - <Property name='embedding_model_provider' type='str' key='embedding_model_provider'> - Embedding model provider name (optional) - </Property> - <Property name='retrieval_model' type='object' key='retrieval_model'> - Retrieval model (optional) - - <code>search_method</code> (string) Search method - - <code>hybrid_search</code> Hybrid search - - <code>semantic_search</code> Semantic search - - <code>full_text_search</code> Full-text search - - <code>reranking_enable</code> (bool) Whether to enable reranking - - <code>reranking_model</code> (object) Rerank model configuration - - <code>reranking_provider_name</code> (string) Rerank model provider - - <code>reranking_model_name</code> (string) Rerank model name - - <code>top_k</code> (int) Number of results to return - - <code>score_threshold_enabled</code> (bool) Whether to enable score threshold - - <code>score_threshold</code> (float) Score threshold </Property> </Properties> </Col> @@ -412,20 +524,11 @@ <Col> ### Query <Properties> - <Property name='keyword' type='string' key='keyword'> - Search keyword, optional - </Property> - <Property name='tag_ids' type='array[string]' key='tag_ids'> - Tag ID list, optional - </Property> <Property name='page' type='string' key='page'> - Page number, optional, default 1 + Page number </Property> <Property name='limit' type='string' key='limit'> - 
Number of items returned, optional, default 20, range 1-100 - </Property> - <Property name='include_all' type='boolean' key='include_all'> - Whether to include all datasets (only effective for owners), optional, defaults to false + Number of items returned, default 20, range 1-100 </Property> </Properties> </Col> @@ -476,255 +579,13 @@ <Heading url='/datasets/{dataset_id}' - method='GET' - title='Get knowledge base details by knowledge base ID' - name='#view_dataset' -/> -<Row> - <Col> - ### Path - <Properties> - <Property name='dataset_id' type='string' key='dataset_id'> - Knowledge Base ID - </Property> - </Properties> - </Col> - <Col sticky> - <CodeGroup - title="Request" - tag="GET" - label="/datasets/{dataset_id}" - targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}'`} - > - ```bash {{ title: 'cURL' }} - curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}' \ - --header 'Authorization: Bearer {api_key}' - ``` - </CodeGroup> - <CodeGroup title="Response"> - ```json {{ title: 'Response' }} - { - "id": "eaedb485-95ac-4ffd-ab1e-18da6d676a2f", - "name": "Test Knowledge Base", - "description": "", - "provider": "vendor", - "permission": "only_me", - "data_source_type": null, - "indexing_technique": null, - "app_count": 0, - "document_count": 0, - "word_count": 0, - "created_by": "e99a1635-f725-4951-a99a-1daaaa76cfc6", - "created_at": 1735620612, - "updated_by": "e99a1635-f725-4951-a99a-1daaaa76cfc6", - "updated_at": 1735620612, - "embedding_model": null, - "embedding_model_provider": null, - "embedding_available": true, - "retrieval_model_dict": { - "search_method": "semantic_search", - "reranking_enable": false, - "reranking_mode": null, - "reranking_model": { - "reranking_provider_name": "", - "reranking_model_name": "" - }, - "weights": null, - "top_k": 2, - "score_threshold_enabled": false, - "score_threshold": null - }, - "tags": [], - "doc_form": null, - 
"external_knowledge_info": { - "external_knowledge_id": null, - "external_knowledge_api_id": null, - "external_knowledge_api_name": null, - "external_knowledge_api_endpoint": null - }, - "external_retrieval_model": { - "top_k": 2, - "score_threshold": 0.0, - "score_threshold_enabled": null - } - } - ``` - </CodeGroup> - </Col> -</Row> - -<hr className='ml-0 mr-0' /> - -<Heading - url='/datasets/{dataset_id}' - method='PATCH' - title='Update knowledge base' - name='#update_dataset' -/> -<Row> - <Col> - ### Path - <Properties> - <Property name='dataset_id' type='string' key='dataset_id'> - Knowledge Base ID - </Property> - <Property name='indexing_technique' type='string' key='indexing_technique'> - Index technique (optional) - - <code>high_quality</code> High quality - - <code>economy</code> Economy - </Property> - <Property name='permission' type='string' key='permission'> - Permission - - <code>only_me</code> Only me - - <code>all_team_members</code> All team members - - <code>partial_members</code> Partial members - </Property> - <Property name='embedding_model_provider' type='string' key='embedding_model_provider'> - Specified embedding model provider, must be set up in the system first, corresponding to the provider field(Optional) - </Property> - <Property name='embedding_model' type='string' key='embedding_model'> - Specified embedding model, corresponding to the model field(Optional) - </Property> - <Property name='retrieval_model' type='object' key='retrieval_model'> - Retrieval model (optional, if not filled, it will be recalled according to the default method) - - <code>search_method</code> (text) Search method: One of the following four keywords is required - - <code>keyword_search</code> Keyword search - - <code>semantic_search</code> Semantic search - - <code>full_text_search</code> Full-text search - - <code>hybrid_search</code> Hybrid search - - <code>reranking_enable</code> (bool) Whether to enable reranking, required if the search mode is 
semantic_search or hybrid_search (optional) - - <code>reranking_mode</code> (object) Rerank model configuration, required if reranking is enabled - - <code>reranking_provider_name</code> (string) Rerank model provider - - <code>reranking_model_name</code> (string) Rerank model name - - <code>weights</code> (float) Semantic search weight setting in hybrid search mode - - <code>top_k</code> (integer) Number of results to return (optional) - - <code>score_threshold_enabled</code> (bool) Whether to enable score threshold - - <code>score_threshold</code> (float) Score threshold - </Property> - <Property name='partial_member_list' type='array' key='partial_member_list'> - Partial member list(Optional) - </Property> - </Properties> - </Col> - <Col sticky> - <CodeGroup - title="Request" - tag="PATCH" - label="/datasets/{dataset_id}" - targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{ - "name": "Test Knowledge Base", - "indexing_technique": "high_quality", - "permission": "only_me", - "embedding_model_provider": "zhipuai", - "embedding_model": "embedding-3", - "retrieval_model": { - "search_method": "keyword_search", - "reranking_enable": false, - "reranking_mode": null, - "reranking_model": { - "reranking_provider_name": "", - "reranking_model_name": "" - }, - "weights": null, - "top_k": 1, - "score_threshold_enabled": false, - "score_threshold": null - }, - "partial_member_list": [] - }' - `} - > - ```bash {{ title: 'cURL' }} - curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}' \ - --header 'Authorization: Bearer {api_key}' \ - --header 'Content-Type: application/json' \ - --data-raw '{ - "name": "Test Knowledge Base", - "indexing_technique": "high_quality", - "permission": "only_me", - "embedding_model_provider": "zhipuai", - "embedding_model": "embedding-3", - "retrieval_model": { - 
"search_method": "keyword_search", - "reranking_enable": false, - "reranking_mode": null, - "reranking_model": { - "reranking_provider_name": "", - "reranking_model_name": "" - }, - "weights": null, - "top_k": 1, - "score_threshold_enabled": false, - "score_threshold": null - }, - "partial_member_list": [] - }' - ``` - </CodeGroup> - <CodeGroup title="Response"> - ```json {{ title: 'Response' }} - { - "id": "eaedb485-95ac-4ffd-ab1e-18da6d676a2f", - "name": "Test Knowledge Base", - "description": "", - "provider": "vendor", - "permission": "only_me", - "data_source_type": null, - "indexing_technique": "high_quality", - "app_count": 0, - "document_count": 0, - "word_count": 0, - "created_by": "e99a1635-f725-4951-a99a-1daaaa76cfc6", - "created_at": 1735620612, - "updated_by": "e99a1635-f725-4951-a99a-1daaaa76cfc6", - "updated_at": 1735622679, - "embedding_model": "embedding-3", - "embedding_model_provider": "zhipuai", - "embedding_available": null, - "retrieval_model_dict": { - "search_method": "semantic_search", - "reranking_enable": false, - "reranking_mode": null, - "reranking_model": { - "reranking_provider_name": "", - "reranking_model_name": "" - }, - "weights": null, - "top_k": 2, - "score_threshold_enabled": false, - "score_threshold": null - }, - "tags": [], - "doc_form": null, - "external_knowledge_info": { - "external_knowledge_id": null, - "external_knowledge_api_id": null, - "external_knowledge_api_name": null, - "external_knowledge_api_endpoint": null - }, - "external_retrieval_model": { - "top_k": 2, - "score_threshold": 0.0, - "score_threshold_enabled": null - }, - "partial_member_list": [] - } - ``` - </CodeGroup> - </Col> -</Row> - -<hr className='ml-0 mr-0' /> - -<Heading - url='/datasets/{dataset_id}' method='DELETE' title='Delete a Knowledge Base' name='#delete_dataset' /> <Row> <Col> - ### Path + ### Params <Properties> <Property name='dataset_id' type='string' key='dataset_id'> Knowledge ID @@ -763,7 +624,7 @@ <Col> This API is based on an 
existing knowledge and updates the document through text based on this knowledge. - ### Path + ### Params <Properties> <Property name='dataset_id' type='string' key='dataset_id'> Knowledge ID @@ -865,7 +726,7 @@ <Col> This API is based on an existing knowledge, and updates documents through files based on this knowledge - ### Path + ### Params <Properties> <Property name='dataset_id' type='string' key='dataset_id'> Knowledge ID @@ -901,6 +762,67 @@ - <code>separator</code> Segmentation identifier. Currently, only one delimiter is allowed. The default is <code>***</code> - <code>max_tokens</code> The maximum length (tokens) must be validated to be shorter than the length of the parent chunk - <code>chunk_overlap</code> Define the overlap between adjacent chunks (optional) + - <code>doc_type</code> Type of document (optional) + - <code>book</code> Book + Document records a book or publication + - <code>web_page</code> Web page + Document records web page content + - <code>paper</code> Academic paper/article + Document records academic paper or research article + - <code>social_media_post</code> Social media post + Content from social media posts + - <code>wikipedia_entry</code> Wikipedia entry + Content from Wikipedia entries + - <code>personal_document</code> Personal document + Documents related to personal content + - <code>business_document</code> Business document + Documents related to business content + - <code>im_chat_log</code> Chat log + Records of instant messaging chats + - <code>synced_from_notion</code> Notion document + Documents synchronized from Notion + - <code>synced_from_github</code> GitHub document + Documents synchronized from GitHub + - <code>others</code> Other document types + Other document types not listed above + + - <code>doc_metadata</code> Document metadata (required if doc_type is provided) + Fields vary by doc_type: + + For <code>book</code>: + - <code>title</code> Book title + Title of the book + - <code>language</code> Book 
language + Language of the book + - <code>author</code> Book author + Author of the book + - <code>publisher</code> Publisher name + Name of the publishing house + - <code>publication_date</code> Publication date + Date when the book was published + - <code>isbn</code> ISBN number + International Standard Book Number + - <code>category</code> Book category + Category or genre of the book + + For <code>web_page</code>: + - <code>title</code> Page title + Title of the web page + - <code>url</code> Page URL + URL address of the web page + - <code>language</code> Page language + Language of the web page + - <code>publish_date</code> Publish date + Date when the web page was published + - <code>author/publisher</code> Author or publisher + Author or publisher of the web page + - <code>topic/keywords</code> Topic or keywords + Topics or keywords of the web page + - <code>description</code> Page description + Description of the web page content + + Please check [api/services/dataset_service.py](https://github.com/langgenius/dify/blob/main/api/services/dataset_service.py#L475) for more details on the fields required for each doc_type. 
+ For doc_type "others", any valid JSON object is accepted </Property> </Properties> </Col> @@ -962,7 +884,7 @@ /> <Row> <Col> - ### Path + ### Params <Properties> <Property name='dataset_id' type='string' key='dataset_id'> Knowledge ID @@ -1017,7 +939,7 @@ /> <Row> <Col> - ### Path + ### Params <Properties> <Property name='dataset_id' type='string' key='dataset_id'> Knowledge ID @@ -1040,8 +962,10 @@ ``` </CodeGroup> <CodeGroup title="Response"> - ```text {{ title: 'Response' }} - 204 No Content + ```json {{ title: 'Response' }} + { + "result": "success" + } ``` </CodeGroup> </Col> @@ -1057,7 +981,7 @@ /> <Row> <Col> - ### Path + ### Params <Properties> <Property name='dataset_id' type='string' key='dataset_id'> Knowledge ID @@ -1132,7 +1056,7 @@ /> <Row> <Col> - ### Path + ### Params <Properties> <Property name='dataset_id' type='string' key='dataset_id'> Knowledge ID @@ -1236,12 +1160,6 @@ <Property name='status' type='string' key='status'> Search status, completed </Property> - <Property name='page' type='string' key='page'> - Page number (optional) - </Property> - <Property name='limit' type='string' key='limit'> - Number of items returned, default 20, range 1-100 (optional) - </Property> </Properties> </Col> <Col sticky> @@ -1285,11 +1203,7 @@ "error": null, "stopped_at": null }], - "doc_form": "text_model", - "has_more": false, - "limit": 20, - "total": 9, - "page": 1 + "doc_form": "text_model" } ``` </CodeGroup> @@ -1324,17 +1238,19 @@ title="Request" tag="DELETE" label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}" - targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`} + targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: 
application/json'`} > ```bash {{ title: 'cURL' }} - curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \ + curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/segments/{segment_id}' \ --header 'Authorization: Bearer {api_key}' \ --header 'Content-Type: application/json' ``` </CodeGroup> <CodeGroup title="Response"> - ```text {{ title: 'Response' }} - 204 No Content + ```json {{ title: 'Response' }} + { + "result": "success" + } ``` </CodeGroup> </Col> @@ -1397,7 +1313,7 @@ <CodeGroup title="Response"> ```json {{ title: 'Response' }} { - "data": { + "data": [{ "id": "", "position": 1, "document_id": "", @@ -1421,276 +1337,8 @@ "completed_at": 1695312007, "error": null, "stopped_at": null - }, - "doc_form": "text_model" - } - ``` - </CodeGroup> - </Col> -</Row> - -<hr className='ml-0 mr-0' /> - -<Heading - url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' - method='POST' - title='Create Child Chunk' - name='#create_child_chunk' -/> -<Row> - <Col> - ### Path - <Properties> - <Property name='dataset_id' type='string' key='dataset_id'> - Knowledge ID - </Property> - <Property name='document_id' type='string' key='document_id'> - Document ID - </Property> - <Property name='segment_id' type='string' key='segment_id'> - Segment ID - </Property> - </Properties> - - ### Request Body - <Properties> - <Property name='content' type='string' key='content'> - Child chunk content - </Property> - </Properties> - </Col> - <Col sticky> - <CodeGroup - title="Request" - tag="POST" - label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks" - targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"content": "Child chunk 
content"}'`} - > - ```bash {{ title: 'cURL' }} - curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' \ - --header 'Authorization: Bearer {api_key}' \ - --header 'Content-Type: application/json' \ - --data-raw '{ - "content": "Child chunk content" - }' - ``` - </CodeGroup> - <CodeGroup title="Response"> - ```json {{ title: 'Response' }} - { - "data": { - "id": "", - "segment_id": "", - "content": "Child chunk content", - "word_count": 25, - "tokens": 0, - "index_node_id": "", - "index_node_hash": "", - "status": "completed", - "created_by": "", - "created_at": 1695312007, - "indexing_at": 1695312007, - "completed_at": 1695312007, - "error": null, - "stopped_at": null - } - } - ``` - </CodeGroup> - </Col> -</Row> - -<hr className='ml-0 mr-0' /> - -<Heading - url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks' - method='GET' - title='Get Child Chunks' - name='#get_child_chunks' -/> -<Row> - <Col> - ### Path - <Properties> - <Property name='dataset_id' type='string' key='dataset_id'> - Knowledge ID - </Property> - <Property name='document_id' type='string' key='document_id'> - Document ID - </Property> - <Property name='segment_id' type='string' key='segment_id'> - Segment ID - </Property> - </Properties> - - ### Query - <Properties> - <Property name='keyword' type='string' key='keyword'> - Search keyword (optional) - </Property> - <Property name='page' type='integer' key='page'> - Page number (optional, default: 1) - </Property> - <Property name='limit' type='integer' key='limit'> - Items per page (optional, default: 20, max: 100) - </Property> - </Properties> - </Col> - <Col sticky> - <CodeGroup - title="Request" - tag="GET" - label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks" - targetCode={`curl --location --request GET 
'${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`} - > - ```bash {{ title: 'cURL' }} - curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks?page=1&limit=20' \ - --header 'Authorization: Bearer {api_key}' - ``` - </CodeGroup> - <CodeGroup title="Response"> - ```json {{ title: 'Response' }} - { - "data": [{ - "id": "", - "segment_id": "", - "content": "Child chunk content", - "word_count": 25, - "tokens": 0, - "index_node_id": "", - "index_node_hash": "", - "status": "completed", - "created_by": "", - "created_at": 1695312007, - "indexing_at": 1695312007, - "completed_at": 1695312007, - "error": null, - "stopped_at": null }], - "total": 1, - "total_pages": 1, - "page": 1, - "limit": 20 - } - ``` - </CodeGroup> - </Col> -</Row> - -<hr className='ml-0 mr-0' /> - -<Heading - url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' - method='DELETE' - title='Delete Child Chunk' - name='#delete_child_chunk' -/> -<Row> - <Col> - ### Path - <Properties> - <Property name='dataset_id' type='string' key='dataset_id'> - Knowledge ID - </Property> - <Property name='document_id' type='string' key='document_id'> - Document ID - </Property> - <Property name='segment_id' type='string' key='segment_id'> - Segment ID - </Property> - <Property name='child_chunk_id' type='string' key='child_chunk_id'> - Child Chunk ID - </Property> - </Properties> - </Col> - <Col sticky> - <CodeGroup - title="Request" - tag="DELETE" - label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}" - targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}'`} - > - ```bash {{ title: 'cURL' }} - 
curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \ - --header 'Authorization: Bearer {api_key}' - ``` - </CodeGroup> - <CodeGroup title="Response"> - ```text {{ title: 'Response' }} - 204 No Content - ``` - </CodeGroup> - </Col> -</Row> - -<hr className='ml-0 mr-0' /> - -<Heading - url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' - method='PATCH' - title='Update Child Chunk' - name='#update_child_chunk' -/> -<Row> - <Col> - ### Path - <Properties> - <Property name='dataset_id' type='string' key='dataset_id'> - Knowledge ID - </Property> - <Property name='document_id' type='string' key='document_id'> - Document ID - </Property> - <Property name='segment_id' type='string' key='segment_id'> - Segment ID - </Property> - <Property name='child_chunk_id' type='string' key='child_chunk_id'> - Child Chunk ID - </Property> - </Properties> - - ### Request Body - <Properties> - <Property name='content' type='string' key='content'> - Child chunk content - </Property> - </Properties> - </Col> - <Col sticky> - <CodeGroup - title="Request" - tag="PATCH" - label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}" - targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"content": "Updated child chunk content"}'`} - > - ```bash {{ title: 'cURL' }} - curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}' \ - --header 'Authorization: Bearer {api_key}' \ - --header 'Content-Type: application/json' \ - --data-raw '{ - "content": "Updated child chunk content" - }' - ``` - </CodeGroup> - 
<CodeGroup title="Response"> - ```json {{ title: 'Response' }} - { - "data": { - "id": "", - "segment_id": "", - "content": "Updated child chunk content", - "word_count": 25, - "tokens": 0, - "index_node_id": "", - "index_node_hash": "", - "status": "completed", - "created_by": "", - "created_at": 1695312007, - "indexing_at": 1695312007, - "completed_at": 1695312007, - "error": null, - "stopped_at": null - } + "doc_form": "text_model" } ``` </CodeGroup> @@ -1880,6 +1528,7 @@ "id": "a8c6c36f-9f5d-4d7a-8472-f5d7b75d71d2", "data_source_type": "upload_file", "name": "readme.txt", + "doc_type": null } }, "score": 3.730463140527718e-05, @@ -1890,359 +1539,6 @@ ``` </CodeGroup> </Col> -</Row> - -<hr className='ml-0 mr-0' /> - -<Heading - url='/datasets/{dataset_id}/metadata' - method='POST' - title='Create a Knowledge Metadata' - name='#create_metadata' -/> -<Row> - <Col> - ### Path - <Properties> - <Property name='dataset_id' type='string' key='dataset_id'> - Knowledge ID - </Property> - </Properties> - - ### Request Body - <Properties> - <Property name='segment' type='object' key='segment'> - - <code>type</code> (string) Metadata type, required - - <code>name</code> (string) Metadata name, required - </Property> - </Properties> - </Col> - <Col sticky> - <CodeGroup - title="Request" - tag="POST" - label="/datasets/{dataset_id}/metadata" - targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/metadata' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'\\\n--data-raw '{"type": "string", "name": "test"}'`} - > - ```bash {{ title: 'cURL' }} - ``` - </CodeGroup> - <CodeGroup title="Response"> - ```json {{ title: 'Response' }} - { - "id": "abc", - "type": "string", - "name": "test", - } - ``` - </CodeGroup> - </Col> -</Row> - -<hr className='ml-0 mr-0' /> - -<Heading - url='/datasets/{dataset_id}/metadata/{metadata_id}' - method='PATCH' - title='Update a Knowledge Metadata' - name='#update_metadata' -/> 
-<Row> - <Col> - ### Path - <Properties> - <Property name='dataset_id' type='string' key='dataset_id'> - Knowledge ID - </Property> - <Property name='metadata_id' type='string' key='metadata_id'> - Metadata ID - </Property> - </Properties> - - ### Request Body - <Properties> - <Property name='segment' type='object' key='segment'> - - <code>name</code> (string) Metadata name, required - </Property> - </Properties> - </Col> - <Col sticky> - <CodeGroup - title="Request" - tag="PATCH" - label="/datasets/{dataset_id}/metadata/{metadata_id}" - targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/metadata/{metadata_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'\\\n--data-raw '{"name": "test"}'`} - > - ```bash {{ title: 'cURL' }} - ``` - </CodeGroup> - <CodeGroup title="Response"> - ```json {{ title: 'Response' }} - { - "id": "abc", - "type": "string", - "name": "test", - } - ``` - </CodeGroup> - </Col> -</Row> - -<hr className='ml-0 mr-0' /> - -<Heading - url='/datasets/{dataset_id}/metadata/{metadata_id}' - method='DELETE' - title='Delete a Knowledge Metadata' - name='#delete_metadata' -/> -<Row> - <Col> - ### Path - <Properties> - <Property name='dataset_id' type='string' key='dataset_id'> - Knowledge ID - </Property> - <Property name='metadata_id' type='string' key='metadata_id'> - Metadata ID - </Property> - </Properties> - </Col> - <Col sticky> - <CodeGroup - title="Request" - tag="DELETE" - label="/datasets/{dataset_id}/metadata/{metadata_id}" - targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/metadata/{metadata_id}' \\\n--header 'Authorization: Bearer {api_key}'`} - > - ```bash {{ title: 'cURL' }} - ``` - </CodeGroup> - </Col> -</Row> - -<hr className='ml-0 mr-0' /> - -<Heading - url='/datasets/{dataset_id}/metadata/built-in/{action}' - method='POST' - title='Disable Or Enable Built-in Metadata' - name='#toggle_metadata' -/> -<Row> - <Col> - 
### Path - <Properties> - <Property name='dataset_id' type='string' key='dataset_id'> - Knowledge ID - </Property> - <Property name='action' type='string' key='action'> - disable/enable - </Property> - </Properties> - </Col> - <Col sticky> - <CodeGroup - title="Request" - tag="POST" - label="/datasets/{dataset_id}/metadata/built-in/{action}" - targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/metadata/built-in/{action}' \\\n--header 'Authorization: Bearer {api_key}'`} - > - ```bash {{ title: 'cURL' }} - ``` - </CodeGroup> - </Col> -</Row> - -<hr className='ml-0 mr-0' /> - -<Heading - url='/datasets/{dataset_id}/documents/metadata' - method='POST' - title='Update Documents Metadata' - name='#update_documents_metadata' -/> -<Row> - <Col> - ### Path - <Properties> - <Property name='dataset_id' type='string' key='dataset_id'> - Knowledge ID - </Property> - </Properties> - - ### Request Body - <Properties> - <Property name='operation_data' type='object list' key='segments'> - - <code>document_id</code> (string) Document ID - - <code>metadata_list</code> (list) Metadata list - - <code>id</code> (string) Metadata ID - - <code>value</code> (string) Metadata value - - <code>name</code> (string) Metadata name - </Property> - </Properties> - </Col> - <Col sticky> - <CodeGroup - title="Request" - tag="POST" - label="/datasets/{dataset_id}/documents/metadata" - targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/metadata' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'\\\n--data-raw '{"operation_data": [{"document_id": "document_id", "metadata_list": [{"id": "id", "value": "value", "name": "name"}]}]}'`} - > - ```bash {{ title: 'cURL' }} - ``` - </CodeGroup> - </Col> -</Row> - -<hr className='ml-0 mr-0' /> - -<Heading - url='/datasets/{dataset_id}/metadata' - method='GET' - title='Get Knowledge Metadata List' - name='#dataset_metadata_list' -/> -<Row> - 
<Col> - ### Params - <Properties> - <Property name='dataset_id' type='string' key='dataset_id'> - Knowledge ID - </Property> - </Properties> - </Col> - <Col sticky> - <CodeGroup - title="Request" - tag="GET" - label="/datasets/{dataset_id}/metadata" - targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/metadata' \\\n--header 'Authorization: Bearer {api_key}'`} - > - ```bash {{ title: 'cURL' }} - ``` - </CodeGroup> - <CodeGroup title="Response"> - ```json {{ title: 'Response' }} - { - "doc_metadata": [ - { - "id": "", - "name": "name", - "type": "string", - "use_count": 0, - }, - ... - ], - "built_in_field_enabled": true - } - ``` - </CodeGroup> - </Col> -</Row> - -<hr className='ml-0 mr-0' /> - -<Heading - url='/workspaces/current/models/model-types/text-embedding' - method='GET' - title='Get available embedding models' - name='#model_type_list' -/> -<Row> - <Col> - ### Query - <Properties> - </Properties> - </Col> - <Col sticky> - <CodeGroup - title="Request" - tag="GET" - label="/datasets/{dataset_id}" - targetCode={`curl --location --location --request GET '${props.apiBaseUrl}/workspaces/current/models/model-types/text-embedding' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' `} - > - ```bash {{ title: 'cURL' }} - curl --location --request GET '${props.apiBaseUrl}/workspaces/current/models/model-types/text-embedding' \ - --header 'Authorization: Bearer {api_key}' \ - --header 'Content-Type: application/json' \ - ``` - </CodeGroup> - <CodeGroup title="Response"> - ```json {{ title: 'Response' }} - { - "data": [ - { - "provider": "zhipuai", - "label": { - "zh_Hans": "智谱 AI", - "en_US": "ZHIPU AI" - }, - "icon_small": { - "zh_Hans": "http://127.0.0.1:5001/console/api/workspaces/current/model-providers/zhipuai/icon_small/zh_Hans", - "en_US": "http://127.0.0.1:5001/console/api/workspaces/current/model-providers/zhipuai/icon_small/en_US" - }, - "icon_large": { - "zh_Hans":
"http://127.0.0.1:5001/console/api/workspaces/current/model-providers/zhipuai/icon_large/zh_Hans", - "en_US": "http://127.0.0.1:5001/console/api/workspaces/current/model-providers/zhipuai/icon_large/en_US" - }, - "status": "active", - "models": [ - { - "model": "embedding-3", - "label": { - "zh_Hans": "embedding-3", - "en_US": "embedding-3" - }, - "model_type": "text-embedding", - "features": null, - "fetch_from": "predefined-model", - "model_properties": { - "context_size": 8192 - }, - "deprecated": false, - "status": "active", - "load_balancing_enabled": false - }, - { - "model": "embedding-2", - "label": { - "zh_Hans": "embedding-2", - "en_US": "embedding-2" - }, - "model_type": "text-embedding", - "features": null, - "fetch_from": "predefined-model", - "model_properties": { - "context_size": 8192 - }, - "deprecated": false, - "status": "active", - "load_balancing_enabled": false - }, - { - "model": "text_embedding", - "label": { - "zh_Hans": "text_embedding", - "en_US": "text_embedding" - }, - "model_type": "text-embedding", - "features": null, - "fetch_from": "predefined-model", - "model_properties": { - "context_size": 512 - }, - "deprecated": false, - "status": "active", - "load_balancing_enabled": false - } - ] - } - ] - } - ``` - </CodeGroup> - </Col> </Row> <hr className='ml-0 mr-0' /> -- Gitblit v1.8.0