From a430284aa21e3ae1f0d5654e55b2ad2852519cc2 Mon Sep 17 00:00:00 2001
From: wwf <yearningwang@iqtogether.com>
Date: Wed, 4 Jun 2025 15:17:49 +0800
Subject: [PATCH] 初始化

---
 app/components/datasets/create/step-two/index.tsx |  125 ++++++++++++++++++++---------------------
 1 file changed, 61 insertions(+), 64 deletions(-)

diff --git a/app/components/datasets/create/step-two/index.tsx b/app/components/datasets/create/step-two/index.tsx
index 6b6580a..ec9b3a5 100644
--- a/app/components/datasets/create/step-two/index.tsx
+++ b/app/components/datasets/create/step-two/index.tsx
@@ -37,7 +37,7 @@
 import FloatRightContainer from '@/app/components/base/float-right-container'
 import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
 import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config'
-import type { RetrievalConfig } from '@/types/app'
+import { type RetrievalConfig } from '@/types/app'
 import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
 import Toast from '@/app/components/base/toast'
 import type { NotionPage } from '@/models/common'
@@ -62,10 +62,9 @@
 import CustomDialog from '@/app/components/base/dialog'
 import { PortalToFollowElem, PortalToFollowElemContent, PortalToFollowElemTrigger } from '@/app/components/base/portal-to-follow-elem'
 import { AlertTriangle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback'
-import { noop } from 'lodash-es'
 
 const TextLabel: FC<PropsWithChildren> = (props) => {
-  return <label className='system-sm-semibold text-text-secondary'>{props.children}</label>
+  return <label className='text-text-secondary system-sm-semibold'>{props.children}</label>
 }
 
 type StepTwoProps = {
@@ -97,9 +96,9 @@
 }
 
 const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n'
-const DEFAULT_MAXIMUM_CHUNK_LENGTH = 1024
+const DEFAULT_MAXIMUM_CHUNK_LENGTH = 500
 const DEFAULT_OVERLAP = 50
-const MAXIMUM_CHUNK_TOKEN_LENGTH = Number.parseInt(globalThis.document?.body?.getAttribute('data-public-indexing-max-segmentation-tokens-length') || '4000', 10)
+const MAXIMUM_CHUNK_TOKEN_LENGTH = parseInt(globalThis.document?.body?.getAttribute('data-public-indexing-max-segmentation-tokens-length') || '4000', 10)
 
 type ParentChildConfig = {
   chunkForContext: ParentMode
@@ -117,11 +116,11 @@
   chunkForContext: 'paragraph',
   parent: {
     delimiter: '\\n\\n',
-    maxLength: 1024,
+    maxLength: 500,
   },
   child: {
     delimiter: '\\n',
-    maxLength: 512,
+    maxLength: 200,
   },
 }
 
@@ -170,11 +169,12 @@
   const [rules, setRules] = useState<PreProcessingRule[]>([])
   const [defaultConfig, setDefaultConfig] = useState<Rules>()
   const hasSetIndexType = !!indexingType
-  const [indexType, setIndexType] = useState<IndexingType>(() => {
-    if (hasSetIndexType)
-      return indexingType
-    return isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL
-  })
+  const [indexType, setIndexType] = useState<IndexingType>(
+    (indexingType
+      || isAPIKeySet)
+      ? IndexingType.QUALIFIED
+      : IndexingType.ECONOMICAL,
+  )
 
   const [previewFile, setPreviewFile] = useState<DocumentItem>(
     (datasetId && documentDetail)
@@ -206,12 +206,12 @@
     if (value === ChunkingMode.parentChild && indexType === IndexingType.ECONOMICAL)
       setIndexType(IndexingType.QUALIFIED)
     setDocForm(value)
-    // eslint-disable-next-line ts/no-use-before-define
+    // eslint-disable-next-line @typescript-eslint/no-use-before-define
     currentEstimateMutation.reset()
   }
 
   const [docLanguage, setDocLanguage] = useState<string>(
-    (datasetId && documentDetail) ? documentDetail.doc_language : (locale !== LanguagesSupported[1] ? 'English' : 'Chinese Simplified'),
+    (datasetId && documentDetail) ? documentDetail.doc_language : (locale !== LanguagesSupported[1] ? 'English' : 'Chinese'),
   )
 
   const [parentChildConfig, setParentChildConfig] = useState<ParentChildConfig>(defaultParentChildConfig)
@@ -421,13 +421,6 @@
     }
     else { // create
       const indexMethod = getIndexing_technique()
-      if (indexMethod === IndexingType.QUALIFIED && (!embeddingModel.model || !embeddingModel.provider)) {
-        Toast.notify({
-          type: 'error',
-          message: t('appDebug.datasetConfig.embeddingModelRequired'),
-        })
-        return
-      }
       if (
         !isReRankModelSelected({
           rerankModelList,
@@ -575,6 +568,7 @@
     // get indexing type by props
     if (indexingType)
       setIndexType(indexingType as IndexingType)
+
     else
       setIndexType(isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL)
   }, [isAPIKeySet, indexingType, datasetId])
@@ -585,14 +579,14 @@
   const isModelAndRetrievalConfigDisabled = !!datasetId && !!currentDataset?.data_source_type
 
   return (
-    <div className='flex h-full w-full'>
-      <div className={cn('relative h-full w-1/2 overflow-y-auto py-6', isMobile ? 'px-4' : 'px-12')}>
-        <div className={'system-md-semibold mb-1 text-text-secondary'}>{t('datasetCreation.stepTwo.segmentation')}</div>
+    <div className='flex w-full h-full'>
+      <div className={cn('relative h-full w-1/2 py-6 overflow-y-auto', isMobile ? 'px-4' : 'px-12')}>
+        <div className={'system-md-semibold mb-1'}>{t('datasetCreation.stepTwo.segmentation')}</div>
         {((isInUpload && [ChunkingMode.text, ChunkingMode.qa].includes(currentDataset!.doc_form))
           || isUploadInEmptyDataset
           || isInInit)
           && <OptionCard
-            className='mb-2 bg-background-section'
+            className='bg-background-section mb-2'
             title={t('datasetCreation.stepTwo.general')}
             icon={<Image width={20} height={20} src={SettingCog} alt={t('datasetCreation.stepTwo.general')} />}
             activeHeaderClassName='bg-dataset-option-card-blue-gradient'
@@ -606,7 +600,7 @@
             actions={
               <>
                 <Button variant={'secondary-accent'} onClick={() => updatePreview()}>
-                  <RiSearchEyeLine className='mr-0.5 h-4 w-4' />
+                  <RiSearchEyeLine className='h-4 w-4 mr-0.5' />
                   {t('datasetCreation.stepTwo.previewChunk')}
                 </Button>
                 <Button variant={'ghost'} onClick={resetRules}>
@@ -623,18 +617,18 @@
                   onChange={e => setSegmentIdentifier(e.target.value, true)}
                 />
                 <MaxLengthInput
-                  unit='characters'
+                  unit='tokens'
                   value={maxChunkLength}
                   onChange={setMaxChunkLength}
                 />
                 <OverlapInput
-                  unit='characters'
+                  unit='tokens'
                   value={overlap}
                   min={1}
                   onChange={setOverlap}
                 />
               </div>
-              <div className='flex w-full flex-col'>
+              <div className='w-full flex flex-col'>
                 <div className='flex items-center gap-x-2'>
                   <div className='inline-flex shrink-0'>
                     <TextLabel>{t('datasetCreation.stepTwo.rules')}</TextLabel>
@@ -649,7 +643,7 @@
                       <Checkbox
                         checked={rule.enabled}
                       />
-                      <label className="system-sm-regular ml-2 cursor-pointer text-text-secondary">{getRuleName(rule.id)}</label>
+                      <label className="ml-2 system-sm-regular cursor-pointer text-text-secondary">{getRuleName(rule.id)}</label>
                     </div>
                   ))}
                   {IS_CE_EDITION && <>
@@ -667,7 +661,7 @@
                           checked={currentDocForm === ChunkingMode.qa}
                           disabled={!!currentDataset?.doc_form}
                         />
-                        <label className="system-sm-regular ml-2 cursor-pointer text-text-secondary">
+                        <label className="ml-2 system-sm-regular cursor-pointer text-text-secondary">
                           {t('datasetCreation.stepTwo.useQALanguage')}
                         </label>
                       </div>
@@ -683,7 +677,7 @@
                         style={{
                           background: 'linear-gradient(92deg, rgba(247, 144, 9, 0.1) 0%, rgba(255, 255, 255, 0.00) 100%)',
                         }}
-                        className='mt-2 flex h-10 items-center gap-2 rounded-xl border border-components-panel-border px-3 text-xs shadow-xs backdrop-blur-[5px]'
+                        className='h-10 mt-2 flex items-center gap-2 rounded-xl backdrop-blur-[5px] border-components-panel-border border shadow-xs px-3 text-xs'
                       >
                         <RiAlertFill className='size-4 text-text-warning-secondary' />
                         <span className='system-xs-medium text-text-primary'>
@@ -713,7 +707,7 @@
             actions={
               <>
                 <Button variant={'secondary-accent'} onClick={() => updatePreview()}>
-                  <RiSearchEyeLine className='mr-0.5 h-4 w-4' />
+                  <RiSearchEyeLine className='h-4 w-4 mr-0.5' />
                   {t('datasetCreation.stepTwo.previewChunk')}
                 </Button>
                 <Button variant={'ghost'} onClick={resetRules}>
@@ -756,7 +750,7 @@
                         })}
                       />
                       <MaxLengthInput
-                        unit='characters'
+                        unit='tokens'
                         value={parentChildConfig.parent.maxLength}
                         onChange={value => setParentChildConfig({
                           ...parentChildConfig,
@@ -790,7 +784,7 @@
                   </div>
                   <Divider className='grow' bgStyle='gradient' />
                 </div>
-                <div className='mt-1 flex gap-3'>
+                <div className='flex gap-3 mt-1'>
                   <DelimiterInput
                     value={parentChildConfig.child.delimiter}
                     tooltip={t('datasetCreation.stepTwo.parentChildChunkDelimiterTip')!}
@@ -803,7 +797,7 @@
                     })}
                   />
                   <MaxLengthInput
-                    unit='characters'
+                    unit='tokens'
                     value={parentChildConfig.child.maxLength}
                     onChange={value => setParentChildConfig({
                       ...parentChildConfig,
@@ -830,7 +824,7 @@
                       <Checkbox
                         checked={rule.enabled}
                       />
-                      <label className="system-sm-regular ml-2 cursor-pointer text-text-secondary">{getRuleName(rule.id)}</label>
+                      <label className="ml-2 system-sm-regular cursor-pointer text-text-secondary">{getRuleName(rule.id)}</label>
                     </div>
                   ))}
                 </div>
@@ -838,10 +832,10 @@
             </div>
           </OptionCard>}
         <Divider className='my-5' />
-        <div className={'system-md-semibold mb-1 text-text-secondary'}>{t('datasetCreation.stepTwo.indexMode')}</div>
+        <div className={'system-md-semibold mb-1'}>{t('datasetCreation.stepTwo.indexMode')}</div>
         <div className='flex items-center gap-2'>
           {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.QUALIFIED)) && (
-            <OptionCard className='flex-1 self-stretch'
+            <OptionCard className='flex-1'
               title={<div className='flex items-center'>
                 {t('datasetCreation.stepTwo.qualified')}
                 <Badge className={cn('ml-1 h-[18px]', (!hasSetIndexType && indexType === IndexingType.QUALIFIED) ? 'border-text-accent-secondary text-text-accent-secondary' : '')} uppercase>
@@ -854,9 +848,10 @@
               description={t('datasetCreation.stepTwo.qualifiedTip')}
               icon={<Image src={indexMethodIcon.high_quality} alt='' />}
               isActive={!hasSetIndexType && indexType === IndexingType.QUALIFIED}
-              disabled={hasSetIndexType}
+              disabled={!isAPIKeySet || hasSetIndexType}
               onSwitched={() => {
-                setIndexType(IndexingType.QUALIFIED)
+                if (isAPIKeySet)
+                  setIndexType(IndexingType.QUALIFIED)
               }}
             />
           )}
@@ -864,11 +859,11 @@
           {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.ECONOMICAL)) && (
             <>
               <CustomDialog show={isQAConfirmDialogOpen} onClose={() => setIsQAConfirmDialogOpen(false)} className='w-[432px]'>
-                <header className='mb-4 pt-6'>
+                <header className='pt-6 mb-4'>
                   <h2 className='text-lg font-semibold'>
                     {t('datasetCreation.stepTwo.qaSwitchHighQualityTipTitle')}
                   </h2>
-                  <p className='mt-2 text-sm font-normal'>
+                  <p className='font-normal text-sm mt-2'>
                     {t('datasetCreation.stepTwo.qaSwitchHighQualityTipContent')}
                   </p>
                 </header>
@@ -894,20 +889,21 @@
                 placement={'top'}
               >
                 <PortalToFollowElemTrigger asChild>
-                  <OptionCard className='flex-1 self-stretch'
+                  <OptionCard className='flex-1'
                     title={t('datasetCreation.stepTwo.economical')}
                     description={t('datasetCreation.stepTwo.economicalTip')}
                     icon={<Image src={indexMethodIcon.economical} alt='' />}
                     isActive={!hasSetIndexType && indexType === IndexingType.ECONOMICAL}
-                    disabled={hasSetIndexType || docForm !== ChunkingMode.text}
+                    disabled={!isAPIKeySet || hasSetIndexType || docForm !== ChunkingMode.text}
                     ref={economyDomRef}
                     onSwitched={() => {
-                      setIndexType(IndexingType.ECONOMICAL)
+                      if (isAPIKeySet && docForm === ChunkingMode.text)
+                        setIndexType(IndexingType.ECONOMICAL)
                     }}
                   />
                 </PortalToFollowElemTrigger>
                 <PortalToFollowElemContent>
-                  <div className='rounded-lg border-components-panel-border bg-components-tooltip-bg p-3 text-xs font-medium text-text-secondary shadow-lg'>
+                  <div className='p-3 bg-components-tooltip-bg border-components-panel-border text-xs font-medium text-text-secondary rounded-lg shadow-lg'>
                     {
                       docForm === ChunkingMode.qa
                         ? t('datasetCreation.stepTwo.notAvailableForQA')
@@ -919,16 +915,16 @@
             </>)}
         </div>
         {!hasSetIndexType && indexType === IndexingType.QUALIFIED && (
-          <div className='mt-2 flex h-10 items-center gap-x-0.5 overflow-hidden rounded-xl border-[0.5px] border-components-panel-border bg-components-panel-bg-blur p-2 shadow-xs backdrop-blur-[5px]'>
-            <div className='absolute bottom-0 left-0 right-0 top-0 bg-dataset-warning-message-bg opacity-40'></div>
+          <div className='mt-2 h-10 p-2 flex items-center gap-x-0.5 rounded-xl border-[0.5px] border-components-panel-border overflow-hidden bg-components-panel-bg-blur backdrop-blur-[5px] shadow-xs'>
+            <div className='absolute top-0 left-0 right-0 bottom-0 bg-[linear-gradient(92deg,rgba(247,144,9,0.25)_0%,rgba(255,255,255,0.00)_100%)] opacity-40'></div>
             <div className='p-1'>
               <AlertTriangle className='size-4 text-text-warning-secondary' />
             </div>
-            <span className='system-xs-medium text-text-primary'>{t('datasetCreation.stepTwo.highQualityTip')}</span>
+            <span className='system-xs-medium'>{t('datasetCreation.stepTwo.highQualityTip')}</span>
           </div>
         )}
         {hasSetIndexType && indexType === IndexingType.ECONOMICAL && (
-          <div className='system-xs-medium mt-2'>
+          <div className='mt-2 system-xs-medium'>
             {t('datasetCreation.stepTwo.indexSettingTip')}
             <Link className='text-text-accent' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
           </div>
@@ -936,7 +932,7 @@
         {/* Embedding model */}
         {indexType === IndexingType.QUALIFIED && (
           <div className='mt-5'>
-            <div className={cn('system-md-semibold mb-1 text-text-secondary', datasetId && 'flex items-center justify-between')}>{t('datasetSettings.form.embeddingModel')}</div>
+            <div className={cn('system-md-semibold mb-1', datasetId && 'flex justify-between items-center')}>{t('datasetSettings.form.embeddingModel')}</div>
             <ModelSelector
               readonly={isModelAndRetrievalConfigDisabled}
               triggerClassName={isModelAndRetrievalConfigDisabled ? 'opacity-50' : ''}
@@ -947,7 +943,7 @@
               }}
             />
             {isModelAndRetrievalConfigDisabled && (
-              <div className='system-xs-medium mt-2 text-text-tertiary'>
+              <div className='mt-2 system-xs-medium'>
                 {t('datasetCreation.stepTwo.indexSettingTip')}
                 <Link className='text-text-accent' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
               </div>
@@ -960,7 +956,7 @@
           {!isModelAndRetrievalConfigDisabled
             ? (
               <div className={'mb-1'}>
-                <div className='system-md-semibold mb-0.5 text-text-secondary'>{t('datasetSettings.form.retrievalSetting.title')}</div>
+                <div className='system-md-semibold mb-0.5'>{t('datasetSettings.form.retrievalSetting.title')}</div>
                 <div className='body-xs-regular text-text-tertiary'>
                   <a target='_blank' rel='noopener noreferrer' href='https://docs.dify.ai/guides/knowledge-base/create-knowledge-and-upload-documents#id-4-retrieval-settings' className='text-text-accent'>{t('datasetSettings.form.retrievalSetting.learnMore')}</a>
                   {t('datasetSettings.form.retrievalSetting.longDescription')}
@@ -968,7 +964,7 @@
               </div>
             )
             : (
-              <div className={cn('system-md-semibold mb-0.5 text-text-secondary', 'flex items-center justify-between')}>
+              <div className={cn('system-md-semibold mb-0.5', 'flex justify-between items-center')}>
                 <div>{t('datasetSettings.form.retrievalSetting.title')}</div>
               </div>
             )}
@@ -996,22 +992,22 @@
 
         {!isSetting
           ? (
-            <div className='mt-8 flex items-center py-2'>
+            <div className='flex items-center mt-8 py-2'>
               <Button onClick={() => onStepChange && onStepChange(-1)}>
-                <RiArrowLeftLine className='mr-1 h-4 w-4' />
+                <RiArrowLeftLine className='w-4 h-4 mr-1' />
                 {t('datasetCreation.stepTwo.previousStep')}
               </Button>
               <Button className='ml-auto' loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.nextStep')}</Button>
             </div>
           )
           : (
-            <div className='mt-8 flex items-center py-2'>
+            <div className='flex items-center mt-8 py-2'>
               <Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>
               <Button className='ml-2' onClick={onCancel}>{t('datasetCreation.stepTwo.cancel')}</Button>
             </div>
           )}
       </div>
-      <FloatRightContainer isMobile={isMobile} isOpen={true} onClose={noop} footer={null}>
+      <FloatRightContainer isMobile={isMobile} isOpen={true} onClose={() => { }} footer={null}>
         <PreviewContainer
           header={<PreviewHeader
             title={t('datasetCreation.stepTwo.preview')}
@@ -1077,14 +1073,15 @@
               }
               {
                 currentDocForm !== ChunkingMode.qa
-                && <Badge text={t('datasetCreation.stepTwo.previewChunkCount', {
-                  count: estimate?.total_segments || 0,
-                }) as string}
+                && <Badge text={t(
+                  'datasetCreation.stepTwo.previewChunkCount', {
+                    count: estimate?.total_segments || 0,
+                  }) as string}
                 />
               }
             </div>
           </PreviewHeader>}
-          className={cn('relative flex h-full w-1/2 shrink-0 p-4 pr-0', isMobile && 'w-full max-w-[524px]')}
+          className={cn('flex shrink-0 w-1/2 p-4 pr-0 relative h-full', isMobile && 'w-full max-w-[524px]')}
           mainClassName='space-y-6'
         >
           {currentDocForm === ChunkingMode.qa && estimate?.qa_preview && (
@@ -1141,7 +1138,7 @@
             })
           )}
           {currentEstimateMutation.isIdle && (
-            <div className='flex h-full w-full items-center justify-center'>
+            <div className='h-full w-full flex items-center justify-center'>
               <div className='flex flex-col items-center justify-center gap-3'>
                 <RiSearchEyeLine className='size-10 text-text-empty-state-icon' />
                 <p className='text-sm text-text-tertiary'>

--
Gitblit v1.8.0