wwf
2 天以前 a430284aa21e3ae1f0d5654e55b2ad2852519cc2
app/components/datasets/create/step-two/index.tsx
@@ -37,7 +37,7 @@
import FloatRightContainer from '@/app/components/base/float-right-container'
import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config'
import type { RetrievalConfig } from '@/types/app'
import { type RetrievalConfig } from '@/types/app'
import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
import Toast from '@/app/components/base/toast'
import type { NotionPage } from '@/models/common'
@@ -62,10 +62,9 @@
import CustomDialog from '@/app/components/base/dialog'
import { PortalToFollowElem, PortalToFollowElemContent, PortalToFollowElemTrigger } from '@/app/components/base/portal-to-follow-elem'
import { AlertTriangle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback'
import { noop } from 'lodash-es'
/**
 * Wraps its children in a `<label>` element styled with the
 * `system-sm-semibold` and `text-text-secondary` classes.
 *
 * @param props - Standard `PropsWithChildren`; only `children` is read.
 */
const TextLabel: FC<PropsWithChildren> = (props) => {
  return <label className='system-sm-semibold text-text-secondary'>{props.children}</label>
}
type StepTwoProps = {
@@ -97,9 +96,9 @@
}
// Default segment identifier: the four-character text `\n\n` (escaped backslashes,
// not actual newline characters).
const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n'
// NOTE(review): DEFAULT_MAXIMUM_CHUNK_LENGTH is declared twice (1024 and 500). A `const`
// cannot be redeclared in the same scope, so only one of these lines can remain.
// Which value is intended cannot be determined from here — TODO confirm against the
// unit ('characters' vs 'tokens') shown next to the max-length inputs.
const DEFAULT_MAXIMUM_CHUNK_LENGTH = 1024
const DEFAULT_MAXIMUM_CHUNK_LENGTH = 500
// Default value for the chunk overlap setting.
const DEFAULT_OVERLAP = 50
// Upper limit parsed (base 10) from the `data-public-indexing-max-segmentation-tokens-length`
// attribute on <body>; falls back to '4000' when `document`, `body`, or the attribute is
// missing or empty.
// NOTE(review): this constant is also declared twice. The two lines differ only in
// `Number.parseInt` vs the global `parseInt`, which are the same function, so either
// line can be dropped without changing the value.
const MAXIMUM_CHUNK_TOKEN_LENGTH = Number.parseInt(globalThis.document?.body?.getAttribute('data-public-indexing-max-segmentation-tokens-length') || '4000', 10)
const MAXIMUM_CHUNK_TOKEN_LENGTH = parseInt(globalThis.document?.body?.getAttribute('data-public-indexing-max-segmentation-tokens-length') || '4000', 10)
type ParentChildConfig = {
  chunkForContext: ParentMode
@@ -117,11 +116,11 @@
  chunkForContext: 'paragraph',
  parent: {
    delimiter: '\\n\\n',
    maxLength: 1024,
    maxLength: 500,
  },
  child: {
    delimiter: '\\n',
    maxLength: 512,
    maxLength: 200,
  },
}
@@ -170,11 +169,12 @@
  const [rules, setRules] = useState<PreProcessingRule[]>([])
  const [defaultConfig, setDefaultConfig] = useState<Rules>()
  const hasSetIndexType = !!indexingType
  const [indexType, setIndexType] = useState<IndexingType>(() => {
    if (hasSetIndexType)
      return indexingType
    return isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL
  })
  const [indexType, setIndexType] = useState<IndexingType>(
    (indexingType
      || isAPIKeySet)
      ? IndexingType.QUALIFIED
      : IndexingType.ECONOMICAL,
  )
  const [previewFile, setPreviewFile] = useState<DocumentItem>(
    (datasetId && documentDetail)
@@ -206,12 +206,12 @@
    if (value === ChunkingMode.parentChild && indexType === IndexingType.ECONOMICAL)
      setIndexType(IndexingType.QUALIFIED)
    setDocForm(value)
    // eslint-disable-next-line ts/no-use-before-define
    // eslint-disable-next-line @typescript-eslint/no-use-before-define
    currentEstimateMutation.reset()
  }
  const [docLanguage, setDocLanguage] = useState<string>(
    (datasetId && documentDetail) ? documentDetail.doc_language : (locale !== LanguagesSupported[1] ? 'English' : 'Chinese Simplified'),
    (datasetId && documentDetail) ? documentDetail.doc_language : (locale !== LanguagesSupported[1] ? 'English' : 'Chinese'),
  )
  const [parentChildConfig, setParentChildConfig] = useState<ParentChildConfig>(defaultParentChildConfig)
@@ -421,13 +421,6 @@
    }
    else { // create
      const indexMethod = getIndexing_technique()
      if (indexMethod === IndexingType.QUALIFIED && (!embeddingModel.model || !embeddingModel.provider)) {
        Toast.notify({
          type: 'error',
          message: t('appDebug.datasetConfig.embeddingModelRequired'),
        })
        return
      }
      if (
        !isReRankModelSelected({
          rerankModelList,
@@ -575,6 +568,7 @@
    // get indexing type by props
    if (indexingType)
      setIndexType(indexingType as IndexingType)
    else
      setIndexType(isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL)
  }, [isAPIKeySet, indexingType, datasetId])
@@ -585,14 +579,14 @@
  const isModelAndRetrievalConfigDisabled = !!datasetId && !!currentDataset?.data_source_type
  return (
    <div className='flex h-full w-full'>
      <div className={cn('relative h-full w-1/2 overflow-y-auto py-6', isMobile ? 'px-4' : 'px-12')}>
        <div className={'system-md-semibold mb-1 text-text-secondary'}>{t('datasetCreation.stepTwo.segmentation')}</div>
    <div className='flex w-full h-full'>
      <div className={cn('relative h-full w-1/2 py-6 overflow-y-auto', isMobile ? 'px-4' : 'px-12')}>
        <div className={'system-md-semibold mb-1'}>{t('datasetCreation.stepTwo.segmentation')}</div>
        {((isInUpload && [ChunkingMode.text, ChunkingMode.qa].includes(currentDataset!.doc_form))
          || isUploadInEmptyDataset
          || isInInit)
          && <OptionCard
            className='mb-2 bg-background-section'
            className='bg-background-section mb-2'
            title={t('datasetCreation.stepTwo.general')}
            icon={<Image width={20} height={20} src={SettingCog} alt={t('datasetCreation.stepTwo.general')} />}
            activeHeaderClassName='bg-dataset-option-card-blue-gradient'
@@ -606,7 +600,7 @@
            actions={
              <>
                <Button variant={'secondary-accent'} onClick={() => updatePreview()}>
                  <RiSearchEyeLine className='mr-0.5 h-4 w-4' />
                  <RiSearchEyeLine className='h-4 w-4 mr-0.5' />
                  {t('datasetCreation.stepTwo.previewChunk')}
                </Button>
                <Button variant={'ghost'} onClick={resetRules}>
@@ -623,18 +617,18 @@
                  onChange={e => setSegmentIdentifier(e.target.value, true)}
                />
                <MaxLengthInput
                  unit='characters'
                  unit='tokens'
                  value={maxChunkLength}
                  onChange={setMaxChunkLength}
                />
                <OverlapInput
                  unit='characters'
                  unit='tokens'
                  value={overlap}
                  min={1}
                  onChange={setOverlap}
                />
              </div>
              <div className='flex w-full flex-col'>
              <div className='w-full flex flex-col'>
                <div className='flex items-center gap-x-2'>
                  <div className='inline-flex shrink-0'>
                    <TextLabel>{t('datasetCreation.stepTwo.rules')}</TextLabel>
@@ -649,7 +643,7 @@
                      <Checkbox
                        checked={rule.enabled}
                      />
                      <label className="system-sm-regular ml-2 cursor-pointer text-text-secondary">{getRuleName(rule.id)}</label>
                      <label className="ml-2 system-sm-regular cursor-pointer text-text-secondary">{getRuleName(rule.id)}</label>
                    </div>
                  ))}
                  {IS_CE_EDITION && <>
@@ -667,7 +661,7 @@
                          checked={currentDocForm === ChunkingMode.qa}
                          disabled={!!currentDataset?.doc_form}
                        />
                        <label className="system-sm-regular ml-2 cursor-pointer text-text-secondary">
                        <label className="ml-2 system-sm-regular cursor-pointer text-text-secondary">
                          {t('datasetCreation.stepTwo.useQALanguage')}
                        </label>
                      </div>
@@ -683,7 +677,7 @@
                        style={{
                          background: 'linear-gradient(92deg, rgba(247, 144, 9, 0.1) 0%, rgba(255, 255, 255, 0.00) 100%)',
                        }}
                        className='mt-2 flex h-10 items-center gap-2 rounded-xl border border-components-panel-border px-3 text-xs shadow-xs backdrop-blur-[5px]'
                        className='h-10 mt-2 flex items-center gap-2 rounded-xl backdrop-blur-[5px] border-components-panel-border border shadow-xs px-3 text-xs'
                      >
                        <RiAlertFill className='size-4 text-text-warning-secondary' />
                        <span className='system-xs-medium text-text-primary'>
@@ -713,7 +707,7 @@
            actions={
              <>
                <Button variant={'secondary-accent'} onClick={() => updatePreview()}>
                  <RiSearchEyeLine className='mr-0.5 h-4 w-4' />
                  <RiSearchEyeLine className='h-4 w-4 mr-0.5' />
                  {t('datasetCreation.stepTwo.previewChunk')}
                </Button>
                <Button variant={'ghost'} onClick={resetRules}>
@@ -756,7 +750,7 @@
                        })}
                      />
                      <MaxLengthInput
                        unit='characters'
                        unit='tokens'
                        value={parentChildConfig.parent.maxLength}
                        onChange={value => setParentChildConfig({
                          ...parentChildConfig,
@@ -790,7 +784,7 @@
                  </div>
                  <Divider className='grow' bgStyle='gradient' />
                </div>
                <div className='mt-1 flex gap-3'>
                <div className='flex gap-3 mt-1'>
                  <DelimiterInput
                    value={parentChildConfig.child.delimiter}
                    tooltip={t('datasetCreation.stepTwo.parentChildChunkDelimiterTip')!}
@@ -803,7 +797,7 @@
                    })}
                  />
                  <MaxLengthInput
                    unit='characters'
                    unit='tokens'
                    value={parentChildConfig.child.maxLength}
                    onChange={value => setParentChildConfig({
                      ...parentChildConfig,
@@ -830,7 +824,7 @@
                      <Checkbox
                        checked={rule.enabled}
                      />
                      <label className="system-sm-regular ml-2 cursor-pointer text-text-secondary">{getRuleName(rule.id)}</label>
                      <label className="ml-2 system-sm-regular cursor-pointer text-text-secondary">{getRuleName(rule.id)}</label>
                    </div>
                  ))}
                </div>
@@ -838,10 +832,10 @@
            </div>
          </OptionCard>}
        <Divider className='my-5' />
        <div className={'system-md-semibold mb-1 text-text-secondary'}>{t('datasetCreation.stepTwo.indexMode')}</div>
        <div className={'system-md-semibold mb-1'}>{t('datasetCreation.stepTwo.indexMode')}</div>
        <div className='flex items-center gap-2'>
          {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.QUALIFIED)) && (
            <OptionCard className='flex-1 self-stretch'
            <OptionCard className='flex-1'
              title={<div className='flex items-center'>
                {t('datasetCreation.stepTwo.qualified')}
                <Badge className={cn('ml-1 h-[18px]', (!hasSetIndexType && indexType === IndexingType.QUALIFIED) ? 'border-text-accent-secondary text-text-accent-secondary' : '')} uppercase>
@@ -854,9 +848,10 @@
              description={t('datasetCreation.stepTwo.qualifiedTip')}
              icon={<Image src={indexMethodIcon.high_quality} alt='' />}
              isActive={!hasSetIndexType && indexType === IndexingType.QUALIFIED}
              disabled={hasSetIndexType}
              disabled={!isAPIKeySet || hasSetIndexType}
              onSwitched={() => {
                setIndexType(IndexingType.QUALIFIED)
                if (isAPIKeySet)
                  setIndexType(IndexingType.QUALIFIED)
              }}
            />
          )}
@@ -864,11 +859,11 @@
          {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.ECONOMICAL)) && (
            <>
              <CustomDialog show={isQAConfirmDialogOpen} onClose={() => setIsQAConfirmDialogOpen(false)} className='w-[432px]'>
                <header className='mb-4 pt-6'>
                <header className='pt-6 mb-4'>
                  <h2 className='text-lg font-semibold'>
                    {t('datasetCreation.stepTwo.qaSwitchHighQualityTipTitle')}
                  </h2>
                  <p className='mt-2 text-sm font-normal'>
                  <p className='font-normal text-sm mt-2'>
                    {t('datasetCreation.stepTwo.qaSwitchHighQualityTipContent')}
                  </p>
                </header>
@@ -894,20 +889,21 @@
                placement={'top'}
              >
                <PortalToFollowElemTrigger asChild>
                  <OptionCard className='flex-1 self-stretch'
                  <OptionCard className='flex-1'
                    title={t('datasetCreation.stepTwo.economical')}
                    description={t('datasetCreation.stepTwo.economicalTip')}
                    icon={<Image src={indexMethodIcon.economical} alt='' />}
                    isActive={!hasSetIndexType && indexType === IndexingType.ECONOMICAL}
                    disabled={hasSetIndexType || docForm !== ChunkingMode.text}
                    disabled={!isAPIKeySet || hasSetIndexType || docForm !== ChunkingMode.text}
                    ref={economyDomRef}
                    onSwitched={() => {
                      setIndexType(IndexingType.ECONOMICAL)
                      if (isAPIKeySet && docForm === ChunkingMode.text)
                        setIndexType(IndexingType.ECONOMICAL)
                    }}
                  />
                </PortalToFollowElemTrigger>
                <PortalToFollowElemContent>
                  <div className='rounded-lg border-components-panel-border bg-components-tooltip-bg p-3 text-xs font-medium text-text-secondary shadow-lg'>
                  <div className='p-3 bg-components-tooltip-bg border-components-panel-border text-xs font-medium text-text-secondary rounded-lg shadow-lg'>
                    {
                      docForm === ChunkingMode.qa
                        ? t('datasetCreation.stepTwo.notAvailableForQA')
@@ -919,16 +915,16 @@
            </>)}
        </div>
        {!hasSetIndexType && indexType === IndexingType.QUALIFIED && (
          <div className='mt-2 flex h-10 items-center gap-x-0.5 overflow-hidden rounded-xl border-[0.5px] border-components-panel-border bg-components-panel-bg-blur p-2 shadow-xs backdrop-blur-[5px]'>
            <div className='absolute bottom-0 left-0 right-0 top-0 bg-dataset-warning-message-bg opacity-40'></div>
          <div className='mt-2 h-10 p-2 flex items-center gap-x-0.5 rounded-xl border-[0.5px] border-components-panel-border overflow-hidden bg-components-panel-bg-blur backdrop-blur-[5px] shadow-xs'>
            <div className='absolute top-0 left-0 right-0 bottom-0 bg-[linear-gradient(92deg,rgba(247,144,9,0.25)_0%,rgba(255,255,255,0.00)_100%)] opacity-40'></div>
            <div className='p-1'>
              <AlertTriangle className='size-4 text-text-warning-secondary' />
            </div>
            <span className='system-xs-medium text-text-primary'>{t('datasetCreation.stepTwo.highQualityTip')}</span>
            <span className='system-xs-medium'>{t('datasetCreation.stepTwo.highQualityTip')}</span>
          </div>
        )}
        {hasSetIndexType && indexType === IndexingType.ECONOMICAL && (
          <div className='system-xs-medium mt-2'>
          <div className='mt-2 system-xs-medium'>
            {t('datasetCreation.stepTwo.indexSettingTip')}
            <Link className='text-text-accent' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
          </div>
@@ -936,7 +932,7 @@
        {/* Embedding model */}
        {indexType === IndexingType.QUALIFIED && (
          <div className='mt-5'>
            <div className={cn('system-md-semibold mb-1 text-text-secondary', datasetId && 'flex items-center justify-between')}>{t('datasetSettings.form.embeddingModel')}</div>
            <div className={cn('system-md-semibold mb-1', datasetId && 'flex justify-between items-center')}>{t('datasetSettings.form.embeddingModel')}</div>
            <ModelSelector
              readonly={isModelAndRetrievalConfigDisabled}
              triggerClassName={isModelAndRetrievalConfigDisabled ? 'opacity-50' : ''}
@@ -947,7 +943,7 @@
              }}
            />
            {isModelAndRetrievalConfigDisabled && (
              <div className='system-xs-medium mt-2 text-text-tertiary'>
              <div className='mt-2 system-xs-medium'>
                {t('datasetCreation.stepTwo.indexSettingTip')}
                <Link className='text-text-accent' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
              </div>
@@ -960,7 +956,7 @@
          {!isModelAndRetrievalConfigDisabled
            ? (
              <div className={'mb-1'}>
                <div className='system-md-semibold mb-0.5 text-text-secondary'>{t('datasetSettings.form.retrievalSetting.title')}</div>
                <div className='system-md-semibold mb-0.5'>{t('datasetSettings.form.retrievalSetting.title')}</div>
                <div className='body-xs-regular text-text-tertiary'>
                  <a target='_blank' rel='noopener noreferrer' href='https://docs.dify.ai/guides/knowledge-base/create-knowledge-and-upload-documents#id-4-retrieval-settings' className='text-text-accent'>{t('datasetSettings.form.retrievalSetting.learnMore')}</a>
                  {t('datasetSettings.form.retrievalSetting.longDescription')}
@@ -968,7 +964,7 @@
              </div>
            )
            : (
              <div className={cn('system-md-semibold mb-0.5 text-text-secondary', 'flex items-center justify-between')}>
              <div className={cn('system-md-semibold mb-0.5', 'flex justify-between items-center')}>
                <div>{t('datasetSettings.form.retrievalSetting.title')}</div>
              </div>
            )}
@@ -996,22 +992,22 @@
        {!isSetting
          ? (
            <div className='mt-8 flex items-center py-2'>
            <div className='flex items-center mt-8 py-2'>
              <Button onClick={() => onStepChange && onStepChange(-1)}>
                <RiArrowLeftLine className='mr-1 h-4 w-4' />
                <RiArrowLeftLine className='w-4 h-4 mr-1' />
                {t('datasetCreation.stepTwo.previousStep')}
              </Button>
              <Button className='ml-auto' loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.nextStep')}</Button>
            </div>
          )
          : (
            <div className='mt-8 flex items-center py-2'>
            <div className='flex items-center mt-8 py-2'>
              <Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>
              <Button className='ml-2' onClick={onCancel}>{t('datasetCreation.stepTwo.cancel')}</Button>
            </div>
          )}
      </div>
      <FloatRightContainer isMobile={isMobile} isOpen={true} onClose={noop} footer={null}>
      <FloatRightContainer isMobile={isMobile} isOpen={true} onClose={() => { }} footer={null}>
        <PreviewContainer
          header={<PreviewHeader
            title={t('datasetCreation.stepTwo.preview')}
@@ -1077,14 +1073,15 @@
              }
              {
                currentDocForm !== ChunkingMode.qa
                && <Badge text={t('datasetCreation.stepTwo.previewChunkCount', {
                  count: estimate?.total_segments || 0,
                }) as string}
                && <Badge text={t(
                  'datasetCreation.stepTwo.previewChunkCount', {
                    count: estimate?.total_segments || 0,
                  }) as string}
                />
              }
            </div>
          </PreviewHeader>}
          className={cn('relative flex h-full w-1/2 shrink-0 p-4 pr-0', isMobile && 'w-full max-w-[524px]')}
          className={cn('flex shrink-0 w-1/2 p-4 pr-0 relative h-full', isMobile && 'w-full max-w-[524px]')}
          mainClassName='space-y-6'
        >
          {currentDocForm === ChunkingMode.qa && estimate?.qa_preview && (
@@ -1141,7 +1138,7 @@
            })
          )}
          {currentEstimateMutation.isIdle && (
            <div className='flex h-full w-full items-center justify-center'>
            <div className='h-full w-full flex items-center justify-center'>
              <div className='flex flex-col items-center justify-center gap-3'>
                <RiSearchEyeLine className='size-10 text-text-empty-state-icon' />
                <p className='text-sm text-text-tertiary'>