| | |
| | | import FloatRightContainer from '@/app/components/base/float-right-container' |
| | | import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config' |
| | | import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config' |
| | | import type { RetrievalConfig } from '@/types/app' |
| | | import { type RetrievalConfig } from '@/types/app' |
| | | import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model' |
| | | import Toast from '@/app/components/base/toast' |
| | | import type { NotionPage } from '@/models/common' |
| | |
| | | import CustomDialog from '@/app/components/base/dialog' |
| | | import { PortalToFollowElem, PortalToFollowElemContent, PortalToFollowElemTrigger } from '@/app/components/base/portal-to-follow-elem' |
| | | import { AlertTriangle } from '@/app/components/base/icons/src/vender/solid/alertsAndFeedback' |
| | | import { noop } from 'lodash-es' |
| | | |
| | | const TextLabel: FC<PropsWithChildren> = (props) => { |
| | | return <label className='system-sm-semibold text-text-secondary'>{props.children}</label> |
| | | return <label className='text-text-secondary system-sm-semibold'>{props.children}</label> |
| | | } |
| | | |
| | | type StepTwoProps = { |
| | |
| | | } |
| | | |
| | | const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n' |
| | | const DEFAULT_MAXIMUM_CHUNK_LENGTH = 1024 |
| | | const DEFAULT_MAXIMUM_CHUNK_LENGTH = 500 |
| | | const DEFAULT_OVERLAP = 50 |
| | | const MAXIMUM_CHUNK_TOKEN_LENGTH = Number.parseInt(globalThis.document?.body?.getAttribute('data-public-indexing-max-segmentation-tokens-length') || '4000', 10) |
| | | const MAXIMUM_CHUNK_TOKEN_LENGTH = parseInt(globalThis.document?.body?.getAttribute('data-public-indexing-max-segmentation-tokens-length') || '4000', 10) |
| | | |
| | | type ParentChildConfig = { |
| | | chunkForContext: ParentMode |
| | |
| | | chunkForContext: 'paragraph', |
| | | parent: { |
| | | delimiter: '\\n\\n', |
| | | maxLength: 1024, |
| | | maxLength: 500, |
| | | }, |
| | | child: { |
| | | delimiter: '\\n', |
| | | maxLength: 512, |
| | | maxLength: 200, |
| | | }, |
| | | } |
| | | |
| | |
| | | const [rules, setRules] = useState<PreProcessingRule[]>([]) |
| | | const [defaultConfig, setDefaultConfig] = useState<Rules>() |
| | | const hasSetIndexType = !!indexingType |
| | | const [indexType, setIndexType] = useState<IndexingType>(() => { |
| | | if (hasSetIndexType) |
| | | return indexingType |
| | | return isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL |
| | | }) |
| | | const [indexType, setIndexType] = useState<IndexingType>( |
| | | (indexingType |
| | | || isAPIKeySet) |
| | | ? IndexingType.QUALIFIED |
| | | : IndexingType.ECONOMICAL, |
| | | ) |
| | | |
| | | const [previewFile, setPreviewFile] = useState<DocumentItem>( |
| | | (datasetId && documentDetail) |
| | |
| | | if (value === ChunkingMode.parentChild && indexType === IndexingType.ECONOMICAL) |
| | | setIndexType(IndexingType.QUALIFIED) |
| | | setDocForm(value) |
| | | // eslint-disable-next-line ts/no-use-before-define |
| | | // eslint-disable-next-line @typescript-eslint/no-use-before-define |
| | | currentEstimateMutation.reset() |
| | | } |
| | | |
| | | const [docLanguage, setDocLanguage] = useState<string>( |
| | | (datasetId && documentDetail) ? documentDetail.doc_language : (locale !== LanguagesSupported[1] ? 'English' : 'Chinese Simplified'), |
| | | (datasetId && documentDetail) ? documentDetail.doc_language : (locale !== LanguagesSupported[1] ? 'English' : 'Chinese'), |
| | | ) |
| | | |
| | | const [parentChildConfig, setParentChildConfig] = useState<ParentChildConfig>(defaultParentChildConfig) |
| | |
| | | } |
| | | else { // create |
| | | const indexMethod = getIndexing_technique() |
| | | if (indexMethod === IndexingType.QUALIFIED && (!embeddingModel.model || !embeddingModel.provider)) { |
| | | Toast.notify({ |
| | | type: 'error', |
| | | message: t('appDebug.datasetConfig.embeddingModelRequired'), |
| | | }) |
| | | return |
| | | } |
| | | if ( |
| | | !isReRankModelSelected({ |
| | | rerankModelList, |
| | |
| | | // get indexing type by props |
| | | if (indexingType) |
| | | setIndexType(indexingType as IndexingType) |
| | | |
| | | else |
| | | setIndexType(isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL) |
| | | }, [isAPIKeySet, indexingType, datasetId]) |
| | |
| | | const isModelAndRetrievalConfigDisabled = !!datasetId && !!currentDataset?.data_source_type |
| | | |
| | | return ( |
| | | <div className='flex h-full w-full'> |
| | | <div className={cn('relative h-full w-1/2 overflow-y-auto py-6', isMobile ? 'px-4' : 'px-12')}> |
| | | <div className={'system-md-semibold mb-1 text-text-secondary'}>{t('datasetCreation.stepTwo.segmentation')}</div> |
| | | <div className='flex w-full h-full'> |
| | | <div className={cn('relative h-full w-1/2 py-6 overflow-y-auto', isMobile ? 'px-4' : 'px-12')}> |
| | | <div className={'system-md-semibold mb-1'}>{t('datasetCreation.stepTwo.segmentation')}</div> |
| | | {((isInUpload && [ChunkingMode.text, ChunkingMode.qa].includes(currentDataset!.doc_form)) |
| | | || isUploadInEmptyDataset |
| | | || isInInit) |
| | | && <OptionCard |
| | | className='mb-2 bg-background-section' |
| | | className='bg-background-section mb-2' |
| | | title={t('datasetCreation.stepTwo.general')} |
| | | icon={<Image width={20} height={20} src={SettingCog} alt={t('datasetCreation.stepTwo.general')} />} |
| | | activeHeaderClassName='bg-dataset-option-card-blue-gradient' |
| | |
| | | actions={ |
| | | <> |
| | | <Button variant={'secondary-accent'} onClick={() => updatePreview()}> |
| | | <RiSearchEyeLine className='mr-0.5 h-4 w-4' /> |
| | | <RiSearchEyeLine className='h-4 w-4 mr-0.5' /> |
| | | {t('datasetCreation.stepTwo.previewChunk')} |
| | | </Button> |
| | | <Button variant={'ghost'} onClick={resetRules}> |
| | |
| | | onChange={e => setSegmentIdentifier(e.target.value, true)} |
| | | /> |
| | | <MaxLengthInput |
| | | unit='characters' |
| | | unit='tokens' |
| | | value={maxChunkLength} |
| | | onChange={setMaxChunkLength} |
| | | /> |
| | | <OverlapInput |
| | | unit='characters' |
| | | unit='tokens' |
| | | value={overlap} |
| | | min={1} |
| | | onChange={setOverlap} |
| | | /> |
| | | </div> |
| | | <div className='flex w-full flex-col'> |
| | | <div className='w-full flex flex-col'> |
| | | <div className='flex items-center gap-x-2'> |
| | | <div className='inline-flex shrink-0'> |
| | | <TextLabel>{t('datasetCreation.stepTwo.rules')}</TextLabel> |
| | |
| | | <Checkbox |
| | | checked={rule.enabled} |
| | | /> |
| | | <label className="system-sm-regular ml-2 cursor-pointer text-text-secondary">{getRuleName(rule.id)}</label> |
| | | <label className="ml-2 system-sm-regular cursor-pointer text-text-secondary">{getRuleName(rule.id)}</label> |
| | | </div> |
| | | ))} |
| | | {IS_CE_EDITION && <> |
| | |
| | | checked={currentDocForm === ChunkingMode.qa} |
| | | disabled={!!currentDataset?.doc_form} |
| | | /> |
| | | <label className="system-sm-regular ml-2 cursor-pointer text-text-secondary"> |
| | | <label className="ml-2 system-sm-regular cursor-pointer text-text-secondary"> |
| | | {t('datasetCreation.stepTwo.useQALanguage')} |
| | | </label> |
| | | </div> |
| | |
| | | style={{ |
| | | background: 'linear-gradient(92deg, rgba(247, 144, 9, 0.1) 0%, rgba(255, 255, 255, 0.00) 100%)', |
| | | }} |
| | | className='mt-2 flex h-10 items-center gap-2 rounded-xl border border-components-panel-border px-3 text-xs shadow-xs backdrop-blur-[5px]' |
| | | className='h-10 mt-2 flex items-center gap-2 rounded-xl backdrop-blur-[5px] border-components-panel-border border shadow-xs px-3 text-xs' |
| | | > |
| | | <RiAlertFill className='size-4 text-text-warning-secondary' /> |
| | | <span className='system-xs-medium text-text-primary'> |
| | |
| | | actions={ |
| | | <> |
| | | <Button variant={'secondary-accent'} onClick={() => updatePreview()}> |
| | | <RiSearchEyeLine className='mr-0.5 h-4 w-4' /> |
| | | <RiSearchEyeLine className='h-4 w-4 mr-0.5' /> |
| | | {t('datasetCreation.stepTwo.previewChunk')} |
| | | </Button> |
| | | <Button variant={'ghost'} onClick={resetRules}> |
| | |
| | | })} |
| | | /> |
| | | <MaxLengthInput |
| | | unit='characters' |
| | | unit='tokens' |
| | | value={parentChildConfig.parent.maxLength} |
| | | onChange={value => setParentChildConfig({ |
| | | ...parentChildConfig, |
| | |
| | | </div> |
| | | <Divider className='grow' bgStyle='gradient' /> |
| | | </div> |
| | | <div className='mt-1 flex gap-3'> |
| | | <div className='flex gap-3 mt-1'> |
| | | <DelimiterInput |
| | | value={parentChildConfig.child.delimiter} |
| | | tooltip={t('datasetCreation.stepTwo.parentChildChunkDelimiterTip')!} |
| | |
| | | })} |
| | | /> |
| | | <MaxLengthInput |
| | | unit='characters' |
| | | unit='tokens' |
| | | value={parentChildConfig.child.maxLength} |
| | | onChange={value => setParentChildConfig({ |
| | | ...parentChildConfig, |
| | |
| | | <Checkbox |
| | | checked={rule.enabled} |
| | | /> |
| | | <label className="system-sm-regular ml-2 cursor-pointer text-text-secondary">{getRuleName(rule.id)}</label> |
| | | <label className="ml-2 system-sm-regular cursor-pointer text-text-secondary">{getRuleName(rule.id)}</label> |
| | | </div> |
| | | ))} |
| | | </div> |
| | |
| | | </div> |
| | | </OptionCard>} |
| | | <Divider className='my-5' /> |
| | | <div className={'system-md-semibold mb-1 text-text-secondary'}>{t('datasetCreation.stepTwo.indexMode')}</div> |
| | | <div className={'system-md-semibold mb-1'}>{t('datasetCreation.stepTwo.indexMode')}</div> |
| | | <div className='flex items-center gap-2'> |
| | | {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.QUALIFIED)) && ( |
| | | <OptionCard className='flex-1 self-stretch' |
| | | <OptionCard className='flex-1' |
| | | title={<div className='flex items-center'> |
| | | {t('datasetCreation.stepTwo.qualified')} |
| | | <Badge className={cn('ml-1 h-[18px]', (!hasSetIndexType && indexType === IndexingType.QUALIFIED) ? 'border-text-accent-secondary text-text-accent-secondary' : '')} uppercase> |
| | |
| | | description={t('datasetCreation.stepTwo.qualifiedTip')} |
| | | icon={<Image src={indexMethodIcon.high_quality} alt='' />} |
| | | isActive={!hasSetIndexType && indexType === IndexingType.QUALIFIED} |
| | | disabled={hasSetIndexType} |
| | | disabled={!isAPIKeySet || hasSetIndexType} |
| | | onSwitched={() => { |
| | | setIndexType(IndexingType.QUALIFIED) |
| | | if (isAPIKeySet) |
| | | setIndexType(IndexingType.QUALIFIED) |
| | | }} |
| | | /> |
| | | )} |
| | |
| | | {(!hasSetIndexType || (hasSetIndexType && indexingType === IndexingType.ECONOMICAL)) && ( |
| | | <> |
| | | <CustomDialog show={isQAConfirmDialogOpen} onClose={() => setIsQAConfirmDialogOpen(false)} className='w-[432px]'> |
| | | <header className='mb-4 pt-6'> |
| | | <header className='pt-6 mb-4'> |
| | | <h2 className='text-lg font-semibold'> |
| | | {t('datasetCreation.stepTwo.qaSwitchHighQualityTipTitle')} |
| | | </h2> |
| | | <p className='mt-2 text-sm font-normal'> |
| | | <p className='font-normal text-sm mt-2'> |
| | | {t('datasetCreation.stepTwo.qaSwitchHighQualityTipContent')} |
| | | </p> |
| | | </header> |
| | |
| | | placement={'top'} |
| | | > |
| | | <PortalToFollowElemTrigger asChild> |
| | | <OptionCard className='flex-1 self-stretch' |
| | | <OptionCard className='flex-1' |
| | | title={t('datasetCreation.stepTwo.economical')} |
| | | description={t('datasetCreation.stepTwo.economicalTip')} |
| | | icon={<Image src={indexMethodIcon.economical} alt='' />} |
| | | isActive={!hasSetIndexType && indexType === IndexingType.ECONOMICAL} |
| | | disabled={hasSetIndexType || docForm !== ChunkingMode.text} |
| | | disabled={!isAPIKeySet || hasSetIndexType || docForm !== ChunkingMode.text} |
| | | ref={economyDomRef} |
| | | onSwitched={() => { |
| | | setIndexType(IndexingType.ECONOMICAL) |
| | | if (isAPIKeySet && docForm === ChunkingMode.text) |
| | | setIndexType(IndexingType.ECONOMICAL) |
| | | }} |
| | | /> |
| | | </PortalToFollowElemTrigger> |
| | | <PortalToFollowElemContent> |
| | | <div className='rounded-lg border-components-panel-border bg-components-tooltip-bg p-3 text-xs font-medium text-text-secondary shadow-lg'> |
| | | <div className='p-3 bg-components-tooltip-bg border-components-panel-border text-xs font-medium text-text-secondary rounded-lg shadow-lg'> |
| | | { |
| | | docForm === ChunkingMode.qa |
| | | ? t('datasetCreation.stepTwo.notAvailableForQA') |
| | |
| | | </>)} |
| | | </div> |
| | | {!hasSetIndexType && indexType === IndexingType.QUALIFIED && ( |
| | | <div className='mt-2 flex h-10 items-center gap-x-0.5 overflow-hidden rounded-xl border-[0.5px] border-components-panel-border bg-components-panel-bg-blur p-2 shadow-xs backdrop-blur-[5px]'> |
| | | <div className='absolute bottom-0 left-0 right-0 top-0 bg-dataset-warning-message-bg opacity-40'></div> |
| | | <div className='mt-2 h-10 p-2 flex items-center gap-x-0.5 rounded-xl border-[0.5px] border-components-panel-border overflow-hidden bg-components-panel-bg-blur backdrop-blur-[5px] shadow-xs'> |
| | | <div className='absolute top-0 left-0 right-0 bottom-0 bg-[linear-gradient(92deg,rgba(247,144,9,0.25)_0%,rgba(255,255,255,0.00)_100%)] opacity-40'></div> |
| | | <div className='p-1'> |
| | | <AlertTriangle className='size-4 text-text-warning-secondary' /> |
| | | </div> |
| | | <span className='system-xs-medium text-text-primary'>{t('datasetCreation.stepTwo.highQualityTip')}</span> |
| | | <span className='system-xs-medium'>{t('datasetCreation.stepTwo.highQualityTip')}</span> |
| | | </div> |
| | | )} |
| | | {hasSetIndexType && indexType === IndexingType.ECONOMICAL && ( |
| | | <div className='system-xs-medium mt-2'> |
| | | <div className='mt-2 system-xs-medium'> |
| | | {t('datasetCreation.stepTwo.indexSettingTip')} |
| | | <Link className='text-text-accent' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link> |
| | | </div> |
| | |
| | | {/* Embedding model */} |
| | | {indexType === IndexingType.QUALIFIED && ( |
| | | <div className='mt-5'> |
| | | <div className={cn('system-md-semibold mb-1 text-text-secondary', datasetId && 'flex items-center justify-between')}>{t('datasetSettings.form.embeddingModel')}</div> |
| | | <div className={cn('system-md-semibold mb-1', datasetId && 'flex justify-between items-center')}>{t('datasetSettings.form.embeddingModel')}</div> |
| | | <ModelSelector |
| | | readonly={isModelAndRetrievalConfigDisabled} |
| | | triggerClassName={isModelAndRetrievalConfigDisabled ? 'opacity-50' : ''} |
| | |
| | | }} |
| | | /> |
| | | {isModelAndRetrievalConfigDisabled && ( |
| | | <div className='system-xs-medium mt-2 text-text-tertiary'> |
| | | <div className='mt-2 system-xs-medium'> |
| | | {t('datasetCreation.stepTwo.indexSettingTip')} |
| | | <Link className='text-text-accent' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link> |
| | | </div> |
| | |
| | | {!isModelAndRetrievalConfigDisabled |
| | | ? ( |
| | | <div className={'mb-1'}> |
| | | <div className='system-md-semibold mb-0.5 text-text-secondary'>{t('datasetSettings.form.retrievalSetting.title')}</div> |
| | | <div className='system-md-semibold mb-0.5'>{t('datasetSettings.form.retrievalSetting.title')}</div> |
| | | <div className='body-xs-regular text-text-tertiary'> |
| | | <a target='_blank' rel='noopener noreferrer' href='https://docs.dify.ai/guides/knowledge-base/create-knowledge-and-upload-documents#id-4-retrieval-settings' className='text-text-accent'>{t('datasetSettings.form.retrievalSetting.learnMore')}</a> |
| | | {t('datasetSettings.form.retrievalSetting.longDescription')} |
| | |
| | | </div> |
| | | ) |
| | | : ( |
| | | <div className={cn('system-md-semibold mb-0.5 text-text-secondary', 'flex items-center justify-between')}> |
| | | <div className={cn('system-md-semibold mb-0.5', 'flex justify-between items-center')}> |
| | | <div>{t('datasetSettings.form.retrievalSetting.title')}</div> |
| | | </div> |
| | | )} |
| | |
| | | |
| | | {!isSetting |
| | | ? ( |
| | | <div className='mt-8 flex items-center py-2'> |
| | | <div className='flex items-center mt-8 py-2'> |
| | | <Button onClick={() => onStepChange && onStepChange(-1)}> |
| | | <RiArrowLeftLine className='mr-1 h-4 w-4' /> |
| | | <RiArrowLeftLine className='w-4 h-4 mr-1' /> |
| | | {t('datasetCreation.stepTwo.previousStep')} |
| | | </Button> |
| | | <Button className='ml-auto' loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.nextStep')}</Button> |
| | | </div> |
| | | ) |
| | | : ( |
| | | <div className='mt-8 flex items-center py-2'> |
| | | <div className='flex items-center mt-8 py-2'> |
| | | <Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button> |
| | | <Button className='ml-2' onClick={onCancel}>{t('datasetCreation.stepTwo.cancel')}</Button> |
| | | </div> |
| | | )} |
| | | </div> |
| | | <FloatRightContainer isMobile={isMobile} isOpen={true} onClose={noop} footer={null}> |
| | | <FloatRightContainer isMobile={isMobile} isOpen={true} onClose={() => { }} footer={null}> |
| | | <PreviewContainer |
| | | header={<PreviewHeader |
| | | title={t('datasetCreation.stepTwo.preview')} |
| | |
| | | } |
| | | { |
| | | currentDocForm !== ChunkingMode.qa |
| | | && <Badge text={t('datasetCreation.stepTwo.previewChunkCount', { |
| | | count: estimate?.total_segments || 0, |
| | | }) as string} |
| | | && <Badge text={t( |
| | | 'datasetCreation.stepTwo.previewChunkCount', { |
| | | count: estimate?.total_segments || 0, |
| | | }) as string} |
| | | /> |
| | | } |
| | | </div> |
| | | </PreviewHeader>} |
| | | className={cn('relative flex h-full w-1/2 shrink-0 p-4 pr-0', isMobile && 'w-full max-w-[524px]')} |
| | | className={cn('flex shrink-0 w-1/2 p-4 pr-0 relative h-full', isMobile && 'w-full max-w-[524px]')} |
| | | mainClassName='space-y-6' |
| | | > |
| | | {currentDocForm === ChunkingMode.qa && estimate?.qa_preview && ( |
| | |
| | | }) |
| | | )} |
| | | {currentEstimateMutation.isIdle && ( |
| | | <div className='flex h-full w-full items-center justify-center'> |
| | | <div className='h-full w-full flex items-center justify-center'> |
| | | <div className='flex flex-col items-center justify-center gap-3'> |
| | | <RiSearchEyeLine className='size-10 text-text-empty-state-icon' /> |
| | | <p className='text-sm text-text-tertiary'> |