Compare commits

...

5 Commits

Author SHA1 Message Date
jyong
9190cd47b9 remove token in Indexing estimate 2024-08-30 14:22:35 +08:00
JzoNg
a7d5cc5d72 add unit for input 2024-08-27 14:02:24 +08:00
jyong
7fbeb2585e add Knowledge embedding model setting when create knowledge 2024-08-27 10:10:29 +08:00
JzoNg
d5ed576df4 support embedding model setting in creation 2024-08-22 16:46:09 +08:00
JzoNg
84cf0587cd remove estimation of tokens & price 2024-08-21 15:40:33 +08:00
15 changed files with 114 additions and 170 deletions

1
api/.idea/vcs.xml generated
View File

@@ -12,5 +12,6 @@
</component>
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
</component>
</project>

View File

@@ -302,6 +302,10 @@ class DatasetInitApi(Resource):
location='json')
parser.add_argument('retrieval_model', type=dict, required=False, nullable=False,
location='json')
parser.add_argument('embedding_model', type=str, required=False, nullable=True,
location='json')
parser.add_argument('embedding_model_provider', type=str, required=False, nullable=True,
location='json')
args = parser.parse_args()
# The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
@@ -309,6 +313,8 @@ class DatasetInitApi(Resource):
raise Forbidden()
if args['indexing_technique'] == 'high_quality':
if args['embedding_model'] is None or args['embedding_model_provider'] is None:
raise ValueError('embedding model and embedding model provider are required for high quality indexing.')
try:
model_manager = ModelManager()
model_manager.get_default_model_instance(
@@ -419,9 +425,6 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
batch = str(batch)
documents = self.get_batch_documents(dataset_id, batch)
response = {
"tokens": 0,
"total_price": 0,
"currency": "USD",
"total_segments": 0,
"preview": []
}

View File

@@ -255,11 +255,8 @@ class IndexingRunner:
tenant_id=tenant_id,
model_type=ModelType.TEXT_EMBEDDING,
)
tokens = 0
preview_texts = []
total_segments = 0
total_price = 0
currency = 'USD'
index_type = doc_form
index_processor = IndexProcessorFactory(index_type).init_index_processor()
all_text_docs = []
@@ -286,54 +283,22 @@ class IndexingRunner:
for document in documents:
if len(preview_texts) < 5:
preview_texts.append(document.page_content)
if indexing_technique == 'high_quality' or embedding_model_instance:
tokens += embedding_model_instance.get_text_embedding_num_tokens(
texts=[self.filter_string(document.page_content)]
)
if doc_form and doc_form == 'qa_model':
model_instance = self.model_manager.get_default_model_instance(
tenant_id=tenant_id,
model_type=ModelType.LLM
)
model_type_instance = model_instance.model_type_instance
model_type_instance = cast(LargeLanguageModel, model_type_instance)
if len(preview_texts) > 0:
# qa model document
response = LLMGenerator.generate_qa_document(current_user.current_tenant_id, preview_texts[0],
doc_language)
document_qa_list = self.format_split_text(response)
price_info = model_type_instance.get_price(
model=model_instance.model,
credentials=model_instance.credentials,
price_type=PriceType.INPUT,
tokens=total_segments * 2000,
)
return {
"total_segments": total_segments * 20,
"tokens": total_segments * 2000,
"total_price": '{:f}'.format(price_info.total_amount),
"currency": price_info.currency,
"qa_preview": document_qa_list,
"preview": preview_texts
}
if embedding_model_instance:
embedding_model_type_instance = cast(TextEmbeddingModel, embedding_model_instance.model_type_instance)
embedding_price_info = embedding_model_type_instance.get_price(
model=embedding_model_instance.model,
credentials=embedding_model_instance.credentials,
price_type=PriceType.INPUT,
tokens=tokens
)
total_price = '{:f}'.format(embedding_price_info.total_amount)
currency = embedding_price_info.currency
return {
"total_segments": total_segments,
"tokens": tokens,
"total_price": total_price,
"currency": currency,
"preview": preview_texts
}

View File

@@ -12,7 +12,7 @@ from typing import (
Optional,
TypedDict,
TypeVar,
Union,
Union, List,
)
from core.rag.models.document import BaseDocumentTransformer, Document
@@ -109,7 +109,7 @@ class TextSplitter(BaseDocumentTransformer, ABC):
else:
return text
def _merge_splits(self, splits: Iterable[str], separator: str) -> list[str]:
def _merge_splits(self, splits: Iterable[str], separator: str, lengths: List[int]) -> list[str]:
# We now want to combine these smaller pieces into medium size
# chunks to send to the LLM.
separator_len = self._length_function(separator)
@@ -117,8 +117,9 @@ class TextSplitter(BaseDocumentTransformer, ABC):
docs = []
current_doc: list[str] = []
total = 0
index = 0
for d in splits:
_len = self._length_function(d)
_len = lengths[index]
if (
total + _len + (separator_len if len(current_doc) > 0 else 0)
> self._chunk_size
@@ -146,6 +147,7 @@ class TextSplitter(BaseDocumentTransformer, ABC):
current_doc = current_doc[1:]
current_doc.append(d)
total += _len + (separator_len if len(current_doc) > 1 else 0)
index += 1
doc = self._join_docs(current_doc, separator)
if doc is not None:
docs.append(doc)
@@ -494,11 +496,10 @@ class RecursiveCharacterTextSplitter(TextSplitter):
self._separators = separators or ["\n\n", "\n", " ", ""]
def _split_text(self, text: str, separators: list[str]) -> list[str]:
"""Split incoming text and return chunks."""
final_chunks = []
# Get appropriate separator to use
separator = separators[-1]
new_separators = []
for i, _s in enumerate(separators):
if _s == "":
separator = _s
@@ -509,25 +510,31 @@ class RecursiveCharacterTextSplitter(TextSplitter):
break
splits = _split_text_with_regex(text, separator, self._keep_separator)
# Now go merging things, recursively splitting longer texts.
_good_splits = []
_good_splits_lengths = [] # cache the lengths of the splits
_separator = "" if self._keep_separator else separator
for s in splits:
if self._length_function(s) < self._chunk_size:
s_len = self._length_function(s)
if s_len < self._chunk_size:
_good_splits.append(s)
_good_splits_lengths.append(s_len)
else:
if _good_splits:
merged_text = self._merge_splits(_good_splits, _separator)
merged_text = self._merge_splits(_good_splits, _separator, _good_splits_lengths)
final_chunks.extend(merged_text)
_good_splits = []
_good_splits_lengths = []
if not new_separators:
final_chunks.append(s)
else:
other_info = self._split_text(s, new_separators)
final_chunks.extend(other_info)
if _good_splits:
merged_text = self._merge_splits(_good_splits, _separator)
merged_text = self._merge_splits(_good_splits, _separator, _good_splits_lengths)
final_chunks.extend(merged_text)
return final_chunks
def split_text(self, text: str) -> list[str]:

View File

@@ -1067,18 +1067,12 @@ class DocumentService:
DocumentService.check_documents_upload_quota(count, features)
embedding_model = None
dataset_collection_binding_id = None
retrieval_model = None
if document_data['indexing_technique'] == 'high_quality':
model_manager = ModelManager()
embedding_model = model_manager.get_default_model_instance(
tenant_id=current_user.current_tenant_id,
model_type=ModelType.TEXT_EMBEDDING
)
dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding(
embedding_model.provider,
embedding_model.model
document_data['embedding_model_provider'],
document_data['embedding_model']
)
dataset_collection_binding_id = dataset_collection_binding.id
if document_data.get('retrieval_model'):
@@ -1100,10 +1094,10 @@ class DocumentService:
tenant_id=tenant_id,
name='',
data_source_type=document_data["data_source"]["type"],
indexing_technique=document_data["indexing_technique"],
indexing_technique=document_data.get('indexing_technique', 'high_quality'),
created_by=account.id,
embedding_model=embedding_model.model if embedding_model else None,
embedding_model_provider=embedding_model.provider if embedding_model else None,
embedding_model=document_data.get('embedding_model', None),
embedding_model_provider=document_data.get('embedding_model_provider', None),
collection_binding_id=dataset_collection_binding_id,
retrieval_model=retrieval_model
)

View File

@@ -13,8 +13,7 @@ import cn from '@/utils/classnames'
import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
import Button from '@/app/components/base/button'
import type { FullDocumentDetail, IndexingStatusResponse, ProcessRuleResponse } from '@/models/datasets'
import { formatNumber } from '@/utils/format'
import { fetchIndexingStatusBatch as doFetchIndexingStatus, fetchIndexingEstimateBatch, fetchProcessRule } from '@/service/datasets'
import { fetchIndexingStatusBatch as doFetchIndexingStatus, fetchProcessRule } from '@/service/datasets'
import { DataSourceType } from '@/models/datasets'
import NotionIcon from '@/app/components/base/notion-icon'
import PriorityLabel from '@/app/components/billing/priority-label'
@@ -142,14 +141,6 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
}, apiParams => fetchProcessRule(omit(apiParams, 'action')), {
revalidateOnFocus: false,
})
// get cost
const { data: indexingEstimateDetail } = useSWR({
action: 'fetchIndexingEstimateBatch',
datasetId,
batchId,
}, apiParams => fetchIndexingEstimateBatch(omit(apiParams, 'action')), {
revalidateOnFocus: false,
})
const router = useRouter()
const navToDocumentList = () => {
@@ -190,28 +181,11 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
return (
<>
<div className='h-5 flex justify-between items-center mb-5'>
<div className='h-5 flex items-center mb-5'>
<div className={s.embeddingStatus}>
{isEmbedding && t('datasetDocuments.embedding.processing')}
{isEmbeddingCompleted && t('datasetDocuments.embedding.completed')}
</div>
<div className={s.cost}>
{indexingType === 'high_quality' && (
<div className='flex items-center'>
<div className={cn(s.commonIcon, s.highIcon)} />
{t('datasetDocuments.embedding.highQuality')} · {t('datasetDocuments.embedding.estimate')}
<span className={s.tokens}>{formatNumber(indexingEstimateDetail?.tokens || 0)}</span>tokens
(<span className={s.price}>${formatNumber(indexingEstimateDetail?.total_price || 0)}</span>)
</div>
)}
{indexingType === 'economy' && (
<div className='flex items-center'>
<div className={cn(s.commonIcon, s.economyIcon)} />
{t('datasetDocuments.embedding.economy')} · {t('datasetDocuments.embedding.estimate')}
<span className={s.tokens}>0</span>tokens
</div>
)}
</div>
</div>
{
enableBilling && plan.type !== Plan.team && (

View File

@@ -30,7 +30,7 @@
}
.indexItem {
min-height: 146px;
min-height: 126px;
}
.indexItem .disableMask {
@@ -121,10 +121,6 @@
@apply pb-1;
}
.radioItem.indexItem .typeHeader .tip {
@apply pb-3;
}
.radioItem .typeIcon {
position: absolute;
top: 18px;
@@ -264,7 +260,7 @@
}
.input {
@apply inline-flex h-9 w-full py-1 px-2 rounded-lg text-xs leading-normal;
@apply inline-flex h-9 w-full py-1 px-2 pr-14 rounded-lg text-xs leading-normal;
@apply bg-gray-100 caret-primary-600 hover:bg-gray-100 focus:ring-1 focus:ring-inset focus:ring-gray-200 focus-visible:outline-none focus:bg-white placeholder:text-gray-400;
}

View File

@@ -16,7 +16,7 @@ import PreviewItem, { PreviewType } from './preview-item'
import LanguageSelect from './language-select'
import s from './index.module.css'
import cn from '@/utils/classnames'
import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
import {
createDocument,
createFirstDocument,
@@ -44,8 +44,10 @@ import { RETRIEVE_METHOD } from '@/types/app'
import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
import Tooltip from '@/app/components/base/tooltip'
import TooltipPlus from '@/app/components/base/tooltip-plus'
import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { useDefaultModel, useModelList, useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { LanguagesSupported } from '@/i18n/language'
import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector'
import type { DefaultModel } from '@/app/components/header/account-setting/model-provider-page/declarations'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import { Globe01 } from '@/app/components/base/icons/src/vender/line/mapsAndTravel'
@@ -112,7 +114,7 @@ const StepTwo = ({
const [previewScrolled, setPreviewScrolled] = useState(false)
const [segmentationType, setSegmentationType] = useState<SegmentType>(SegmentType.AUTO)
const [segmentIdentifier, setSegmentIdentifier] = useState('\\n')
const [max, setMax] = useState(500)
const [max, setMax] = useState(5000) // default chunk length
const [overlap, setOverlap] = useState(50)
const [rules, setRules] = useState<PreProcessingRule[]>([])
const [defaultConfig, setDefaultConfig] = useState<Rules>()
@@ -132,7 +134,6 @@ const StepTwo = ({
const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean()
const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<FileIndexingEstimateResponse | null>(null)
const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState<FileIndexingEstimateResponse | null>(null)
const [estimateTokes, setEstimateTokes] = useState<Pick<IndexingEstimateResponse, 'tokens' | 'total_price'> | null>(null)
const fileIndexingEstimate = (() => {
return segmentationType === SegmentType.AUTO ? automaticFileIndexingEstimate : customFileIndexingEstimate
@@ -193,13 +194,10 @@ const StepTwo = ({
const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT) => {
// eslint-disable-next-line @typescript-eslint/no-use-before-define
const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm)!)
if (segmentationType === SegmentType.CUSTOM) {
if (segmentationType === SegmentType.CUSTOM)
setCustomFileIndexingEstimate(res)
}
else {
else
setAutomaticFileIndexingEstimate(res)
indexType === IndexingType.QUALIFIED && setEstimateTokes({ tokens: res.tokens, total_price: res.total_price })
}
}
const confirmChangeCustomConfig = () => {
@@ -311,6 +309,19 @@ const StepTwo = ({
defaultModel: rerankDefaultModel,
currentModel: isRerankDefaultModelVaild,
} = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
const { data: embeddingModelList } = useModelList(ModelTypeEnum.textEmbedding)
const { data: defaultEmbeddingModel } = useDefaultModel(ModelTypeEnum.textEmbedding)
const [embeddingModel, setEmbeddingModel] = useState<DefaultModel>(
currentDataset?.embedding_model
? {
provider: currentDataset.embedding_model_provider,
model: currentDataset.embedding_model,
}
: {
provider: defaultEmbeddingModel?.provider.provider || '',
model: defaultEmbeddingModel?.model || '',
},
)
const getCreationParams = () => {
let params
if (segmentationType === SegmentType.CUSTOM && overlap > max) {
@@ -325,6 +336,8 @@ const StepTwo = ({
process_rule: getProcessRule(),
// eslint-disable-next-line @typescript-eslint/no-use-before-define
retrieval_model: retrievalConfig, // Readonly. If want to changed, just go to settings page.
embedding_model: embeddingModel.model, // Readonly
embedding_model_provider: embeddingModel.provider, // Readonly
} as CreateDocumentReq
}
else { // create
@@ -361,6 +374,8 @@ const StepTwo = ({
doc_language: docLanguage,
retrieval_model: postRetrievalConfig,
embedding_model: embeddingModel.model,
embedding_model_provider: embeddingModel.provider,
} as CreateDocumentReq
if (dataSourceType === DataSourceType.FILE) {
params.data_source.info_list.file_info_list = {
@@ -614,14 +629,17 @@ const StepTwo = ({
<div className={s.formRow}>
<div className='w-full'>
<div className={s.label}>{t('datasetCreation.stepTwo.maxLength')}</div>
<input
type="number"
className={s.input}
placeholder={t('datasetCreation.stepTwo.maxLength') || ''}
value={max}
min={1}
onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))}
/>
<div className='relative w-full'>
<input
type="number"
className={s.input}
placeholder={t('datasetCreation.stepTwo.maxLength') || ''}
value={max}
min={1}
onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))}
/>
<div className='absolute top-2.5 right-2.5 text-text-tertiary system-sm-regular'>Tokens</div>
</div>
</div>
</div>
<div className={s.formRow}>
@@ -636,14 +654,17 @@ const StepTwo = ({
<RiQuestionLine className='ml-1 w-3.5 h-3.5 text-gray-400' />
</TooltipPlus>
</div>
<input
type="number"
className={s.input}
placeholder={t('datasetCreation.stepTwo.overlap') || ''}
value={overlap}
min={1}
onChange={e => setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))}
/>
<div className='relative w-full'>
<input
type="number"
className={s.input}
placeholder={t('datasetCreation.stepTwo.overlap') || ''}
value={overlap}
min={1}
onChange={e => setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))}
/>
<div className='absolute top-2.5 right-2.5 text-text-tertiary system-sm-regular'>Tokens</div>
</div>
</div>
</div>
<div className={s.formRow}>
@@ -676,7 +697,7 @@ const StepTwo = ({
!isAPIKeySet && s.disabled,
!hasSetIndexType && indexType === IndexingType.QUALIFIED && s.active,
hasSetIndexType && s.disabled,
hasSetIndexType && '!w-full',
hasSetIndexType && '!w-full !min-h-[96px]',
)}
onClick={() => {
if (isAPIKeySet)
@@ -691,16 +712,6 @@ const StepTwo = ({
{!hasSetIndexType && <span className={s.recommendTag}>{t('datasetCreation.stepTwo.recommend')}</span>}
</div>
<div className={s.tip}>{t('datasetCreation.stepTwo.qualifiedTip')}</div>
<div className='pb-0.5 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.emstimateCost')}</div>
{
estimateTokes
? (
<div className='text-xs font-medium text-gray-800'>{formatNumber(estimateTokes.tokens)} tokens(<span className='text-yellow-500'>${formatNumber(estimateTokes.total_price)}</span>)</div>
)
: (
<div className={s.calculating}>{t('datasetCreation.stepTwo.calculating')}</div>
)
}
</div>
{!isAPIKeySet && (
<div className={s.warningTip}>
@@ -718,7 +729,7 @@ const StepTwo = ({
s.indexItem,
!hasSetIndexType && indexType === IndexingType.ECONOMICAL && s.active,
hasSetIndexType && s.disabled,
hasSetIndexType && '!w-full',
hasSetIndexType && '!w-full !min-h-[96px]',
)}
onClick={changeToEconomicalType}
>
@@ -727,13 +738,11 @@ const StepTwo = ({
<div className={s.typeHeader}>
<div className={s.title}>{t('datasetCreation.stepTwo.economical')}</div>
<div className={s.tip}>{t('datasetCreation.stepTwo.economicalTip')}</div>
<div className='pb-0.5 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.emstimateCost')}</div>
<div className='text-xs font-medium text-gray-800'>0 tokens</div>
</div>
</div>
)}
</div>
{hasSetIndexType && (
{hasSetIndexType && indexType === IndexingType.ECONOMICAL && (
<div className='mt-2 text-xs text-gray-500 font-medium'>
{t('datasetCreation.stepTwo.indexSettedTip')}
<Link className='text-[#155EEF]' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
@@ -768,12 +777,32 @@ const StepTwo = ({
)}
</div>
)}
{/* Embedding model */}
{indexType === IndexingType.QUALIFIED && (
<div className='mb-2'>
<div className={cn(s.label, datasetId && 'flex justify-between items-center')}>{t('datasetSettings.form.embeddingModel')}</div>
<ModelSelector
readonly={!!datasetId}
defaultModel={embeddingModel}
modelList={embeddingModelList}
onSelect={(model: DefaultModel) => {
setEmbeddingModel(model)
}}
/>
{!!datasetId && (
<div className='mt-2 text-xs text-gray-500 font-medium'>
{t('datasetCreation.stepTwo.indexSettedTip')}
<Link className='text-[#155EEF]' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
</div>
)}
</div>
)}
{/* Retrieval Method Config */}
<div>
{!datasetId
? (
<div className={s.label}>
{t('datasetSettings.form.retrievalSetting.title')}
<div className='shrink-0 mr-4'>{t('datasetSettings.form.retrievalSetting.title')}</div>
<div className='leading-[18px] text-xs font-normal text-gray-500'>
<a target='_blank' rel='noopener noreferrer' href='https://docs.dify.ai/guides/knowledge-base/create-knowledge-and-upload-documents#id-6-retrieval-settings' className='text-[#155eef]'>{t('datasetSettings.form.retrievalSetting.learnMore')}</a>
{t('datasetSettings.form.retrievalSetting.longDescription')}

View File

@@ -49,7 +49,7 @@ const StepsNavBar = ({
key={item}
className={cn(s.stepItem, s[`step${item}`], step === item && s.active, step > item && s.done, isMobile && 'px-0')}
>
<div className={cn(s.stepNum)}>{item}</div>
<div className={cn(s.stepNum)}>{step > item ? '' : item}</div>
<div className={cn(s.stepName)}>{isMobile ? '' : t(STEP_T_MAP[item])}</div>
</div>
))}

View File

@@ -18,9 +18,7 @@ import { ToastContext } from '@/app/components/base/toast'
import type { FullDocumentDetail, ProcessRuleResponse } from '@/models/datasets'
import type { CommonResponse } from '@/models/common'
import { asyncRunSafe, sleep } from '@/utils'
import { formatNumber } from '@/utils/format'
import { fetchIndexingStatus as doFetchIndexingStatus, fetchIndexingEstimate, fetchProcessRule, pauseDocIndexing, resumeDocIndexing } from '@/service/datasets'
import DatasetDetailContext from '@/context/dataset-detail'
import { fetchIndexingStatus as doFetchIndexingStatus, fetchProcessRule, pauseDocIndexing, resumeDocIndexing } from '@/service/datasets'
import StopEmbeddingModal from '@/app/components/datasets/create/stop-embedding-modal'
type Props = {
@@ -108,16 +106,14 @@ const RuleDetail: FC<{ sourceData?: ProcessRuleResponse; docName?: string }> = (
</div>
}
const EmbeddingDetail: FC<Props> = ({ detail, stopPosition = 'top', datasetId: dstId, documentId: docId, indexingType, detailUpdate }) => {
const EmbeddingDetail: FC<Props> = ({ detail, stopPosition = 'top', datasetId: dstId, documentId: docId, detailUpdate }) => {
const onTop = stopPosition === 'top'
const { t } = useTranslation()
const { notify } = useContext(ToastContext)
const { datasetId = '', documentId = '' } = useContext(DocumentContext)
const { indexingTechnique } = useContext(DatasetDetailContext)
const localDatasetId = dstId ?? datasetId
const localDocumentId = docId ?? documentId
const localIndexingTechnique = indexingType ?? indexingTechnique
const [indexingStatusDetail, setIndexingStatusDetail] = useState<any>(null)
const fetchIndexingStatus = async () => {
@@ -160,14 +156,6 @@ const EmbeddingDetail: FC<Props> = ({ detail, stopPosition = 'top', datasetId: d
}
}, [startQueryStatus, stopQueryStatus])
const { data: indexingEstimateDetail, error: indexingEstimateErr } = useSWR({
action: 'fetchIndexingEstimate',
datasetId: localDatasetId,
documentId: localDocumentId,
}, apiParams => fetchIndexingEstimate(omit(apiParams, 'action')), {
revalidateOnFocus: false,
})
const { data: ruleDetail, error: ruleError } = useSWR({
action: 'fetchProcessRule',
params: { documentId: localDocumentId },
@@ -250,21 +238,6 @@ const EmbeddingDetail: FC<Props> = ({ detail, stopPosition = 'top', datasetId: d
</div>
<div className={s.progressData}>
<div>{t('datasetDocuments.embedding.segments')} {indexingStatusDetail?.completed_segments}/{indexingStatusDetail?.total_segments} · {percent}%</div>
{localIndexingTechnique === 'high_quaility' && (
<div className='flex items-center'>
<div className={cn(s.commonIcon, s.highIcon)} />
{t('datasetDocuments.embedding.highQuality')} · {t('datasetDocuments.embedding.estimate')}
<span className={s.tokens}>{formatNumber(indexingEstimateDetail?.tokens || 0)}</span>tokens
(<span className={s.price}>${formatNumber(indexingEstimateDetail?.total_price || 0)}</span>)
</div>
)}
{localIndexingTechnique === 'economy' && (
<div className='flex items-center'>
<div className={cn(s.commonIcon, s.economyIcon)} />
{t('datasetDocuments.embedding.economy')} · {t('datasetDocuments.embedding.estimate')}
<span className={s.tokens}>0</span>tokens
</div>
)}
</div>
<RuleDetail sourceData={ruleDetail} docName={detail?.name} />
{!onTop && (

View File

@@ -31,7 +31,7 @@
@apply rounded-r-md;
}
.progressData {
@apply w-full flex justify-between items-center text-xs text-gray-700;
@apply w-full flex items-center text-xs text-gray-700;
}
.previewTip {
@apply pb-1 pt-12 text-gray-900 text-sm font-medium;

View File

@@ -86,7 +86,7 @@ const translation = {
autoDescription: 'Automatically set chunk and preprocessing rules. Unfamiliar users are recommended to select this.',
custom: 'Custom',
customDescription: 'Customize chunks rules, chunks length, and preprocessing rules, etc.',
separator: 'Segment identifier',
separator: 'Delimiter',
separatorPlaceholder: 'For example, newline (\\\\n) or special separator (such as "***")',
maxLength: 'Maximum chunk length',
overlap: 'Chunk overlap',
@@ -135,8 +135,8 @@ const translation = {
previewSwitchTipStart: 'The current chunk preview is in text format, switching to a question-and-answer format preview will',
previewSwitchTipEnd: ' consume additional tokens',
characters: 'characters',
indexSettedTip: 'To change the index method, please go to the ',
retrivalSettedTip: 'To change the index method, please go to the ',
indexSettedTip: 'To change the index method & embedding model, please go to the ',
retrivalSettedTip: 'To change the retrieval setting, please go to the ',
datasetSettingLink: 'Knowledge settings.',
},
stepThree: {

View File

@@ -136,7 +136,7 @@ const translation = {
previewSwitchTipEnd: ' 追加のトークンが消費されます',
characters: '文字',
indexSettedTip: 'インデックス方法を変更するには、',
retrivalSettedTip: 'インデックス方法を変更するには、',
retrivalSettedTip: '検索方法を変更するには、',
datasetSettingLink: 'ナレッジ設定',
},
stepThree: {

View File

@@ -135,7 +135,7 @@ const translation = {
previewSwitchTipStart: '当前分段预览是文本模式,切换到 Q&A 模式将会',
previewSwitchTipEnd: '消耗额外的 token',
characters: '字符',
indexSettedTip: '要更改索引方法,请转到',
indexSettedTip: '要更改索引方法和 embedding 模型,请转到',
retrivalSettedTip: '要更改检索方法,请转到',
datasetSettingLink: '知识库设置。',
},

View File

@@ -226,6 +226,8 @@ export type DocumentReq = {
export type CreateDocumentReq = DocumentReq & {
data_source: DataSource
retrieval_model: RetrievalConfig
embedding_model: string
embedding_model_provider: string
}
export type IndexingEstimateParams = DocumentReq & Partial<DataSource> & {