Compare commits


3 Commits

Author   SHA1        Message                                                       Date
Yeuoly   2cda79699c  feat: min temp                                                2024-01-23 18:29:48 +08:00
Yeuoly   f02d34cccb  feat: xinference supports tool call and fill in max tokens   2024-01-21 13:51:50 +08:00
Yeuoly   56d2bdf73a  fix: add max chunks to xinference                             2024-01-21 12:47:22 +08:00
3 changed files with 48 additions and 10 deletions

core/model_runtime/model_providers/xinference/llm/llm.py (View File)

@@ -5,12 +5,12 @@ from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResu
 from core.model_runtime.entities.message_entities import (AssistantPromptMessage, PromptMessage, PromptMessageTool,
                                                           SystemPromptMessage, UserPromptMessage)
 from core.model_runtime.entities.model_entities import (AIModelEntity, FetchFrom, ModelPropertyKey, ModelType,
-                                                        ParameterRule, ParameterType)
+                                                        ParameterRule, ParameterType, ModelFeature)
 from core.model_runtime.errors.invoke import (InvokeAuthorizationError, InvokeBadRequestError, InvokeConnectionError,
                                               InvokeError, InvokeRateLimitError, InvokeServerUnavailableError)
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
-from core.model_runtime.model_providers.xinference.llm.xinference_helper import (XinferenceHelper,
+from core.model_runtime.model_providers.xinference.xinference_helper import (XinferenceHelper,
                                                                               XinferenceModelExtraParameter)
 from core.model_runtime.utils import helper
 from openai import (APIConnectionError, APITimeoutError, AuthenticationError, ConflictError, InternalServerError,
@@ -33,6 +33,12 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
         see `core.model_runtime.model_providers.__base.large_language_model.LargeLanguageModel._invoke`
         """
+        if 'temperature' in model_parameters:
+            if model_parameters['temperature'] < 0.01:
+                model_parameters['temperature'] = 0.01
+            elif model_parameters['temperature'] > 1.0:
+                model_parameters['temperature'] = 0.99
+
         return self._generate(
             model=model, credentials=credentials, prompt_messages=prompt_messages, model_parameters=model_parameters,
             tools=tools, stop=stop, stream=stream, user=user,
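Note: the clamp introduced above keeps temperature inside the range accepted downstream before the call is delegated to _generate. A minimal standalone sketch of the same logic (the function name and asserts are illustrative, not part of the diff):

    def clamp_temperature(model_parameters: dict) -> dict:
        # Clamp into (0, 1]; the floor and cap values mirror the diff.
        if 'temperature' in model_parameters:
            if model_parameters['temperature'] < 0.01:
                model_parameters['temperature'] = 0.01
            elif model_parameters['temperature'] > 1.0:
                model_parameters['temperature'] = 0.99
        return model_parameters

    assert clamp_temperature({'temperature': 0.0}) == {'temperature': 0.01}
    assert clamp_temperature({'temperature': 1.5}) == {'temperature': 0.99}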
@@ -65,6 +71,9 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
                 credentials['completion_type'] = 'completion'
             else:
                 raise ValueError(f'xinference model ability {extra_param.model_ability} is not supported')
+
+            if extra_param.support_function_call:
+                credentials['support_function_call'] = True
         except RuntimeError as e:
             raise CredentialsValidateFailedError(f'Xinference credentials validate failed: {e}')
@@ -237,7 +246,7 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
                 label=I18nObject(
                     zh_Hans='温度',
                     en_US='Temperature'
-                )
+                ),
             ),
             ParameterRule(
                 name='top_p',
@@ -282,6 +291,8 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
             completion_type = LLMMode.COMPLETION.value
         else:
             raise ValueError(f'xinference model ability {extra_args.model_ability} is not supported')
+
+        support_function_call = credentials.get('support_function_call', False)

         entity = AIModelEntity(
             model=model,
@@ -290,6 +301,9 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
             ),
             fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
             model_type=ModelType.LLM,
+            features=[
+                ModelFeature.TOOL_CALL
+            ] if support_function_call else [],
             model_properties={
                 ModelPropertyKey.MODE: completion_type,
             },
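The feature list is derived from the support_function_call flag that validate_credentials stored earlier, so schema construction needs no extra round-trip to the server. A self-contained sketch of the pattern (the Enum below is a stand-in for the runtime's ModelFeature, and the value is invented for illustration):

    from enum import Enum

    class ModelFeature(Enum):
        TOOL_CALL = 'tool-call'  # stand-in; the real enum lives in model_entities

    def build_features(credentials: dict) -> list:
        # Mirrors the diff: advertise tool calling only when validation
        # recorded that the served model supports function calls.
        return [ModelFeature.TOOL_CALL] if credentials.get('support_function_call') else []

    print(build_features({'support_function_call': True}))  # [<ModelFeature.TOOL_CALL: 'tool-call'>]
    print(build_features({}))                               # []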

core/model_runtime/model_providers/xinference/text_embedding/text_embedding.py (View File)

@@ -2,7 +2,7 @@ import time
 from typing import Optional

 from core.model_runtime.entities.common_entities import I18nObject
-from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
+from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType, ModelPropertyKey
 from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
 from core.model_runtime.errors.invoke import (InvokeAuthorizationError, InvokeBadRequestError, InvokeConnectionError,
                                               InvokeError, InvokeRateLimitError, InvokeServerUnavailableError)
@@ -10,6 +10,7 @@ from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
 from xinference_client.client.restful.restful_client import Client, RESTfulEmbeddingModelHandle, RESTfulModelHandle
+from core.model_runtime.model_providers.xinference.xinference_helper import XinferenceHelper


 class XinferenceTextEmbeddingModel(TextEmbeddingModel):
     """
@@ -102,6 +103,13 @@ class XinferenceTextEmbeddingModel(TextEmbeddingModel):
         :return:
         """
         try:
+            server_url = credentials['server_url']
+            model_uid = credentials['model_uid']
+
+            extra_args = XinferenceHelper.get_xinference_extra_parameter(server_url=server_url, model_uid=model_uid)
+            if extra_args.max_tokens:
+                credentials['max_tokens'] = extra_args.max_tokens
+
             self._invoke(model=model, credentials=credentials, texts=['ping'])
         except InvokeAuthorizationError:
             raise CredentialsValidateFailedError('Invalid api key')
@@ -160,6 +168,7 @@ class XinferenceTextEmbeddingModel(TextEmbeddingModel):
         """
         used to define customizable model schema
         """
+
         entity = AIModelEntity(
             model=model,
             label=I18nObject(
@@ -167,7 +176,10 @@ class XinferenceTextEmbeddingModel(TextEmbeddingModel):
             ),
             fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
             model_type=ModelType.TEXT_EMBEDDING,
-            model_properties={},
+            model_properties={
+                ModelPropertyKey.MAX_CHUNKS: 1,
+                ModelPropertyKey.CONTEXT_SIZE: 'max_tokens' in credentials and credentials['max_tokens'] or 512,
+            },
             parameter_rules=[]
         )
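The CONTEXT_SIZE value uses the old and/or idiom rather than a conditional expression. A short equivalence sketch (context_size is an illustrative name, not part of the diff):

    credentials = {'max_tokens': 2048}

    # As written in the diff:
    context_size = 'max_tokens' in credentials and credentials['max_tokens'] or 512

    # Equivalent and more conventional; both forms fall back to 512 when the
    # key is missing, and also when max_tokens is falsy (0 or None):
    context_size = credentials.get('max_tokens') or 512
    assert context_size == 2048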

core/model_runtime/model_providers/xinference/xinference_helper.py (View File)

@@ -12,11 +12,16 @@ class XinferenceModelExtraParameter(object):
     model_format: str
     model_handle_type: str
     model_ability: List[str]
+    max_tokens: int = 512
+    support_function_call: bool = False

-    def __init__(self, model_format: str, model_handle_type: str, model_ability: List[str]) -> None:
+    def __init__(self, model_format: str, model_handle_type: str, model_ability: List[str],
+                 support_function_call: bool, max_tokens: int) -> None:
         self.model_format = model_format
         self.model_handle_type = model_handle_type
         self.model_ability = model_ability
+        self.support_function_call = support_function_call
+        self.max_tokens = max_tokens

 cache = {}
 cache_lock = Lock()
@@ -66,10 +71,12 @@ class XinferenceHelper:
         response_json = response.json()

-        model_format = response_json['model_format']
-        model_ability = response_json['model_ability']
+        model_format = response_json.get('model_format', 'ggmlv3')
+        model_ability = response_json.get('model_ability', [])

-        if model_format == 'ggmlv3' and 'chatglm' in response_json['model_name']:
+        if response_json.get('model_type') == 'embedding':
+            model_handle_type = 'embedding'
+        elif model_format == 'ggmlv3' and 'chatglm' in response_json['model_name']:
             model_handle_type = 'chatglm'
         elif 'generate' in model_ability:
             model_handle_type = 'generate'
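With the new first branch, embedding models resolve their handle type before the chat/generate checks. A standalone sketch of the full ladder (the 'chat' branch is inferred from surrounding code, since it falls outside the hunk shown here):

    def resolve_handle_type(response_json: dict) -> str:
        model_format = response_json.get('model_format', 'ggmlv3')
        model_ability = response_json.get('model_ability', [])
        if response_json.get('model_type') == 'embedding':
            return 'embedding'
        elif model_format == 'ggmlv3' and 'chatglm' in response_json['model_name']:
            return 'chatglm'
        elif 'generate' in model_ability:
            return 'generate'
        elif 'chat' in model_ability:  # inferred branch, not shown in the hunk
            return 'chat'
        raise NotImplementedError('unsupported xinference model handle type')

    assert resolve_handle_type({'model_type': 'embedding'}) == 'embedding'
    assert resolve_handle_type({'model_format': 'pytorch', 'model_name': 'qwen-chat',
                                'model_ability': ['chat']}) == 'chat'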
@@ -78,8 +85,13 @@ class XinferenceHelper:
         else:
             raise NotImplementedError(f'xinference model handle type {model_handle_type} is not supported')

+        support_function_call = 'tools' in model_ability
+        max_tokens = response_json.get('max_tokens', 512)
+
         return XinferenceModelExtraParameter(
             model_format=model_format,
             model_handle_type=model_handle_type,
-            model_ability=model_ability
+            model_ability=model_ability,
+            support_function_call=support_function_call,
+            max_tokens=max_tokens
         )
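Taken together, the helper now reports tool-call support and a max_tokens value alongside the model handle type. A hypothetical call site (the URL and model UID are placeholders, and this assumes a reachable Xinference server):

    extra = XinferenceHelper.get_xinference_extra_parameter(
        server_url='http://127.0.0.1:9997',
        model_uid='my-model-uid',
    )
    if extra.support_function_call:
        pass  # the LLM schema can then advertise ModelFeature.TOOL_CALL
    print(extra.max_tokens)  # falls back to 512 when the server omits max_tokens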