Spaces:

Severian
/

dify

Paused

App Files Files Community

dify / api / tests / integration_tests / model_runtime / gpustack / test_llm.py

Severian

initial commit

a8b3f00 about 1 year ago

raw

history blame contribute delete

5.04 kB

	import os
	from collections.abc import Generator

	import pytest

	from core.model_runtime.entities.llm_entities import (
	LLMResult,
	LLMResultChunk,
	LLMResultChunkDelta,
	)
	from core.model_runtime.entities.message_entities import (
	AssistantPromptMessage,
	PromptMessageTool,
	SystemPromptMessage,
	UserPromptMessage,
	)
	from core.model_runtime.errors.validate import CredentialsValidateFailedError
	from core.model_runtime.model_providers.gpustack.llm.llm import GPUStackLanguageModel


	def test_validate_credentials_for_chat_model():
	    """Check credential validation for a chat-mode GPUStack model.

	    A bogus endpoint/API key pair must raise
	    ``CredentialsValidateFailedError``; the pair read from the
	    ``GPUSTACK_SERVER_URL`` / ``GPUSTACK_API_KEY`` environment variables is
	    then validated and is expected not to raise.
	    """
	    llm = GPUStackLanguageModel()
	    model_name = "llama-3.2-1b-instruct"

	    # Deliberately unusable credentials: validation has to reject them.
	    bad_credentials = {
	        "endpoint_url": "invalid_url",
	        "api_key": "invalid_api_key",
	        "mode": "chat",
	    }
	    with pytest.raises(CredentialsValidateFailedError):
	        llm.validate_credentials(model=model_name, credentials=bad_credentials)

	    # Credentials taken from the environment: any exception fails the test.
	    env_credentials = {
	        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
	        "api_key": os.environ.get("GPUSTACK_API_KEY"),
	        "mode": "chat",
	    }
	    llm.validate_credentials(model=model_name, credentials=env_credentials)


	def test_invoke_completion_model():
	    """Invoke the model in ``completion`` mode without streaming.

	    The call must return an ``LLMResult`` whose message content is non-empty
	    and whose usage reports a positive total token count.
	    """
	    llm = GPUStackLanguageModel()

	    credentials = {
	        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
	        "api_key": os.environ.get("GPUSTACK_API_KEY"),
	        "mode": "completion",
	    }
	    sampling = {"temperature": 0.7, "top_p": 1.0, "max_tokens": 10}
	    prompt = [UserPromptMessage(content="ping")]

	    result = llm.invoke(
	        model="llama-3.2-1b-instruct",
	        credentials=credentials,
	        prompt_messages=prompt,
	        model_parameters=sampling,
	        stop=[],
	        user="abc-123",
	        stream=False,
	    )

	    assert isinstance(result, LLMResult)
	    content = result.message.content
	    assert len(content) > 0
	    total_tokens = result.usage.total_tokens
	    assert total_tokens > 0


	def test_invoke_chat_model():
	    """Invoke the model in ``chat`` mode without streaming.

	    The call must return an ``LLMResult`` whose message content is non-empty
	    and whose usage reports a positive total token count.
	    """
	    llm = GPUStackLanguageModel()

	    credentials = {
	        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
	        "api_key": os.environ.get("GPUSTACK_API_KEY"),
	        "mode": "chat",
	    }
	    sampling = {"temperature": 0.7, "top_p": 1.0, "max_tokens": 10}
	    prompt = [UserPromptMessage(content="ping")]

	    result = llm.invoke(
	        model="llama-3.2-1b-instruct",
	        credentials=credentials,
	        prompt_messages=prompt,
	        model_parameters=sampling,
	        stop=[],
	        user="abc-123",
	        stream=False,
	    )

	    assert isinstance(result, LLMResult)
	    content = result.message.content
	    assert len(content) > 0
	    total_tokens = result.usage.total_tokens
	    assert total_tokens > 0


	def test_invoke_stream_chat_model():
	    """Invoke the model in ``chat`` mode with streaming enabled.

	    The call must return a generator. Every yielded item must be an
	    ``LLMResultChunk`` carrying an ``LLMResultChunkDelta`` with an
	    ``AssistantPromptMessage``; chunks with no ``finish_reason`` must also
	    carry non-empty content.
	    """
	    llm = GPUStackLanguageModel()

	    credentials = {
	        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
	        "api_key": os.environ.get("GPUSTACK_API_KEY"),
	        "mode": "chat",
	    }
	    sampling = {"temperature": 0.7, "top_p": 1.0, "max_tokens": 10}
	    prompt = [UserPromptMessage(content="Hello World!")]

	    stream = llm.invoke(
	        model="llama-3.2-1b-instruct",
	        credentials=credentials,
	        prompt_messages=prompt,
	        model_parameters=sampling,
	        stop=["you"],
	        stream=True,
	        user="abc-123",
	    )

	    assert isinstance(stream, Generator)
	    for piece in stream:
	        assert isinstance(piece, LLMResultChunk)
	        assert isinstance(piece.delta, LLMResultChunkDelta)
	        assert isinstance(piece.delta.message, AssistantPromptMessage)
	        # Only chunks without a finish reason are required to carry text.
	        if piece.delta.finish_reason is None:
	            assert len(piece.delta.message.content) > 0


	def test_get_num_tokens():
	    """Check ``get_num_tokens`` against two fixed prompts.

	    A system + user prompt combined with one tool definition is expected to
	    count 80 tokens; the bare user prompt is expected to count 10. Both
	    results must be plain ``int`` values.
	    """
	    llm = GPUStackLanguageModel()

	    def env_credentials():
	        # Build a fresh dict per call, reading the environment each time.
	        return {
	            "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
	            "api_key": os.environ.get("GPUSTACK_API_KEY"),
	            "mode": "chat",
	        }

	    # --- Case 1: system + user prompt together with a tool definition ---
	    weather_parameters = {
	        "type": "object",
	        "properties": {
	            "location": {
	                "type": "string",
	                "description": "The city and state e.g. San Francisco, CA",
	            },
	            "unit": {"type": "string", "enum": ["c", "f"]},
	        },
	        "required": ["location"],
	    }
	    with_tools = llm.get_num_tokens(
	        model="????",
	        credentials=env_credentials(),
	        prompt_messages=[
	            SystemPromptMessage(content="You are a helpful AI assistant."),
	            UserPromptMessage(content="Hello World!"),
	        ],
	        tools=[
	            PromptMessageTool(
	                name="get_current_weather",
	                description="Get the current weather in a given location",
	                parameters=weather_parameters,
	            )
	        ],
	    )

	    assert isinstance(with_tools, int)
	    assert with_tools == 80

	    # --- Case 2: a single user prompt, no tools ---
	    user_only = llm.get_num_tokens(
	        model="????",
	        credentials=env_credentials(),
	        prompt_messages=[UserPromptMessage(content="Hello World!")],
	    )

	    assert isinstance(user_only, int)
	    assert user_only == 10