Custom Usage#
Custom Result Analysis#
During the test, this tool saves all data, including requests and responses, to a sqlite3 database. You can analyze the test data after the test completes.
import base64
import json
import pickle
import sqlite3

result_db_path = 'db_name.db'
con = sqlite3.connect(result_db_path)
query_sql = "SELECT request, response_messages, prompt_tokens, completion_tokens \
             FROM result WHERE success='1'"

# Requests and responses were stored as
# base64.b64encode(pickle.dumps(benchmark_data["request"])).decode("ascii"),
# so they must be base64-decoded and unpickled before use.
with con:
    rows = con.execute(query_sql).fetchall()
    if len(rows) > 0:
        for row in rows:
            request = row[0]
            responses = row[1]
            request = base64.b64decode(request)
            request = pickle.loads(request)
            responses = base64.b64decode(responses)
            responses = pickle.loads(responses)
            response_content = ''
            for response in responses:
                response = json.loads(response)
                if not response['choices']:
                    continue
                # Concatenate the streamed delta chunks into the full completion.
                delta = response['choices'][0]['delta']
                response_content += delta.get('content') or ''
            print('prompt: %s, prompt_tokens: %s, completion: %s, completion_tokens: %s' %
                  (request['messages'][0]['content'], row[2], response_content, row[3]))
Custom Request API#
The currently supported API request formats are openai and dashscope. To extend support to another API, inherit the ApiPluginBase class, register it with the @register_api("api_name") decorator, and implement the following two methods:
- The build_request() method builds the request body from messages together with the model and query_template in param; this request is then sent to the target API.
- The parse_responses() method returns the counts of prompt_tokens and completion_tokens, which are used to calculate inference speed.
Refer to the code below:
from abc import abstractmethod
from typing import Any, Dict, List, Tuple

from evalscope.perf.arguments import Arguments
from evalscope.perf.plugin.api.base import ApiPluginBase
from evalscope.perf.plugin.registry import register_api


@register_api('custom')
class CustomPlugin(ApiPluginBase):
    def __init__(self, model_path: str) -> None:
        self.model_path = model_path

    @abstractmethod
    def build_request(self, messages: List[Dict], param: Arguments) -> Dict:
        """Build an API request body.

        Args:
            messages (List[Dict]): The messages generated by the dataset.
            param (Arguments): The query parameters.

        Raises:
            NotImplementedError: Not implemented.

        Returns:
            Dict: The API request body.
        """
        raise NotImplementedError

    @abstractmethod
    def parse_responses(self,
                        responses: List,
                        request: Any = None,
                        **kwargs: Any) -> Tuple[int, int]:
        """Parse responses and return the number of request and response tokens.

        Args:
            responses (List[bytes]): List of HTTP response bodies; for stream output,
                there are multiple responses, each in bytes; for general output, only one.
            request (Any): The request body.

        Returns:
            Tuple[int, int]: (Number of prompt_tokens, number of completion_tokens).
        """
        raise NotImplementedError
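As a concrete illustration, the sketch below fills in both methods for an OpenAI-compatible chat completions endpoint. It is a minimal sketch, not the built-in openai plugin: the plugin name my_openai_like is hypothetical, the import paths mirror the skeleton above, and the token counting assumes the service returns an OpenAI-style usage field (with stream output, typically only the final chunk carries it).

# A minimal sketch of a concrete plugin for an OpenAI-compatible endpoint.
# Assumption: the target service follows the OpenAI chat completions schema;
# adjust field names for your own API.
import json
from typing import Any, Dict, List, Tuple

from evalscope.perf.arguments import Arguments
from evalscope.perf.plugin.api.base import ApiPluginBase
from evalscope.perf.plugin.registry import register_api


@register_api('my_openai_like')  # hypothetical plugin name for this example
class OpenAILikePlugin(ApiPluginBase):
    def __init__(self, model_path: str) -> None:
        self.model_path = model_path

    def build_request(self, messages: List[Dict], param: Arguments) -> Dict:
        # Start from query_template if one was provided (assumed to be a JSON string),
        # otherwise from an empty body, then fill in the model and messages.
        body: Dict = json.loads(param.query_template) if param.query_template else {}
        body['model'] = param.model
        body['messages'] = messages
        return body

    def parse_responses(self, responses: List, request: Any = None, **kwargs: Any) -> Tuple[int, int]:
        prompt_tokens, completion_tokens = 0, 0
        for response in responses:
            data = json.loads(response)
            usage = data.get('usage') or {}
            # Keep the last non-empty usage values (for streaming, usage arrives at the end).
            if usage:
                prompt_tokens = usage.get('prompt_tokens', prompt_tokens)
                completion_tokens = usage.get('completion_tokens', completion_tokens)
        return prompt_tokens, completion_tokens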
Custom Dataset#
To customize a dataset, inherit the DatasetPluginBase class, register it with the @register_dataset('dataset_name') decorator, and implement the build_messages method to yield messages formatted according to the OpenAI API.
from typing import Dict, Iterator, List

from evalscope.perf.arguments import Arguments
from evalscope.perf.plugin.datasets.base import DatasetPluginBase
from evalscope.perf.plugin.registry import register_dataset


@register_dataset('custom')
class CustomDatasetPlugin(DatasetPluginBase):
    """Read the dataset and yield prompts."""

    def __init__(self, query_parameters: Arguments):
        super().__init__(query_parameters)

    def build_messages(self) -> Iterator[List[Dict]]:
        for item in self.dataset_line_by_line(self.query_parameters.dataset_path):
            prompt = item.strip()
            # Only keep prompts whose length falls within the configured bounds.
            if self.query_parameters.min_prompt_length < len(prompt) < self.query_parameters.max_prompt_length:
                yield [{'role': 'user', 'content': prompt}]
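Before launching a full benchmark, it can be handy to sanity-check the plugin by iterating its messages directly. The snippet below is a rough sketch: it assumes Arguments accepts dataset_path, min_prompt_length and max_prompt_length as keyword arguments (the fields the plugin above reads) and that the plugin class defined above is in scope; your version of Arguments may require additional fields, and the file name is hypothetical.

# Rough sketch for locally inspecting what the custom dataset plugin yields.
from itertools import islice

from evalscope.perf.arguments import Arguments

args = Arguments(
    dataset_path='your_prompts.txt',  # hypothetical file with one prompt per line
    min_prompt_length=10,
    max_prompt_length=1024,
)
plugin = CustomDatasetPlugin(args)

# Print the first few message lists to verify the OpenAI-style format.
for messages in islice(plugin.build_messages(), 3):
    print(messages)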