Custom Usage#

Custom Result Analysis#

This tool saves all data generated during a test run, including requests and responses, to an sqlite3 database, so you can analyze the results after the test is complete.

import sqlite3
import base64
import pickle
import json

result_db_path = 'db_name.db'
con = sqlite3.connect(result_db_path)
query_sql = "SELECT request, response_messages, prompt_tokens, completion_tokens \
                FROM result WHERE success='1'"
# Requests and responses are stored as base64.b64encode(pickle.dumps(obj)).decode('ascii'),
# so they must be base64-decoded and unpickled before use.
with con:
    rows = con.execute(query_sql).fetchall()
    for row in rows:
        request = row[0]
        responses = row[1]
        request = pickle.loads(base64.b64decode(request))
        responses = pickle.loads(base64.b64decode(responses))
        response_content = ''
        for response in responses:
            response = json.loads(response)
            if not response['choices']:
                continue
            # For streamed output each chunk carries an incremental delta.
            response_content += response['choices'][0]['delta'].get('content', '')
        print('prompt: %s, prompt_tokens: %s, completion: %s, completion_tokens: %s' %
              (request['messages'][0]['content'], row[2], response_content, row[3]))
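
Because the results sit in a plain sqlite3 database, you can also compute aggregate statistics directly in SQL. Below is a minimal sketch that reuses the connection and the columns from the example above; the exact layout of the result table may vary between versions.

# Aggregate token counts over all successful requests in the same result table.
summary_sql = "SELECT COUNT(*), SUM(prompt_tokens), SUM(completion_tokens) \
                 FROM result WHERE success='1'"
with con:
    n_requests, total_prompt, total_completion = con.execute(summary_sql).fetchone()
    print('requests: %s, prompt_tokens: %s, completion_tokens: %s' %
          (n_requests, total_prompt, total_completion))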

Custom Request API#

The currently supported API request formats are openai and dashscope. To extend the API, inherit the ApiPluginBase class, register it with the @register_api("api_name") decorator, and implement the following two methods:

  • The build_request() method builds the request body from the dataset messages together with the model and query_template in param; the resulting request is sent to the target API.

  • The parse_responses() method returns the prompt_tokens and completion_tokens counts, which are used to calculate the inference speed.

Refer to the code below:

from abc import abstractmethod
from typing import Any, Dict, List, Tuple

from evalscope.perf.arguments import Arguments
# Import paths below mirror the dataset plugin example; adjust if they differ in your version.
from evalscope.perf.plugin.api.base import ApiPluginBase
from evalscope.perf.plugin.registry import register_api


@register_api('custom')
class CustomPlugin(ApiPluginBase):
    def __init__(self, model_path: str) -> None:
        self.model_path = model_path
        
    @abstractmethod
    def build_request(self, messages: List[Dict], param: Arguments) -> Dict:
        """Build an API request body.

        Args:
            messages (List[Dict]): The messages generated by the dataset.
            param (Arguments): The query parameters.

        Raises:
            NotImplementedError: Not implemented.

        Returns:
            Dict: The API request body.
        """
        raise NotImplementedError
    
    @abstractmethod
    def parse_responses(self,
                        responses: List,
                        request: Any = None,
                        **kwargs: Any) -> Tuple[int, int]:
        """Parse responses and return the number of request and response tokens.

        Args:
            responses (List[bytes]): List of HTTP response bodies; for stream output,
                there are multiple responses, each in bytes; for general, only one.
            request (Any): The request body.

        Returns:
            Tuple: (Number of prompt_tokens and number of completion_tokens).
        """
        raise NotImplementedError  
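
As a concrete illustration, a plugin for an OpenAI-compatible chat completions endpoint might look like the sketch below. The request fields and the usage block in the response follow the standard OpenAI chat format; the plugin name 'openai_like_example' and these field names are assumptions, so adapt them to what your target API actually accepts and returns.

import json
from typing import Any, Dict, List, Tuple

from evalscope.perf.arguments import Arguments
from evalscope.perf.plugin.api.base import ApiPluginBase
from evalscope.perf.plugin.registry import register_api


@register_api('openai_like_example')
class OpenAILikeExamplePlugin(ApiPluginBase):
    """Sketch of a plugin for an OpenAI-compatible chat completions API."""

    def __init__(self, model_path: str) -> None:
        self.model_path = model_path

    def build_request(self, messages: List[Dict], param: Arguments) -> Dict:
        # Assemble a standard chat-completions body from the dataset messages
        # and the model name taken from the benchmark arguments.
        return {'model': param.model, 'messages': messages, 'stream': True}

    def parse_responses(self, responses: List, request: Any = None, **kwargs: Any) -> Tuple[int, int]:
        # Take token counts from the OpenAI-style 'usage' block; for streamed
        # output only the final chunk carries it, so keep the last value seen.
        prompt_tokens, completion_tokens = 0, 0
        for response in responses:
            data = json.loads(response)
            usage = data.get('usage') or {}
            prompt_tokens = usage.get('prompt_tokens', prompt_tokens)
            completion_tokens = usage.get('completion_tokens', completion_tokens)
        return prompt_tokens, completion_tokens

If the server does not report usage, parse_responses would instead need to count tokens itself, for example with a tokenizer loaded from model_path.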

Custom Dataset#

To customize a dataset, inherit the DatasetPluginBase class, register it with the @register_dataset('dataset_name') decorator, and implement the build_messages method to yield messages formatted according to the OpenAI API.

from typing import Dict, Iterator, List

from evalscope.perf.arguments import Arguments
from evalscope.perf.plugin.datasets.base import DatasetPluginBase
from evalscope.perf.plugin.registry import register_dataset


@register_dataset('custom')
class CustomDatasetPlugin(DatasetPluginBase):
    """Read dataset and return prompt.
    """

    def __init__(self, query_parameters: Arguments):
        super().__init__(query_parameters)

    def build_messages(self) -> Iterator[List[Dict]]:
        # Iterate over the dataset file line by line; each line is one prompt.
        for item in self.dataset_line_by_line(self.query_parameters.dataset_path):
            prompt = item.strip()
            # Keep only prompts within the configured length range.
            if self.query_parameters.min_prompt_length < len(prompt) < self.query_parameters.max_prompt_length:
                yield [{'role': 'user', 'content': prompt}]
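
If your dataset is stored as JSON lines instead of plain text, build_messages can parse each line and yield multi-turn messages in the same OpenAI format. The sketch below uses illustrative field names ('system_prompt' and 'question') and a hypothetical plugin name; dataset_line_by_line is the same helper used above.

import json
from typing import Dict, Iterator, List

from evalscope.perf.arguments import Arguments
from evalscope.perf.plugin.datasets.base import DatasetPluginBase
from evalscope.perf.plugin.registry import register_dataset


@register_dataset('custom_jsonl')
class CustomJsonlDatasetPlugin(DatasetPluginBase):
    """Sketch: read a JSON-lines file and emit system + user messages."""

    def __init__(self, query_parameters: Arguments):
        super().__init__(query_parameters)

    def build_messages(self) -> Iterator[List[Dict]]:
        for line in self.dataset_line_by_line(self.query_parameters.dataset_path):
            line = line.strip()
            if not line:
                continue
            item = json.loads(line)
            messages = []
            # 'system_prompt' and 'question' are illustrative field names.
            if item.get('system_prompt'):
                messages.append({'role': 'system', 'content': item['system_prompt']})
            messages.append({'role': 'user', 'content': item['question']})
            yield messages

The name passed to @register_dataset ('custom_jsonl' here) is what you would select as the dataset when running the benchmark, with dataset_path pointing at the JSON-lines file.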