diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 00000000..ffd3ee7b --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1 @@ +This folder contains resources to run performance benchmarks. Pls follow the benchmark guide here https://gateway-api-inference-extension.sigs.k8s.io/performance/benchmark. \ No newline at end of file diff --git a/benchmark/benchmark.ipynb b/benchmark/benchmark.ipynb new file mode 100644 index 00000000..993279cb --- /dev/null +++ b/benchmark/benchmark.ipynb @@ -0,0 +1,358 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "executionInfo": { + "elapsed": 391, + "status": "ok", + "timestamp": 1741734317446, + "user": { + "displayName": "Cong Liu", + "userId": "18222691451061354557" + }, + "user_tz": 420 + }, + "id": "ziJD5zt0c1Rt" + }, + "outputs": [], + "source": [ + "#@title Configuration. Edit this before running the rest.\n", + "\n", + "OUTPUT_DIR='output'\n", + "RUN_ID='example-run'\n", + "# Path to the benchmark dir under `gateway-api-inference-extension/benchmark`\n", + "BENCHMARK_DIR =\"./\"\n", + "# A regex to match the model name, which matches the output file name.\n", + "MODEL_MATCHER='.*llama.*'" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "executionInfo": { + "elapsed": 33, + "status": "ok", + "timestamp": 1741735749209, + "user": { + "displayName": "Cong Liu", + "userId": "18222691451061354557" + }, + "user_tz": 420 + }, + "id": "dB7xALgLawN-" + }, + "outputs": [], + "source": [ + "#@title Plot Helper\n", + "import os\n", + "import pandas as pd\n", + "import re\n", + "import json\n", + "from collections import OrderedDict\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import math\n", + "import logging\n", + "level = logging.INFO\n", + "logger = logging.getLogger(__name__)\n", + "logger.setLevel(level)\n", + "handler = logging.StreamHandler() # This sends output to the console\n", + "handler.setLevel(level) # Set handler level\n", + "logger.addHandler(handler)\n", + "\n", + "title_fontsize = 18\n", + "axis_label_fontsize = 18\n", + "legend_fontsize = 16\n", + "tick_label_fontsize = 14\n", + "\n", + "# Encapsulates some basic information needed to plot metrics.\n", + "class XY:\n", + " def __init__(self, x: str, y: str, x_label=None, y_label=None):\n", + " self.x = x\n", + " self.y = y\n", + " self.x_label = x if x_label is None else x_label\n", + " self.y_label = y if y_label is None else y_label\n", + "\n", + "NUM_PLOTS_PER_ROW = 4\n", + "# The arguments need to match the metric name fields generated by the benchmark tool.\n", + "CORE_METRICS = [\n", + " XY(x = 'request_rate', x_label = 'QPS', y = 'output_tokens_per_min'),\n", + " XY(x = \"request_rate\", x_label = 'QPS', y = \"p90_per_output_token_latency\"),\n", + " XY(x = \"request_rate\", x_label = 'QPS', y = \"p90_latency\"),\n", + "]\n", + "SANITY_CHECK_METRICS = [\n", + " XY(x = 'request_rate', x_label = 'QPS', y = 'benchmark_time'),\n", + " XY(x = \"request_rate\", x_label = 'QPS', y=\"num_prompts_attempted\"),\n", + " XY(x = \"request_rate\", x_label = 'QPS', y=\"num_prompts_succeeded\"),\n", + " XY(x = 'request_rate', x_label = 'QPS', y = 'throughput_rps'),\n", + " XY(x = 'request_rate', x_label = 'QPS', y = 'total_input_tokens'),\n", + " XY(x = 'request_rate', x_label = 'QPS', y = 'total_output_token'),\n", + " XY(x = 'request_rate', x_label = 'QPS', y = 'avg_input_len'),\n", + " XY(x = 'request_rate', x_label = 'QPS', y = 'avg_output_len'),\n", + "]\n", + "\n", + 
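"# INTERACTIVE_PLOT is used below as the default for plot_bar()'s `interactive` argument but is not\n",
+    "# defined anywhere else in this notebook; defining it here (False is an assumed default) avoids a\n",
+    "# NameError when the cell runs.\n",
+    "INTERACTIVE_PLOT = False\n",
+    "\n",
+    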
"class Label:\n", + " def __init__(self, name, alias=None):\n", + " self.name = name\n", + " self.alias = name if alias is None else alias\n", + "\n", + "ALL_METRICS = CORE_METRICS + SANITY_CHECK_METRICS\n", + "\n", + "class Plotter:\n", + " def __init__(self, run_id, labels=None, metrics=CORE_METRICS, num_plots_per_row=5, interactive=False, annotate=False, output_dir=OUTPUT_DIR):\n", + " self.run_id = run_id\n", + " self.labels = labels\n", + " self.metrics = metrics\n", + " self.num_plots_per_row = num_plots_per_row\n", + " self.interactive = interactive\n", + " self.annotate = annotate\n", + " self.output_dir = output_dir\n", + "\n", + " def withRunId(self, run_id):\n", + " return Plotter(run_id, self.labels, self.metrics, self.num_plots_per_row, self.interactive, self.annotate, self.output_dir)\n", + "\n", + " def withLabels(self, labels):\n", + " return Plotter(self.run_id, labels, self.metrics, self.num_plots_per_row, self.interactive, self.annotate, self.output_dir)\n", + "\n", + " def withMetrics(self, metrics):\n", + " return Plotter(self.run_id, self.labels, metrics, self.num_plots_per_row, self.interactive, self.annotate, self.output_dir)\n", + "\n", + " def withOutputDir(self, output_dir):\n", + " return Plotter(self.run_id, self.labels, self.metrics, self.num_plots_per_row, self.interactive, self.annotate, output_dir)\n", + "\n", + " def plot_bar(self):\n", + " data = load_data(self.labels, self.run_id, self.output_dir)\n", + " groups = group_data(data, self.metrics)\n", + " logger.debug(\"Plotting run id...\")\n", + " plot_bar(self.labels, groups, self.metrics, self.num_plots_per_row, self.interactive, annotate=self.annotate)\n", + "\n", + "def filepaths(root_dir):\n", + " \"\"\"\n", + " Recursively reads files within a directory and returns a list of file paths.\n", + " \"\"\"\n", + "\n", + " filepaths = []\n", + " for dirpath, dirnames, filenames in os.walk(root_dir):\n", + " for filename in filenames:\n", + " filepath = os.path.join(dirpath, filename)\n", + " filepaths.append(filepath)\n", + " return filepaths\n", + "\n", + "def flatten_server_metrics(server_metrics):\n", + " \"\"\"\n", + " Flattens the server metrics json to a single level.\n", + " \"\"\"\n", + " flattend = {}\n", + " for k, v in server_metrics.items():\n", + " if isinstance(v, dict):\n", + " for k2, v2 in v.items():\n", + " flattend[k + \".\" + k2] = v2\n", + "\n", + " return flattend\n", + "\n", + "def load_data(labels, run_id, output_dir=OUTPUT_DIR):\n", + " data_path =f\"{BENCHMARK_DIR}/{output_dir}/{run_id}\"\n", + " records = []\n", + " logger.debug(f\"Loading data for {data_path}\")\n", + " for file in filepaths(data_path):\n", + " for label in labels:\n", + " regex = f\".*\\/{label.name}\\/results/json/{MODEL_MATCHER}.json\"\n", + " logger.debug(f\"matching file {file} for regex {regex} and label {label}\")\n", + " if re.match(regex, file):\n", + " logger.debug(f\"found match file {file} for regex {regex} and label {label}\")\n", + " with open(file, 'r') as f:\n", + " raw_data = json.load(f)\n", + " sample_data = {\n", + " 'file_name': f.name,\n", + " 'label': label.alias,\n", + " **raw_data.get(\"metrics\",{}),\n", + " **flatten_server_metrics(raw_data.get(\"metrics\",{}).get(\"server_metrics\", {})),\n", + " }\n", + " sample_data['request_rate'] = sample_data['request_rate'] * raw_data['config']['num_models']\n", + " records.append(sample_data)\n", + " all_data = pd.DataFrame.from_records(records, index='file_name') if len(records) > 0 else pd.DataFrame()\n", + " return all_data\n", + "\n", + 
"def group_data(all_data, metrics=CORE_METRICS):\n", + " try:\n", + " data = all_data.sort_values(by=['request_rate'], ascending=True).copy().dropna()\n", + " except:\n", + " # print(\"No data found\")\n", + " return None\n", + "\n", + " # Ensure there is exactly one benchmark result per label and x-axis for each\n", + " # metric.\n", + " x_axes = set()\n", + " for m in metrics:\n", + " x_axes.add(m.x)\n", + "\n", + " for x in x_axes:\n", + " sizes = data.groupby(by=['label', x], dropna=True).size()\n", + " for index, v in sizes.items():\n", + " if v > 1:\n", + " label, _ = index\n", + " # print(f\"Multiple benchmark results for the same label ({label}), and x-axis ({x}). {index}: {v}. Please use more selective file filters.\")\n", + " # raise ValueError(f\"Multiple benchmark results for the same label ({label}), and x-axis ({x}). Please use more selective file filters.\")\n", + "\n", + " # Group by label.\n", + " groups = data.groupby(by=['label'],sort=True)\n", + " return groups\n", + "\n", + "def init_plot(metrics, num_plots_per_row=NUM_PLOTS_PER_ROW):\n", + " num_plots_per_row = min(num_plots_per_row, len(metrics))\n", + " row_count = math.ceil(len(metrics) / num_plots_per_row)\n", + " fig, axes = plt.subplots(nrows=row_count, ncols=num_plots_per_row, figsize=(20, 5*row_count), tight_layout=True)\n", + " if row_count == 1 and num_plots_per_row == 1:\n", + " axes = [axes]\n", + " return fig, axes\n", + "\n", + "def plot_metrics(metrics, plot_func, num_plots_per_row=NUM_PLOTS_PER_ROW, fig=None, axes=None):\n", + " \"\"\"\n", + " plot_func: a function in the form of def plot_func(ax:~matplotlib.axes.Axes , m: XY):\n", + " \"\"\"\n", + " logger.debug(f'Plotting metrics: {metrics}')\n", + " num_plots_per_row = min(num_plots_per_row, len(metrics))\n", + " if fig is None or axes is None:\n", + " logger.debug(f'Creating new figure and axes')\n", + " fig, axes = init_plot(metrics, num_plots_per_row)\n", + " row_count = math.ceil(len(metrics) / num_plots_per_row)\n", + " for i, m in enumerate(metrics):\n", + " row = math.floor(i/num_plots_per_row)\n", + " col = i%num_plots_per_row\n", + " if row_count == 1:\n", + " curAx = axes[col]\n", + " else:\n", + " curAx = axes[row, col]\n", + " plot_func(curAx, m)\n", + " return fig, axes\n", + "\n", + "def plot_bar(labels, groups, metrics=CORE_METRICS, num_plots_per_row=NUM_PLOTS_PER_ROW, interactive=INTERACTIVE_PLOT, annotate=False):\n", + " labels = [label.alias for label in labels]\n", + " logger.debug(f'Prnting bar chart for {labels}')\n", + " logger.debug(f'groups: {groups}')\n", + " dataframes = []\n", + " for label in labels:\n", + " try:\n", + " dataframes.append(groups.get_group((label,)))\n", + " except:\n", + " logger.debug(f\"No data found for label {label}\")\n", + " continue\n", + " y_columns = [m.y for m in metrics]\n", + " logger.debug(f'y_columns: {y_columns}')\n", + " logger.debug(f'dataframes: {dataframes}')\n", + "\n", + " # 1. Combine all request rates\n", + " all_request_rates = set()\n", + " for df in dataframes:\n", + " all_request_rates.update(df['request_rate'].astype(int))\n", + " all_request_rates = sorted(list(all_request_rates))\n", + "\n", + " # 2. 
Prepare data for plotting: Create a nested dictionary\n", + " plot_data = {y_col: {label: {} for label in labels} for y_col in y_columns}\n", + "\n", + " for i, df in enumerate(dataframes):\n", + " label = labels[i]\n", + " df_dict = df.set_index('request_rate').to_dict()\n", + " for y_col in y_columns:\n", + " for request_rate in all_request_rates:\n", + " plot_data[y_col][label][request_rate] = df_dict.get(y_col, {}).get(request_rate, np.nan)\n", + "\n", + " logger.debug(f'Plot_data: {plot_data}')\n", + "\n", + " # 3. Plotting\n", + " def plot_func(curAx, m):\n", + " num_request_rates = len(all_request_rates)\n", + " num_labels = len(labels)\n", + " x = np.arange(num_request_rates) # the label locations (x-axis positions)\n", + " width = 0.4 / num_labels # width of the bars\n", + "\n", + " for i, label in enumerate(labels):\n", + " bar_x = x - (width*num_labels)/2 + i*width + width/2\n", + " #Extract y-values to plot\n", + " y_values = [plot_data[m.y][label][rr] for rr in all_request_rates]\n", + "\n", + " rects = curAx.bar(bar_x, y_values, width, label=label)\n", + " if annotate:\n", + " for rect, val in zip(rects, y_values):\n", + " if not np.isnan(val):\n", + " height = rect.get_height()\n", + " curAx.annotate(f'{val:.2f}',\n", + " xy=(rect.get_x() + rect.get_width() / 2, height),\n", + " xytext=(0, 3), # 3 points vertical offset\n", + " textcoords=\"offset points\",\n", + " ha='center', va='bottom')\n", + " # Add labels, title, and legend\n", + " curAx.set_xlabel(m.x_label, fontsize=axis_label_fontsize)\n", + " curAx.set_ylabel(m.y_label, fontsize=axis_label_fontsize)\n", + " curAx.set_xticks(x)\n", + " curAx.set_xticklabels(all_request_rates)\n", + " curAx.tick_params(axis='both', labelsize=tick_label_fontsize)\n", + " curAx.legend(fontsize=legend_fontsize, loc='upper left', frameon=True, framealpha=0.8, edgecolor='black')\n", + " fig, axes = plot_metrics(metrics, plot_func, num_plots_per_row)\n", + " fig.tight_layout(rect=[0, 0.03, 1, 0.95])\n", + " plt.show()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "height": 1000 + }, + "executionInfo": { + "elapsed": 2232, + "status": "ok", + "timestamp": 1741735855456, + "user": { + "displayName": "Cong Liu", + "userId": "18222691451061354557" + }, + "user_tz": 420 + }, + "id": "HbGEAOucb_Jn", + "outputId": "faf0304b-92f4-4fa7-ae71-83b8bd987e70" + }, + "outputs": [], + "source": [ + "#@title Plot Result\n", + "\n", + "pl = Plotter(run_id=RUN_ID, labels=[Label('inference-extension'),Label('k8s-svc')], output_dir=OUTPUT_DIR)\n", + "pl.plot_bar()" + ] + } + ], + "metadata": { + "colab": { + "last_runtime": { + "build_target": "", + "kind": "local" + }, + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/benchmark/download-benchmark-results.bash b/benchmark/download-benchmark-results.bash new file mode 100755 index 00000000..333fc6cc --- /dev/null +++ b/benchmark/download-benchmark-results.bash @@ -0,0 +1,30 @@ +#!/bin/bash + +# Downloads the benchmark result files from the benchmark tool pod. 
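+# How it works: poll the benchmark-tool deployment logs every 30s until the LPG tool prints
+# "LPG_FINISHED", then find the pod, remove the ShareGPT dataset file inside it (so it does not
+# match the json file listing below), and copy each remaining .json result file out via `kubectl cp`.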
+download_benchmark_results() { + until echo $(kubectl logs deployment/benchmark-tool -n ${namespace}) | grep -q -m 1 "LPG_FINISHED"; do sleep 30 ; done; + benchmark_pod=$(kubectl get pods -l app=benchmark-tool -n ${namespace} -o jsonpath="{.items[0].metadata.name}") + echo "Downloading JSON results from pod ${benchmark_pod}" + kubectl exec ${benchmark_pod} -n ${namespace} -- rm -f ShareGPT_V3_unfiltered_cleaned_split.json + for f in $(kubectl exec ${benchmark_pod} -n ${namespace} -- /bin/sh -c ls -l | grep json); do + echo "Downloading json file ${f}" + kubectl cp -n ${namespace} ${benchmark_pod}:$f ${benchmark_output_dir}/results/json/$f; + done +} + +# Env vars to be passed when calling this script. +# The id of the benchmark. This is needed to identify what the benchmark is for. +# It decides the filepath to save the results, which later is used by the jupyter notebook to assign +# the benchmark_id as data labels for plotting. +benchmark_id=${benchmark_id:-"inference-extension"} +# run_id can be used to group different runs of the same benchmarks for comparison. +run_id=${run_id:-"default-run"} +namespace=${namespace:-"default"} +output_dir=${output_dir:-'output'} + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +benchmark_output_dir=${SCRIPT_DIR}/${output_dir}/${run_id}/${benchmark_id} + +echo "Saving benchmark results to ${benchmark_output_dir}/results/json/" +download_benchmark_results +kubectl delete -f ${SCRIPT_DIR}/../config/manifests/benchmark/benchmark.yaml \ No newline at end of file diff --git a/benchmark/requirements.txt b/benchmark/requirements.txt new file mode 100644 index 00000000..44974cf4 --- /dev/null +++ b/benchmark/requirements.txt @@ -0,0 +1,3 @@ +pandas +numpy +matplotlib \ No newline at end of file diff --git a/config/manifests/benchmark/benchmark.yaml b/config/manifests/benchmark/benchmark.yaml new file mode 100644 index 00000000..a47b4617 --- /dev/null +++ b/config/manifests/benchmark/benchmark.yaml @@ -0,0 +1,60 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: benchmark-tool + name: benchmark-tool +spec: + replicas: 1 + selector: + matchLabels: + app: benchmark-tool + template: + metadata: + labels: + app: benchmark-tool + spec: + containers: + # The following image was built from this source https://github.com/AI-Hypercomputer/inference-benchmark/tree/07628c9fe01b748f5a4cc9e5c2ee4234aaf47699 + - image: 'us-docker.pkg.dev/cloud-tpu-images/inference/inference-benchmark@sha256:1c100b0cc949c7df7a2db814ae349c790f034b4b373aaad145e77e815e838438' + imagePullPolicy: Always + name: benchmark-tool + command: + - bash + - -c + - ./latency_throughput_curve.sh + env: + - name: IP + value: '' + - name: REQUEST_RATES + value: '10,20,30' + - name: BENCHMARK_TIME_SECONDS + value: '60' + - name: TOKENIZER + value: 'meta-llama/Llama-2-7b-hf' + - name: MODELS + value: 'meta-llama/Llama-2-7b-hf' + - name: BACKEND + value: vllm + - name: PORT + value: "8081" + - name: INPUT_LENGTH + value: "1024" + - name: OUTPUT_LENGTH + value: '2048' + - name: FILE_PREFIX + value: benchmark + - name: PROMPT_DATASET_FILE + value: ShareGPT_V3_unfiltered_cleaned_split.json + - name: HF_TOKEN + valueFrom: + secretKeyRef: + key: token + name: hf-token + resources: + limits: + cpu: "2" + memory: 20Gi + requests: + cpu: "2" + memory: 20Gi diff --git a/config/manifests/benchmark/model-server-service.yaml b/config/manifests/benchmark/model-server-service.yaml new file mode 100644 index 00000000..014054cf --- /dev/null +++ 
b/config/manifests/benchmark/model-server-service.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: my-pool-service +spec: + ports: + - port: 8081 + protocol: TCP + targetPort: 8000 + selector: + app: my-pool + type: LoadBalancer diff --git a/mkdocs.yml b/mkdocs.yml index 8cd3f3fb..fc4c9438 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -59,6 +59,8 @@ nav: - Adapter Rollout: guides/adapter-rollout.md - Metrics: guides/metrics.md - Implementer's Guide: guides/implementers.md + - Performance: + - Benchmark: performance/benchmark/index.md - Reference: - API Reference: reference/spec.md - API Types: diff --git a/site-src/performance/benchmark/example-bar-chart.png b/site-src/performance/benchmark/example-bar-chart.png new file mode 100644 index 00000000..54dc6589 Binary files /dev/null and b/site-src/performance/benchmark/example-bar-chart.png differ diff --git a/site-src/performance/benchmark/index.md b/site-src/performance/benchmark/index.md new file mode 100644 index 00000000..445729a6 --- /dev/null +++ b/site-src/performance/benchmark/index.md @@ -0,0 +1,98 @@
+# Benchmark
+
+This user guide shows how to run benchmarks against a vLLM deployment, using both the Gateway API
+inference extension and a plain Kubernetes Service as the load balancing strategy. The
+benchmark uses the [Latency Profile Generator](https://github.com/AI-Hypercomputer/inference-benchmark) (LPG)
+tool to generate load and collect results.
+
+## Prerequisites
+
+### Deploy the inference extension and sample model server
+
+Follow [this user guide](https://gateway-api-inference-extension.sigs.k8s.io/guides/) to deploy the
+sample vLLM application and the inference extension.
+
+### [Optional] Scale the sample vLLM deployment
+
+You are more likely to see the benefits of the inference extension when there is a decent number of
+replicas, since that gives it room to make optimal routing decisions.
+
+```bash
+kubectl scale --replicas=8 -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml
+```
+
+### Expose the model server via a k8s service
+
+As the baseline, let's also expose the vLLM deployment as a k8s service:
+
+```bash
+kubectl expose -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml --port=8081 --target-port=8000 --type=LoadBalancer
+```
+
+Alternatively, you can apply the `./config/manifests/benchmark/model-server-service.yaml` manifest in this
+repo, as long as its selector matches the labels on your vLLM pods.
+
+## Run benchmark
+
+The LPG benchmark tool works by sending traffic to the specified target IP and port and collecting results.
+Follow the steps below to run a single benchmark. You can deploy multiple LPG instances if you want to run
+benchmarks in parallel against different targets.
+
+1. Check out the repo.
+
+    ```bash
+    git clone https://github.com/kubernetes-sigs/gateway-api-inference-extension
+    cd gateway-api-inference-extension
+    ```
+
+1. Get the target IP. The examples below show how to get the IP of the gateway and of the LoadBalancer k8s service.
+
+    ```bash
+    # Get the gateway IP
+    GW_IP=$(kubectl get gateway/inference-gateway -o jsonpath='{.status.addresses[0].value}')
+    # Get the LoadBalancer k8s service IP. Replace <service-name> with the name of the Service created
+    # by the `kubectl expose` command above (it defaults to the name of the vLLM deployment).
+    SVC_IP=$(kubectl get service/<service-name> -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+
+    echo $GW_IP
+    echo $SVC_IP
+    ```
+
+1. Update the `IP` environment variable in `./config/manifests/benchmark/benchmark.yaml` to your target IP.
+    Feel free to adjust other parameters such as `REQUEST_RATES` as well.
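+
+    For example, one way to set the target IP in place is a quick `sed` one-liner (a sketch: it assumes GNU
+    `sed`, that `$GW_IP` holds the IP captured above, and that the `IP` entry is the only empty `value: ''`
+    field in the manifest):
+
+    ```bash
+    # Point the benchmark tool at the gateway; use "$SVC_IP" instead to benchmark the k8s Service baseline.
+    sed -i "s#value: ''#value: '${GW_IP}'#" ./config/manifests/benchmark/benchmark.yaml
+    ```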
+
+    For a complete list of LPG configurations, please refer to the [LPG user guide](https://github.com/AI-Hypercomputer/inference-benchmark?tab=readme-ov-file#configuring-the-benchmark).
+
+1. Start the benchmark tool: `kubectl apply -f ./config/manifests/benchmark/benchmark.yaml`
+
+1. Wait for the benchmark to finish and download the results. Use the `benchmark_id` environment variable
+to specify what this benchmark is for, for instance `inference-extension` or `k8s-svc`. When the LPG tool
+finishes benchmarking, it prints a log line `LPG_FINISHED`; the script below watches for that log line and
+then starts downloading the results.
+
+    ```bash
+    benchmark_id='my-benchmark' ./benchmark/download-benchmark-results.bash
+    ```
+
+1. After the script finishes, you should see the benchmark results under the
+`./benchmark/output/default-run/my-benchmark/results/json` folder.
+
+### Tips
+
+* You can set the `run_id="runX"` environment variable when running the `./download-benchmark-results.bash` script.
+This is useful when you run benchmarks multiple times to get more statistically meaningful results and want to
+group the results accordingly.
+* Update `REQUEST_RATES` to values that best suit your benchmark environment.
+
+### Advanced Benchmark Configurations
+
+Please refer to the [LPG user guide](https://github.com/AI-Hypercomputer/inference-benchmark?tab=readme-ov-file#configuring-the-benchmark) for a detailed list of configuration knobs.
+
+## Analyze the results
+
+This section shows how to run the Jupyter notebook using VS Code.
+
+1. Create a Python virtual environment.
+
+    ```bash
+    python3 -m venv .venv
+    source .venv/bin/activate
+    ```
+
+1. Install the dependencies.
+
+    ```bash
+    pip install -r ./benchmark/requirements.txt
+    ```
+
+1. Open the notebook `./benchmark/benchmark.ipynb` and run each cell. At the end you should
+   see a bar chart like the one below:
+
+    ![example bar chart](example-bar-chart.png)
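+
+The notebook's final cell plots the labels `inference-extension` and `k8s-svc` against each other. A possible
+workflow for producing both result sets (a sketch, assuming you update and re-apply `benchmark.yaml` with the
+other target IP between runs) is to download each run with the matching `benchmark_id` and a shared `run_id`,
+which the notebook's `RUN_ID` should then point to:
+
+```bash
+# After running the benchmark against the gateway IP (steps above), download that run:
+benchmark_id='inference-extension' run_id='example-run' ./benchmark/download-benchmark-results.bash
+# Then set the IP in benchmark.yaml to $SVC_IP, re-apply it, and download that run under a different label:
+benchmark_id='k8s-svc' run_id='example-run' ./benchmark/download-benchmark-results.bash
+```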