{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# This file is part of nannos\n# License: GPLv3\n%matplotlib notebook" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n# Backends comparison\n\nNumerical backends performance comparison.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\nimport numpy as np\n\nmarkers = [\"o\", \"s\", \"d\", \"v\", \"^\", \">\"]\nfigsize = (2, 2)\nthreads = [1, 2, 4, 8, 16]\ndevices = [\"cpu\", \"gpu\"]\n# backends = [\"numpy\", \"scipy\", \"autograd\", \"jax\", \"torch\"]\n# we skip jax as it is complicated to deal with multithreading so a fair comparison is impossible\nbackends = [\"numpy\", \"scipy\", \"autograd\", \"torch\"]\n\ncolors = [\"#3b9dd4\", \"#ecd142\", \"#e87c40\", \"#b33dd1\", \"#50ba61\", \"#cd2323\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Time vs. 
number of harmonics\n\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# One figure per thread count: run time of every backend/device pair versus the\n# number of harmonics.\nfor num_threads in threads:\n    plt.figure(figsize=figsize)\n    i = 0  # marker/color index, advanced once per plotted curve\n    for backend in backends:\n        for device in devices:\n            g = \"cuda\" if device == \"gpu\" else device\n            if device != \"gpu\" or backend not in [\n                \"numpy\",\n                \"scipy\",\n                \"autograd\",\n                \"jax\",\n            ]:\n                # The context manager closes the .npz archive once the arrays are\n                # read, so file handles are not leaked across iterations.\n                # NOTE(review): allow_pickle=True can run arbitrary code while\n                # unpickling; only load benchmark files you trust.\n                with np.load(\n                    f\"{num_threads}/benchmark_{backend}_{g}.npz\", allow_pickle=True\n                ) as arch:\n                    real_nh = arch[\"real_nh\"]\n                    times = arch[\"times\"]\n                    times_all = np.array(arch[\"times_all\"])\n                # NH is reused by the later cells that loop over the harmonics.\n                NH = real_nh\n                plt.plot(\n                    real_nh,\n                    times,\n                    f\"-{markers[i]}\",\n                    c=colors[i],\n                    label=f\"{backend} {device}\",\n                )\n\n                # Error bars: standard deviation of times_all along axis 1.\n                times_std = np.std(times_all, axis=1)\n                plt.errorbar(\n                    real_nh,\n                    times,\n                    times_std,\n                    c=colors[i],\n                    capsize=1,\n                )\n\n                i += 1\n    plt.legend()\n    plt.yscale(\"log\")\n    plt.xscale(\"log\")\n    plt.xlabel(\"number of harmonics\")\n    plt.ylabel(\"time (s)\")\n    plt.title(f\"backends comparison {num_threads} threads\")\n    plt.tight_layout()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Speedup vs. 
number of harmonics\n\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# One figure per thread count: speedup of every non-numpy backend relative to the\n# numpy CPU benchmark (numpy time divided by backend time).\nfor num_threads in threads:\n    plt.figure(figsize=figsize)\n    # Read the numpy baseline once per thread count. The context manager closes\n    # the .npz archive, so file handles are not leaked across iterations.\n    # NOTE(review): allow_pickle=True can run arbitrary code while unpickling;\n    # only load benchmark files you trust.\n    with np.load(\n        f\"{num_threads}/benchmark_numpy_cpu.npz\", allow_pickle=True\n    ) as arch_np:\n        times_np = np.array(arch_np[\"times\"])\n        times_all_np = np.array(arch_np[\"times_all\"])\n\n    # Start at 1: index 0 of markers/colors is the one numpy gets in the timing\n    # figures, so the other backends keep the same style here.\n    i = 1\n    for backend in backends:\n        if backend == \"numpy\":\n            continue  # numpy is the baseline itself, nothing to plot\n        for device in devices:\n            g = \"cuda\" if device == \"gpu\" else device\n            if device != \"gpu\" or backend == \"torch\":\n                with np.load(\n                    f\"{num_threads}/benchmark_{backend}_{g}.npz\", allow_pickle=True\n                ) as arch:\n                    real_nh = arch[\"real_nh\"]\n                    times = np.array(arch[\"times\"])\n                    times_all = np.array(arch[\"times_all\"])\n                speedup = times_np / times\n                plt.plot(\n                    real_nh,\n                    speedup,\n                    f\"-{markers[i]}\",\n                    c=colors[i],\n                    label=f\"{backend} {device}\",\n                )\n\n                # Error bars: standard deviation of the speedups along axis 1.\n                speedup_all = times_all_np / times_all\n                speedup_std = np.std(speedup_all, axis=1)\n                plt.errorbar(\n                    real_nh,\n                    speedup,\n                    speedup_std,\n                    c=colors[i],\n                    capsize=1,\n                )\n                i += 1\n    plt.legend()\n    # plt.yscale(\"log\")\n    # plt.xscale(\"log\")\n    plt.xlabel(\"number of harmonics\")\n    plt.ylabel(\"speedup vs. numpy\")\n    plt.title(f\"backends comparison {num_threads} threads\")\n    plt.tight_layout()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Time vs. 
number of threads\n\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# One figure per number of harmonics: run time of every backend/device pair\n# versus the number of threads. NH is defined by the first plotting cell.\n\n# Read each benchmark file once, up front, rather than once per figure. The\n# context manager closes the .npz archive, so file handles are not leaked.\n# NOTE(review): allow_pickle=True can run arbitrary code while unpickling; only\n# load benchmark files you trust.\ntimings = {}\nfor backend in backends:\n    for device in devices:\n        if device != \"gpu\" or backend == \"torch\":\n            g = \"cuda\" if device == \"gpu\" else device\n            t_threads = []\n            t_threads_all = []\n            for num_threads in threads:\n                with np.load(\n                    f\"{num_threads}/benchmark_{backend}_{g}.npz\", allow_pickle=True\n                ) as arch:\n                    t_threads.append(arch[\"times\"])\n                    t_threads_all.append(arch[\"times_all\"])\n            if t_threads:\n                timings[f\"{backend} {device}\"] = (\n                    np.array(t_threads),\n                    np.array(t_threads_all),\n                )\n\nfor inh in range(len(NH)):\n    plt.figure(figsize=figsize)\n    # dicts preserve insertion order, so i follows the loading order above\n    for i, (label, (t_threads, t_threads_all)) in enumerate(timings.items()):\n        plt.plot(\n            threads,\n            t_threads[:, inh],\n            f\"-{markers[i]}\",\n            c=colors[i],\n            label=label,\n        )\n        # Error bars: standard deviation along axis 1 of this harmonic's slice.\n        times_std = np.std(t_threads_all[:, inh], axis=1)\n        plt.errorbar(\n            threads,\n            t_threads[:, inh],\n            times_std,\n            c=colors[i],\n            capsize=1,\n        )\n    plt.xticks(threads)\n\n    plt.legend(ncol=2)\n    plt.yscale(\"log\")\n    # plt.xscale(\"log\")\n    plt.xlabel(\"number of threads\")\n    plt.ylabel(\"time (s)\")\n    plt.title(f\"backends comparison {NH[inh]} harmonics\")\n    plt.tight_layout()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Speedup vs. 
number of threads\n\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# One figure per number of harmonics: speedup of every non-numpy backend relative\n# to the numpy CPU benchmark, versus the number of threads. NH is defined by the\n# first plotting cell.\n\n# Read each benchmark file once, up front, rather than once per figure. The\n# context manager closes the .npz archive, so file handles are not leaked.\n# NOTE(review): allow_pickle=True can run arbitrary code while unpickling; only\n# load benchmark files you trust.\nbaseline = []\nfor num_threads in threads:\n    with np.load(\n        f\"{num_threads}/benchmark_numpy_cpu.npz\", allow_pickle=True\n    ) as arch_np:\n        baseline.append(\n            (np.array(arch_np[\"times\"]), np.array(arch_np[\"times_all\"]))\n        )\n\nspeedups = {}\nfor backend in backends:\n    if backend == \"numpy\":\n        continue  # numpy is the baseline itself, nothing to plot\n    for device in devices:\n        if device != \"gpu\" or backend == \"torch\":\n            g = \"cuda\" if device == \"gpu\" else device\n            speedup_threads = []\n            speedup_threads_all = []\n            for num_threads, (times_np, times_all_np) in zip(threads, baseline):\n                with np.load(\n                    f\"{num_threads}/benchmark_{backend}_{g}.npz\", allow_pickle=True\n                ) as arch:\n                    speedup_threads.append(times_np / np.array(arch[\"times\"]))\n                    speedup_threads_all.append(\n                        times_all_np / np.array(arch[\"times_all\"])\n                    )\n            if speedup_threads:\n                speedups[f\"{backend} {device}\"] = (\n                    np.array(speedup_threads),\n                    np.array(speedup_threads_all),\n                )\n\nfor inh in range(len(NH)):\n    plt.figure(figsize=figsize)\n    # Start at 1: index 0 of markers/colors is the one numpy gets in the timing\n    # figures, so the other backends keep the same style here.\n    for i, (label, (speedup_threads, speedup_threads_all)) in enumerate(\n        speedups.items(), start=1\n    ):\n        plt.plot(\n            threads,\n            speedup_threads[:, inh],\n            f\"-{markers[i]}\",\n            c=colors[i],\n            label=label,\n        )\n\n        # Error bars: standard deviation along axis 1 of this harmonic's slice.\n        speedup_std = np.std(speedup_threads_all[:, inh], axis=1)\n        plt.errorbar(\n            threads,\n            speedup_threads[:, inh],\n            speedup_std,\n            c=colors[i],\n            capsize=1,\n        )\n    plt.xticks(threads)\n    # plt.ylim(0.25, 3.8)\n\n    plt.legend(ncol=2)\n    # plt.yscale(\"log\")\n    # plt.xscale(\"log\")\n    plt.xlabel(\"number of threads\")\n    plt.ylabel(\"speedup vs. 
numpy\")\n    plt.title(f\"backends comparison {NH[inh]} harmonics\")\n    plt.tight_layout()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import nannos.utils.jupyter\n%nannos_version_table" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 0 }