{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# This file is part of nannos\n# License: GPLv3\n%matplotlib notebook"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Backends comparison\n\nPerformance comparison of the numerical backends.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import matplotlib.pyplot as plt\nimport numpy as np\n\n# Benchmark grid. Results are read from\n# {num_threads}/benchmark_{backend}_{device}.npz, with \"gpu\" spelled \"cuda\"\n# in the file name.\nthreads = [1, 2, 4, 8, 16]\ndevices = [\"cpu\", \"gpu\"]\n# jax is skipped: multithreading is complicated to deal with there, so a fair\n# comparison is impossible. The full list would be\n# [\"numpy\", \"scipy\", \"autograd\", \"jax\", \"torch\"].\nbackends = [\"numpy\", \"scipy\", \"autograd\", \"torch\"]\n\n# Plot styling. markers and colors are indexed together, one entry per curve.\nfigsize = (2, 2)\nmarkers = [\"o\", \"s\", \"d\", \"v\", \"^\", \">\"]\ncolors = [\"#3b9dd4\", \"#ecd142\", \"#e87c40\", \"#b33dd1\", \"#50ba61\", \"#cd2323\"]"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Time vs. number of harmonics\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "for num_threads in threads:\n    fig, ax = plt.subplots(figsize=figsize)\n    i = 0  # index into markers/colors, advanced once per plotted curve\n    for backend in backends:\n        for device in devices:\n            # GPU results are only loaded for torch, as in the other cells.\n            if device == \"gpu\" and backend != \"torch\":\n                continue\n            g = \"cuda\" if device == \"gpu\" else device\n            # NOTE(review): allow_pickle=True unpickles objects stored in the\n            # archive, so only point this at benchmark files you trust.\n            # The with-block closes the archive once the arrays are read.\n            with np.load(\n                f\"{num_threads}/benchmark_{backend}_{g}.npz\", allow_pickle=True\n            ) as arch:\n                # NH stays a notebook-level name: the cells plotting against\n                # the number of threads iterate over it.\n                NH = arch[\"real_nh\"]\n                times = arch[\"times\"]\n                times_all = np.array(arch[\"times_all\"])\n\n            ax.plot(\n                NH,\n                times,\n                f\"-{markers[i]}\",\n                c=colors[i],\n                label=f\"{backend} {device}\",\n            )\n            # Error bars: standard deviation of times_all along axis 1.\n            times_std = np.std(times_all, axis=1)\n            ax.errorbar(NH, times, times_std, c=colors[i], capsize=1)\n            i += 1\n    ax.legend()\n    ax.set_yscale(\"log\")\n    ax.set_xscale(\"log\")\n    ax.set_xlabel(\"number of harmonics\")\n    ax.set_ylabel(\"time (s)\")\n    ax.set_title(f\"backends comparison {num_threads} threads\")\n    fig.tight_layout()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Speedup vs. number of harmonics\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "for num_threads in threads:\n    fig, ax = plt.subplots(figsize=figsize)\n    # numpy on CPU is the reference every speedup is measured against. The\n    # with-block closes the archive once the arrays are read.\n    with np.load(\n        f\"{num_threads}/benchmark_numpy_cpu.npz\", allow_pickle=True\n    ) as arch_np:\n        ref_times = np.array(arch_np[\"times\"])\n        ref_times_all = np.array(arch_np[\"times_all\"])\n\n    # Indices start at 1: numpy itself is not drawn, and it takes index 0 in\n    # the \"Time vs. number of harmonics\" figures.\n    i = 1\n    for backend in backends:\n        if backend == \"numpy\":\n            continue  # the reference is not compared with itself\n        for device in devices:\n            if device == \"gpu\" and backend != \"torch\":\n                continue  # GPU results are only loaded for torch\n            g = \"cuda\" if device == \"gpu\" else device\n            with np.load(\n                f\"{num_threads}/benchmark_{backend}_{g}.npz\", allow_pickle=True\n            ) as arch:\n                nh = arch[\"real_nh\"]\n                speedup = ref_times / np.array(arch[\"times\"])\n                speedup_all = ref_times_all / np.array(arch[\"times_all\"])\n\n            ax.plot(\n                nh,\n                speedup,\n                f\"-{markers[i]}\",\n                c=colors[i],\n                label=f\"{backend} {device}\",\n            )\n            # Error bars: standard deviation of the speedups along axis 1.\n            speedup_std = np.std(speedup_all, axis=1)\n            ax.errorbar(nh, speedup, speedup_std, c=colors[i], capsize=1)\n            i += 1\n    ax.legend()\n    # Both axes stay linear here; no log scaling is applied.\n    ax.set_xlabel(\"number of harmonics\")\n    ax.set_ylabel(\"speedup vs. numpy\")\n    ax.set_title(f\"backends comparison {num_threads} threads\")\n    fig.tight_layout()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Time vs. number of threads\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Read every archive once, before the figure loop: the files do not depend on\n# the harmonic index, so loading inside that loop would repeat the same I/O for\n# each entry of NH. Each with-block closes its archive after the arrays are read.\ncurves = []  # (label, times, times_all) for every plotted backend/device pair\nfor backend in backends:\n    for device in devices:\n        if device == \"gpu\" and backend != \"torch\":\n            continue  # GPU results are only loaded for torch\n        g = \"cuda\" if device == \"gpu\" else device\n        t_threads = []\n        t_threads_all = []\n        for num_threads in threads:\n            with np.load(\n                f\"{num_threads}/benchmark_{backend}_{g}.npz\", allow_pickle=True\n            ) as arch:\n                t_threads.append(arch[\"times\"])\n                t_threads_all.append(arch[\"times_all\"])\n        if t_threads:\n            curves.append(\n                (f\"{backend} {device}\", np.array(t_threads), np.array(t_threads_all))\n            )\n\n# NH is defined by the \"Time vs. number of harmonics\" cell above.\nfor inh in range(len(NH)):\n    fig, ax = plt.subplots(figsize=figsize)\n    # enumerate supplies the markers/colors index, one per plotted curve.\n    for i, (label, t_threads, t_threads_all) in enumerate(curves):\n        ax.plot(\n            threads,\n            t_threads[:, inh],\n            f\"-{markers[i]}\",\n            c=colors[i],\n            label=label,\n        )\n        # Error bars: standard deviation of times_all along axis 1, taken at\n        # this harmonic index.\n        times_std = np.std(t_threads_all[:, inh], axis=1)\n        ax.errorbar(threads, t_threads[:, inh], times_std, c=colors[i], capsize=1)\n    ax.set_xticks(threads)\n\n    ax.legend(ncol=2)\n    # Only the time axis is logarithmic; the thread axis stays linear.\n    ax.set_yscale(\"log\")\n    ax.set_xlabel(\"number of threads\")\n    ax.set_ylabel(\"time (s)\")\n    ax.set_title(f\"backends comparison {NH[inh]} harmonics\")\n    fig.tight_layout()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Speedup vs. number of threads\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# numpy on CPU is the reference; read it once per thread count. Each\n# with-block closes its archive after the arrays are read.\nreference = []  # (times, times_all) of numpy/cpu, aligned with threads\nfor num_threads in threads:\n    with np.load(\n        f\"{num_threads}/benchmark_numpy_cpu.npz\", allow_pickle=True\n    ) as arch_np:\n        reference.append((np.array(arch_np[\"times\"]), np.array(arch_np[\"times_all\"])))\n\n# Speedups do not depend on the harmonic index, so they are computed once here\n# instead of once per figure.\ncurves = []  # (label, speedup, speedup_all) for every plotted backend/device pair\nfor backend in backends:\n    if backend == \"numpy\":\n        continue  # the reference is not compared with itself\n    for device in devices:\n        if device == \"gpu\" and backend != \"torch\":\n            continue  # GPU results are only loaded for torch\n        g = \"cuda\" if device == \"gpu\" else device\n        speedup_threads = []\n        speedup_threads_all = []\n        for num_threads, (ref_times, ref_times_all) in zip(threads, reference):\n            with np.load(\n                f\"{num_threads}/benchmark_{backend}_{g}.npz\", allow_pickle=True\n            ) as arch:\n                speedup_threads.append(ref_times / np.array(arch[\"times\"]))\n                speedup_threads_all.append(\n                    ref_times_all / np.array(arch[\"times_all\"])\n                )\n        if speedup_threads:\n            curves.append(\n                (\n                    f\"{backend} {device}\",\n                    np.array(speedup_threads),\n                    np.array(speedup_threads_all),\n                )\n            )\n\n# NH is defined by the \"Time vs. number of harmonics\" cell above.\nfor inh in range(len(NH)):\n    fig, ax = plt.subplots(figsize=figsize)\n    # Indices start at 1: numpy itself is not drawn, and it takes index 0 in\n    # the timing figures.\n    for i, (label, speedup_threads, speedup_threads_all) in enumerate(curves, start=1):\n        ax.plot(\n            threads,\n            speedup_threads[:, inh],\n            f\"-{markers[i]}\",\n            c=colors[i],\n            label=label,\n        )\n        # Error bars: standard deviation of the speedups along axis 1, taken\n        # at this harmonic index.\n        speedup_std = np.std(speedup_threads_all[:, inh], axis=1)\n        ax.errorbar(\n            threads,\n            speedup_threads[:, inh],\n            speedup_std,\n            c=colors[i],\n            capsize=1,\n        )\n    ax.set_xticks(threads)\n    # ax.set_ylim(0.25, 3.8)\n\n    ax.legend(ncol=2)\n    # Both axes stay linear here; no log scaling is applied.\n    ax.set_xlabel(\"number of threads\")\n    ax.set_ylabel(\"speedup vs. numpy\")\n    ax.set_title(f\"backends comparison {NH[inh]} harmonics\")\n    fig.tight_layout()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import nannos.utils.jupyter\n%nannos_version_table"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.12.5"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}