{ "cells": [ { "cell_type": "markdown", "id": "012b1512", "metadata": {}, "source": [ "# Daily-Dose-of-Data-Science\n", "\n", "[Daily Dose of Data Science](https://avichawla.substack.com) is a publication on Substack that brings together intriguing frameworks, libraries, technologies, and tips that make the life cycle of a Data Science project effortless. \n", "\n", "Author: Avi Chawla\n", "\n", "[Medium](https://medium.com/@avi_chawla) | [LinkedIn](https://www.linkedin.com/in/avi-chawla/)\n", "\n", "# A Little Bit Of Extra Effort Can Hugely Transform Your Basic Matplotlib Plots\n", "\n", "Post Link: [Substack](https://avichawla.substack.com/p/a-little-bit-of-extra-effort-can)\n", "\n", "LinkedIn Post: [LinkedIn](https://www.linkedin.com/feed/update/urn:li:activity:7049335450402009088/)\n", "\n", "Twitter Post: [Twitter](https://twitter.com/_avichawla/status/1643572511945244673)" ] }, { "cell_type": "markdown", "id": "610348b9", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": 1, "id": "220200d3", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import os\n", "import matplotlib.pyplot as plt\n", "import matplotlib.dates as mdates\n", "import matplotlib.ticker as mtick" ] }, { "cell_type": "markdown", "id": "76228559", "metadata": {}, "source": [ "## Dummy Data" ] }, { "cell_type": "code", "execution_count": 2, "id": "519d38f7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div><div id=f37846bf-01b2-4dbc-8b6e-5e36e3a64978 style=\"display:none; background-color:#9D6CFF; color:white; width:200px; height:30px; padding-left:5px; border-radius:4px; flex-direction:row; justify-content:space-around; align-items:center;\" onmouseover=\"this.style.backgroundColor='#BA9BF8'\" onmouseout=\"this.style.backgroundColor='#9D6CFF'\" onclick=\"window.commands?.execute('create-mitosheet-from-dataframe-output');\">See Full Dataframe in Mito</div> <script> if 
(window.commands?.hasCommand('create-mitosheet-from-dataframe-output')) document.getElementById('f37846bf-01b2-4dbc-8b6e-5e36e3a64978').style.display = 'flex' </script> <table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Spend_Category</th>\n", " <th>Profit_Margin</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>Clothing</td>\n", " <td>0.10</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>Home Decor</td>\n", " <td>0.43</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>Electronics</td>\n", " <td>0.13</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Stationery</td>\n", " <td>0.20</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>Toys</td>\n", " <td>0.14</td>\n", " </tr>\n", " </tbody>\n", "</table></div>" ], "text/plain": [ " Spend_Category Profit_Margin\n", "0 Clothing 0.10\n", "1 Home Decor 0.43\n", "2 Electronics 0.13\n", "3 Stationery 0.20\n", "4 Toys 0.14" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame([['Clothing', 0.1],\n", " ['Home Decor', 0.43],\n", " ['Electronics', 0.13],\n", " ['Stationery', 0.2],\n", " ['Toys', 0.14]], columns = ['Spend_Category', 'Profit_Margin'])\n", "\n", "df" ] }, { "cell_type": "markdown", "id": "2a98f8a2", "metadata": {}, "source": [ "## Basic Matplotlib Chart" ] }, { "cell_type": "code", "execution_count": 3, "id": "c7987305", "metadata": {}, "outputs": [], "source": [ "def percent_formatter(x, pos):\n", " return f\"{x*100:.0f}%\"" ] }, { "cell_type": "code", "execution_count": 4, "id": "f3830565", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "<Figure size 864x648 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Define figure\n", "fig,ax = plt.subplots(figsize = (12,9))\n", "\n", "# Create bar plot\n", "plt.bar(df['Spend_Category'], 
df['Profit_Margin'])\n", "\n", "# Convert y-axis labels to percentage\n", "ax.yaxis.set_major_formatter(mtick.FuncFormatter(percent_formatter))\n", "\n", "# show grid\n", "plt.grid(axis = 'y',alpha = 0.45)\n", "\n", "# set title and x-y labels\n", "plt.title('Profit Margin for top five spend categories')\n", "plt.ylabel('Profit Margin')\n", "plt.xlabel('Category')\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "d0115fc2", "metadata": {}, "source": [ "## Matplotlib + Little Extra Effort" ] }, { "cell_type": "code", "execution_count": 5, "id": "04e04da7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div><div id=640b9d17-f598-4c6b-9a89-232d4052e72a style=\"display:none; background-color:#9D6CFF; color:white; width:200px; height:30px; padding-left:5px; border-radius:4px; flex-direction:row; justify-content:space-around; align-items:center;\" onmouseover=\"this.style.backgroundColor='#BA9BF8'\" onmouseout=\"this.style.backgroundColor='#9D6CFF'\" onclick=\"window.commands?.execute('create-mitosheet-from-dataframe-output');\">See Full Dataframe in Mito</div> <script> if (window.commands?.hasCommand('create-mitosheet-from-dataframe-output')) document.getElementById('640b9d17-f598-4c6b-9a89-232d4052e72a').style.display = 'flex' </script> <table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Spend_Category</th>\n", " <th>Profit_Margin</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>Clothing</td>\n", " <td>0.10</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>Electronics</td>\n", " <td>0.13</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>Toys</td>\n", " <td>0.14</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Stationery</td>\n", " <td>0.20</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>Home Decor</td>\n", " <td>0.43</td>\n", " </tr>\n", " </tbody>\n", "</table></div>" ], "text/plain": [ " Spend_Category
Profit_Margin\n", "0 Clothing 0.10\n", "1 Electronics 0.13\n", "2 Toys 0.14\n", "3 Stationery 0.20\n", "4 Home Decor 0.43" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## sort the dataframe\n", "df = df.sort_values(\"Profit_Margin\", ascending=True, ignore_index = True)\n", "df" ] }, { "cell_type": "code", "execution_count": 6, "id": "8201757f", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "<Figure size 720x432 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Define figure\n", "fig,ax = plt.subplots(figsize = (10,6))\n", "\n", "# add title to plot\n", "title = 'Profit Margin for top five spend categories'\n", "plt.text(s=title, # the text to be displayed\n", " x = 0,y = 0.91, # the position of the text (x, y)\n", " fontname = 'Times New Roman', # the font to be used for the text\n", " fontweight = \"bold\", # the weight of the font (e.g. bold)\n", " fontsize = 22, # the size of the font\n", " ha='left', # horizontal alignment of the text ('left', 'center', 'right')\n", " transform = fig.transFigure # the coordinate system to use for positioning the text\n", ")\n", "\n", "# add subtitle to plot\n", "subtitle = 'Consumers are willing to pay higher prices for decorative items for their home.'\n", "plt.text(s=subtitle, # the text to be displayed\n", " x = 0, y = 0.85, # the position of the text (x, y)\n", " fontname = 'Times New Roman', # the font to be used for the text\n", " fontsize = 18, # the size of the font\n", " ha = 'left', # horizontal alignment of the text ('left', 'center', 'right')\n", " transform = fig.transFigure # the coordinate system to use for positioning the text\n", ")\n", "\n", "# Create bar plot\n", "bars = plt.barh(df['Spend_Category'], \n", " df['Profit_Margin'], \n", " color = ['lightgray', 'lightgray', 'lightgray', 'lightgray', \"black\"], # color for bars\n", " edgecolor='none')\n", "\n", "# add 
margin\n", "plt.margins(y=0.14)\n", "\n", "# remove plot borders\n", "ax.spines['top'].set_visible(False)\n", "ax.spines['right'].set_visible(False)\n", "ax.spines['bottom'].set_visible(False)\n", "ax.spines['left'].set_visible(False)\n", "\n", "\n", "# remove xlabels\n", "ax.set_xticklabels([])\n", "ax.set_xticks([])\n", "\n", "# position and format ylabels\n", "ax.tick_params(axis='y', left=False)\n", "plt.yticks(fontsize=14, fontweight='bold')\n", "\n", "plt.bar_label(\n", " bars, # the bars object to add labels to\n", " labels=[f'{x:.0%}' for x in bars.datavalues], # the list of labels to display on each bar\n", " padding=12, # padding between the bar and the label\n", " fontsize=15 # font size of the labels\n", ")\n", "\n", "# add line\n", "ax.plot(\n", " [-0.025, .94], # X-coordinates of the ends of the line\n", " [.8335, .8335], # Y-coordinates of the ends of the line\n", " transform = fig.transFigure, # Coordinate system for the line\n", " clip_on = False, # Whether to clip the line to the axes bounds\n", " color = 'tab:blue', # Color of the line\n", " linewidth = 3 # Width of the line\n", ")\n", "\n", "\n", "# add rectangle\n", "ax.add_patch(\n", " plt.Rectangle(\n", " (-0.03,.95), # (x, y) anchor corner of the rectangle (the top-left here, because the height is negative)\n", " 0.015, # width of the rectangle\n", " -0.12, # height of the rectangle (negative, so it extends downward from y)\n", " facecolor = 'tab:blue', # fill color of the rectangle\n", " transform = fig.transFigure, # coordinate system used for the rectangle\n", " clip_on = False, # do not clip the rectangle to the axes area\n", " linewidth = 0 # width of the edge line of the rectangle\n", " )\n", ")\n", "\n", "# add footnote\n", "footnote = \"*Data from Jan-2022 to Mar-2023\"\n", "plt.text(s = footnote,\n", " x = 0,\n", " y = 0.13,\n", " fontname = 'Arial',\n", " fontstyle = 'italic',\n", " fontsize = 15,\n", " ha = 'left',\n", " transform = fig.transFigure\n", ");" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)",
"language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }