Source code for rick

# -*- coding: utf-8 -*-
"""
Version 0.8.0
"""
from psycopg2 import connect
import psycopg2.sql as pg
import pandas.io.sql as pandasql
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.ticker as ticker
import geopandas as gpd
import os
import shapely
import seaborn as sns
from shapely.geometry import Point
import matplotlib.font_manager as font_manager
import numpy as np
import pandas as pd
import copy
import datetime

class font:
    """
    Namespace holding the font settings shared by all charting functions.
    """

    # Family name passed as ``fontname`` to labels, ticks and annotations.
    normal = 'DejaVu Sans'
    # Heavier face, used for the legend title of the choropleth map.
    semibold = 'DejaVu Sans SemiBold'
    # Font properties handed to ``ax.legend(prop=...)``.
    leg_font = font_manager.FontProperties(family='DejaVu Sans', size=9)
class colour():
    """
    Namespace holding the colour palette shared by all charting functions.
    """

    purple = '#660159'
    grey = '#7f7e7e'
    orange = '#d95f02'
    green = '#0D9F73'
    blue = '#253494'
    light_grey = '#777777'

    # Name of the default matplotlib colourmap.
    cmap = 'YlOrRd'

    # 1-based series position -> colour used to draw that series.
    colours_map = dict(enumerate(
        (purple, grey, orange, blue, green, light_grey),
        start=1,
    ))

    def get_colour_from_index(self, index):
        """Return the hex colour stored under ``index`` in ``colours_map``.

        Raises
        ------
        KeyError
            If ``index`` is not a key of ``colours_map`` (keys are 1 to 6).
        """
        palette = self.colours_map
        return palette[index]
class geo:
    """
    Class for additional GIS layers needed for the choropleth map.
    """

    @staticmethod
    def _rotated_layer(query, con):
        """Read a layer from PostGIS, reproject it and rotate it.

        Parameters
        ------------
        query : str
            SQL statement returning a geometry column named ``geom``.
        con : SQL connection object
            Connection object needed to connect to the RDS

        Returns
        --------
        gdf
            Geopandas Dataframe in EPSG:3857 whose ``geom`` column has been
            rotated by -17 degrees about the point (0, 0).
        """
        layer = gpd.GeoDataFrame.from_postgis(query, con, geom_col='geom')
        layer = layer.to_crs(epsg=3857)

        # Rotating through ``apply`` handles every geometry on its own, so
        # mixed geometry types in one layer (e.g. MULTIPOLYGON next to
        # POLYGON) are not a problem.
        # NOTE(review): the -17 degree angle presumably squares the street
        # grid to the page - confirm.
        pivot = Point(0, 0)
        layer['geom'] = layer['geom'].apply(
            lambda shape: shapely.affinity.rotate(shape, angle=-17, origin=pivot)
        )
        return layer

    @staticmethod
    def ttc(con):
        """Function to return the TTC subway layer.

        Parameters
        ------------
        con : SQL connection object
            Connection object needed to connect to the RDS

        Returns
        --------
        gdf
            Geopandas Dataframe of the Subway Layer
        """
        query = ''' SELECT * FROM gis.subway_to '''
        return geo._rotated_layer(query, con)

    @staticmethod
    def island(con):
        """Function to return a layer of the Toronto island.

        Since the island is classified in the same neighbourhood as the
        waterfront, in some cases it is not completely accurate to show that
        the island shares the same data as the waterfront.

        Parameters
        ------------
        con : SQL connection object
            Connection object needed to connect to the RDS

        Returns
        --------
        gdf
            Geopandas Dataframe of the Toronto island.
        """
        query = ''' SELECT geom FROM tts.zones_tts06 WHERE gta06 = 81 '''
        return geo._rotated_layer(query, con)
class charts:
    """
    Class defining all the charting functions.
    """

    # ``func`` is declared global so that it is bound at module level and the
    # charting functions can call it by its bare name.
    global func

    def func():
        """Apply the seaborn and matplotlib settings shared by every chart."""
        sns.set(font_scale=1.5)
        mpl.rc('font', family='DejaVu Sans')
def chloro_map(con, df, lower, upper, title, **kwargs):
    """Creates a choropleth map

    Parameters
    -----------
    con : SQL connection object
        Connection object needed to connect to the RDS. It is only used
        when the subway or the island layer has to be fetched.
    df : GeoPandas Dataframe
        Data for the choropleth map. The data must only contain 2 columns;
        the first column has to be the geom column and the second has to be
        the data that needs to be mapped. The dataframe is copied, so the
        caller's column names are left untouched.
    lower : int
        Lower bound for colourmap
    upper : int
        Upper bound for the colourmap
    title : str
        Legend label
    subway : boolean, optional, default: False
        True to display subway on the map
    island : boolean, optional, default: True
        False to grey out the Toronto island
    cmap : str, optional, default: YlOrRd
        Matplotlib colourmap to use for the map
    unit : str, optional
        Unit to append to the end of the legend tick. It is only drawn when
        0 < upper < 100000.
    nbins : int, optional, default: 2
        Number of ticks in the colourmap

    Returns
    --------
    fig
        Matplotlib fig object
    ax
        Matplotlib ax object
    """
    func()
    subway = kwargs.get('subway', False)
    island = kwargs.get('island', True)
    cmap = kwargs.get('cmap', colour.cmap)
    unit = kwargs.get('unit', None)
    nbins = kwargs.get('nbins', 2)

    # Rename the columns on a copy; assigning to the argument itself would
    # silently rename the columns of the caller's dataframe.
    df = df.copy()
    df.columns = ['geom', 'values']

    light = '#d9d9d9'

    fig, ax = plt.subplots()
    fig.set_size_inches(6.69, 3.345)
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.set_axis_off()

    df.plot(column='values', ax=ax, vmin=lower, vmax=upper, cmap=cmap,
            edgecolor='w', linewidth=0.5)

    if not island:
        island_grey = geo.island(con)
        # Thick white outline first, then the flat grey fill on top.
        island_grey.plot(ax=ax, edgecolor='w', linewidth=4, color=light)
        island_grey.plot(ax=ax, edgecolor='w', linewidth=0, color=light)

    if subway:
        ttc_df = geo.ttc(con)
        # Wide translucent white line under a narrower dark one.
        ttc_df.plot(ax=ax, linewidth=4, color='w', alpha=0.6)
        ttc_df.plot(ax=ax, linewidth=2, color='k', alpha=0.4)

    # Adding the Legend Title
    props = dict(boxstyle='round', facecolor='w', alpha=0)
    ax.text(0.775, 0.37, title, transform=ax.transAxes, wrap=True,
            fontsize=7, fontname=font.semibold, verticalalignment='bottom',
            bbox=props, fontweight='bold')

    cax = fig.add_axes([0.718, 0.16, 0.01, 0.22])  # Size of colorbar

    # Thin frame drawn around the legend area.
    rect = patches.Rectangle((0.76, 0.01), 0.235, 0.43, linewidth=0.5,
                             transform=ax.transAxes, edgecolor=light,
                             facecolor='none')
    ax.add_patch(rect)
    ax.margins(0.1)

    sm = plt.cm.ScalarMappable(cmap=cmap,
                               norm=plt.Normalize(vmin=lower, vmax=upper))
    # The mappable carries no data of its own; it only feeds the colourbar.
    sm.set_array([])
    cbr = fig.colorbar(sm, cax=cax)
    cbr.outline.set_linewidth(0)
    cbr.locator = ticker.MaxNLocator(nbins=nbins)
    cbr.update_ticks()
    cbr.ax.yaxis.set_tick_params(width=0.5)
    cbr.ax.tick_params(labelsize=6)  # Formatting for Colorbar Text
    for tick_label in cbr.ax.yaxis.get_ticklabels():
        tick_label.set_family(font.normal)

    if unit is not None and upper > 0:
        # The unit sits to the right of the top tick label, so its x position
        # moves right as ``upper`` gains digits. Bounds of 100000 and above
        # have no position defined and get no unit.
        unit_positions = ((10, 0.829), (100, 0.839), (1000, 0.851),
                          (100000, 0.862))
        for limit, x_pos in unit_positions:
            if upper < limit:
                ax.text(x_pos, 0.32, unit, transform=ax.transAxes, wrap=True,
                        fontsize=6, fontname=font.normal,
                        verticalalignment='bottom', ha='left')
                break

    return fig, ax
def line_chart(data, ylab, xlab, **kwargs):
    """Creates a line chart. The x axis must be modified manually.

    Parameters
    -----------
    data : array like or scalar
        Data for the line chart.
    ylab : str
        Label for the y axis.
    xlab : str
        Label for the x axis.
    ymax : int, optional, default is the max y value
        The max value of the y axis
    ymin : int, optional, default is 0
        The minimum value of the y axis
    baseline : array like or scalar, optional, default is None
        Data for another line representing the baseline.
    yinc : int, optional
        The increment of ticks on the y axis.

    Returns
    --------
    fig
        Matplotlib fig object
    ax
        Matplotlib ax object
    props
        Dictionary of the text annotation properties
    """
    func()

    ymax = kwargs.get('ymax', int(data.max()))
    ymin = kwargs.get('ymin', 0)
    baseline = kwargs.get('baseline', None)

    # Default tick increment: a quarter of the y range, scaled down by powers
    # of ten (at least once) until it drops below 10.
    step = (ymax - ymin) / 4 / 10
    exponent = 1
    while not step < 10:
        step /= 10
        exponent += 1
    yinc = kwargs.get('yinc', int(round(step + 1) * pow(10, exponent)))

    # Box style returned to the caller for its own text annotations.
    props = dict(boxstyle='round, pad=0.4', edgecolor=colour.purple,
                 linewidth=2, facecolor='w', alpha=1)

    fig, ax = plt.subplots()
    ax.plot(data, linewidth=3, color=colour.purple)
    if baseline is not None:
        ax.plot(baseline, linewidth=3, color=colour.grey)

    plt.grid()
    ax.yaxis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))
    ax.set_facecolor('xkcd:white')

    label_style = dict(fontsize=9, fontweight='bold',
                       horizontalalignment='right', labelpad=10,
                       fontname=font.normal)
    plt.xlabel(xlab, x=0, **label_style)
    ax.grid(color='k', linestyle='-', linewidth=0.2)
    plt.ylabel(ylab, y=1.0, **label_style)

    fig.set_size_inches(6.1, 4.1)

    y_ticks = range(ymin, ymax + yinc, yinc)
    plt.xticks(fontsize=9, fontname=font.normal)
    plt.yticks(y_ticks, fontsize=9, fontname=font.normal)

    ax.set_ylim([ymin, ymax])
    fig.patch.set_facecolor('w')

    return fig, ax, props
def tow_chart(data, ylab, **kwargs):
    """Creates a 7 day time of week line chart. Each data point represents
    1 hour out of 168 hours.

    Parameters
    -----------
    data : array like or scalar
        Data for the tow chart. Data must only have 168 entries, with row 0
        representing Monday at midnight.
    ylab : str
        Label for the y axis.
    ymax : int, optional, default is the max y value
        The max value of the y axis
    ymin : int, optional, default is 0
        The minimum value of the y axis
    yinc : int, optional
        The increment of ticks on the y axis.

    Returns
    --------
    fig
        Matplotlib fig object
    ax
        Matplotlib ax object
    props
        Dictionary of the text annotation properties
    """
    func()

    ymax = kwargs.get('ymax', None)
    ymin = kwargs.get('ymin', 0)
    ymax_given = ymax is not None
    if not ymax_given:
        ymax = int(data.max())

    # Default tick increment: a third of the y range, scaled down by powers
    # of ten (at least once) until it drops below 10.
    delta = (ymax - ymin)/3
    i = 0
    while True:
        delta /= 10
        i += 1
        if delta < 10:
            break
    yinc = kwargs.get('yinc', int(round(delta+1)*pow(10, i)))

    # A caller-supplied maximum is used as is; otherwise the axis ends on
    # the third tick above ymin.
    upper = ymax if ymax_given else int(3*yinc+ymin)

    fig, ax = plt.subplots()
    ax.plot(data, linewidth=2.5, color=colour.purple)

    plt.grid()
    ax.set_facecolor('xkcd:white')

    plt.xlabel('Time of week', fontname=font.normal, fontsize=9,
               horizontalalignment='left', x=0, labelpad=3,
               fontweight='bold')
    ax.set_ylim([ymin, upper])
    ax.grid(color='k', linestyle='-', linewidth=0.2)
    plt.ylabel(ylab, fontname=font.normal, fontsize=9,
               horizontalalignment='right', y=1, labelpad=7,
               fontweight='bold')
    fig.set_size_inches(6.1, 1.8)

    ax.yaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
    plt.yticks(range(ymin, upper+int(0.1*yinc), yinc), fontsize=9,
               fontname=font.normal)

    # Major ticks every 12 hours, labelled with the hour of the day. There
    # must be exactly as many positions as labels (14): matplotlib raises a
    # ValueError on a mismatch once the tick positions are fixed. A 15th
    # position at hour 168 would lie outside the x limits set below anyway.
    hour_labels = ['0', '12'] * 7
    ax.set_xticks(range(0, 168, 12))
    ax.set_xticklabels(hour_labels, fontname=font.normal, fontsize=7,
                       color=colour.light_grey)

    # Minor ticks at noon of each day carry the day names.
    day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                 'Saturday', 'Sunday']
    ax.xaxis.set_minor_locator(ticker.FixedLocator(list(range(12, 168, 24))))
    ax.xaxis.set_minor_formatter(ticker.FixedFormatter(day_names))
    ax.tick_params(axis='x', which='minor', colors='k', labelsize=9, pad=14)

    # Box style returned to the caller for its own text annotations.
    props = dict(boxstyle='round, pad=0.3', edgecolor=colour.purple,
                 linewidth=1.5, facecolor='w', alpha=1)

    ax.set_xlim([0, 167])

    return fig, ax, props
def stacked_chart(data_in, xlab, lab1, lab2, **kwargs):
    """Creates a horizontal bar chart comparing 2 sets of data, drawn as a
    pair of bars per row.

    Parameters
    -----------
    data_in : dataframe
        Data for the bar chart. The dataframe must have 3 columns, the first
        representing the y ticks, the second representing the baseline, and
        the third representing the next series of data. The dataframe is
        copied and not modified.
    xlab : str
        Label for the x axis.
    lab1 : str
        Label in the legend for the baseline
    lab2 : str
        Label in the legend for the next data series
    xmax : int, optional, default is derived from the max x value
        The max value of the x axis
    xmin : int, optional, default is 0
        First tick of the x axis
    precision : int, optional, default is -1
        Decimal places in the annotations. Values are passed to ``round``,
        so -1 rounds to the nearest ten. When smaller than 1 the data is
        cast to int first.
    percent : boolean, optional, default is False
        Whether to annotate each row with the percentage change of the
        second series relative to the baseline. Rows whose change is not a
        finite number (baseline of 0) get no percentage annotation.
    xinc : int, optional
        The increment of ticks on the x axis.

    Returns
    --------
    fig
        Matplotlib fig object
    ax
        Matplotlib ax object
    """
    func()
    data = data_in.copy(deep=True)
    data.columns = ['name', 'values1', 'values2']

    xmin = kwargs.get('xmin', 0)
    xmax = kwargs.get('xmax', None)
    precision = kwargs.get('precision', -1)
    percent = kwargs.get('percent', False)

    xmax_given = xmax is not None
    if not xmax_given:
        xmax = int(max(data[['values1', 'values2']].max()))

    # Default tick increment: a quarter of the x range, scaled down by powers
    # of ten (at least once) until it drops below 10.
    delta = (xmax - xmin)/4
    i = 0
    while True:
        delta /= 10
        i += 1
        if delta < 10:
            break
    xinc = kwargs.get('xinc', int(round(delta+1)*pow(10, i)))

    # A caller-supplied maximum is used as is; otherwise the axis ends on
    # the fourth tick above xmin.
    upper = xmax if xmax_given else int(4*xinc+xmin)

    ind = np.arange(len(data))

    fig, ax = plt.subplots()
    fig.set_size_inches(6.1, len(data))
    ax.grid(color='k', linestyle='-', linewidth=0.25)
    p1 = ax.barh(ind+0.4, data['values1'], 0.4, align='center',
                 color=colour.grey)
    p2 = ax.barh(ind, data['values2'], 0.4, align='center',
                 color=colour.purple)
    ax.xaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
    ax.xaxis.grid(True)
    ax.yaxis.grid(False)
    ax.set_yticks(ind+0.4/2)
    ax.set_xlim(0, upper)
    ax.set_yticklabels(data['name'])
    ax.set_xlabel(xlab, horizontalalignment='left', x=0, labelpad=10,
                  fontname=font.normal, fontsize=10, fontweight='bold')
    ax.set_facecolor('xkcd:white')

    if precision < 1:
        data[['values1', 'values2']] = data[['values1', 'values2']].astype(int)

    def annotate_value(value, y):
        """Write ``value`` next to the end of the bar drawn at height ``y``."""
        text = str(format(round(value, precision), ','))
        if value < 0.1*upper:
            # Short bar: black text just right of the bar end.
            ax.annotate(text, xy=(value+0.015*upper, y-0.05), ha='left',
                        color='k', fontname=font.normal, fontsize=10)
        else:
            # Long bar: white text just inside the bar end.
            ax.annotate(text, xy=(value-0.015*upper, y-0.05), ha='right',
                        color='w', fontname=font.normal, fontsize=10)

    # values2 bars sit at y = row number, values1 bars 0.4 above them.
    for column, base in (('values2', 0), ('values1', 0.4)):
        for row_no, value in enumerate(data[column]):
            annotate_value(value, base + row_no)

    ax.legend((p1[0], p2[0]), (lab1, lab2), loc=4, frameon=False,
              prop=font.leg_font)
    plt.xticks(range(xmin, upper+int(0.1*xinc), xinc), fontname=font.normal,
               fontsize=10)
    plt.yticks(fontname=font.normal, fontsize=10)

    if percent:
        change = (data['values2']-data['values1'])*100/data['values1']
        rows = zip(data['values1'], data['values2'], change)
        for row_no, (base_value, new_value, pct) in enumerate(rows):
            # A baseline of 0 yields inf or NaN, which cannot be converted
            # to int; leave such rows without a percentage.
            if not np.isfinite(pct):
                continue
            sign = '+' if pct > 0 else ''
            # Leave more room when the value label of a short bar is itself
            # written to the right of the bar.
            gap = 0.12 if new_value < 0.1*upper else 0.03
            ax.annotate(sign + str(format(int(round(pct, 0)), ',')) + '%',
                        xy=(max(base_value, new_value) + gap*upper,
                            0.15 + row_no),
                        color='k', fontname=font.normal, fontsize=10)

    return fig, ax
def stacked_chart_quad(data_in, xlab, lab1, lab2, lab3, lab4, **kwargs):
    """Creates a horizontal bar chart comparing 4 sets of data, drawn as a
    group of four bars per row.

    Parameters
    -----------
    data_in : dataframe
        Data for the bar chart. The dataframe must have 5 columns, the first
        representing the y ticks and the remaining four the data series, in
        the order of ``lab1`` to ``lab4``. The dataframe is copied and not
        modified.
    xlab : str
        Label for the x axis.
    lab1 : str
        Label in the legend for the first data series
    lab2 : str
        Label in the legend for the second data series
    lab3 : str
        Label in the legend for the third data series
    lab4 : str
        Label in the legend for the fourth data series
    xmax : int, optional, default is derived from the max x value
        The max value of the x axis
    xmin : int, optional, default is 0
        First tick of the x axis
    precision : int, optional, default is -1
        Decimal places in the annotations. Values are passed to ``round``,
        so -1 rounds to the nearest ten. When smaller than 1 the data is
        cast to int first.
    percent : boolean, optional, default is False
        Whether to annotate each row with the percentage change of the
        fourth series relative to each of the other three. Changes that are
        not a finite number (reference value of 0) are not annotated.
    xinc : int, optional
        The increment of ticks on the x axis.

    Returns
    --------
    fig
        Matplotlib fig object
    ax
        Matplotlib ax object
    """
    func()
    value_cols = ['values1', 'values2', 'values3', 'values4']
    data = data_in.copy(deep=True)
    data.columns = ['name'] + value_cols

    xmin = kwargs.get('xmin', 0)
    xmax = kwargs.get('xmax', None)
    precision = kwargs.get('precision', -1)
    percent = kwargs.get('percent', False)

    xmax_given = xmax is not None
    if not xmax_given:
        xmax = int(max(data[value_cols].max()))

    # Default tick increment: a quarter of the x range, scaled down by powers
    # of ten (at least once) until it drops below 10.
    delta = (xmax - xmin)/4
    i = 0
    while True:
        delta /= 10
        i += 1
        if delta < 10:
            break
    xinc = kwargs.get('xinc', int(round(delta+1)*pow(10, i)))

    # A caller-supplied maximum is used as is; otherwise the axis ends on
    # the fourth tick above xmin.
    upper = xmax if xmax_given else int(4*xinc+xmin)

    ind = np.arange(len(data))

    fig, ax = plt.subplots()
    fig.set_size_inches(6.1, len(data)*1.5)
    ax.grid(color='k', linestyle='-', linewidth=0.25)
    # Within a row the first series is the top bar and the fourth the bottom.
    p1 = ax.barh(ind+0.6, data['values1'], 0.2, align='center',
                 color=colour.green)
    p2 = ax.barh(ind+0.4, data['values2'], 0.2, align='center',
                 color=colour.blue)
    p3 = ax.barh(ind+0.2, data['values3'], 0.2, align='center',
                 color=colour.grey)
    p4 = ax.barh(ind, data['values4'], 0.2, align='center',
                 color=colour.purple)
    ax.xaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
    ax.xaxis.grid(True)
    ax.yaxis.grid(False)
    ax.set_yticks(ind+0.6/2)
    ax.set_xlim(0, upper)
    ax.set_yticklabels(data['name'])
    ax.set_xlabel(xlab, horizontalalignment='left', x=0, labelpad=10,
                  fontname=font.normal, fontsize=10, fontweight='bold')
    ax.set_facecolor('xkcd:white')

    if precision < 1:
        data[value_cols] = data[value_cols].astype(int)

    def annotate_value(value, y):
        """Write ``value`` next to the end of the bar drawn at height ``y``."""
        text = str(format(round(value, precision), ','))
        if value < 0.1*upper:
            # Short bar: black text just right of the bar end.
            ax.annotate(text, xy=(value+0.015*upper, y-0.05), ha='left',
                        color='k', fontname=font.normal, fontsize=10)
        else:
            # Long bar: white text just inside the bar end.
            ax.annotate(text, xy=(value-0.015*upper, y-0.05), ha='right',
                        color='w', fontname=font.normal, fontsize=10)

    # Walk the series from the bottom bar (values4) to the top bar (values1);
    # each series sits 0.2 higher than the previous one.
    for k in range(4, 0, -1):
        base = 0.2*(4-k)
        for row_no, value in enumerate(data[f'values{k}']):
            annotate_value(value, base + row_no)

    ax.legend((p1[0], p2[0], p3[0], p4[0]), (lab1, lab2, lab3, lab4), loc=4,
              frameon=False, prop=font.leg_font)
    plt.xticks(range(xmin, upper+int(0.1*xinc), xinc), fontname=font.normal,
               fontsize=10)
    plt.yticks(fontname=font.normal, fontsize=10)

    if percent:
        latest = data['values4']
        row_max = data[value_cols].max(axis=1)
        # Change of the fourth series against the third, second and first,
        # each written 0.2 higher than the previous one.
        for k in range(3, 0, -1):
            base = 0.15 + 0.2*(3-k)
            reference = data[f'values{k}']
            change = (latest-reference)*100/reference
            rows = zip(change, latest, row_max)
            for row_no, (pct, latest_value, widest) in enumerate(rows):
                # A reference value of 0 yields inf or NaN, which cannot be
                # converted to int; skip that annotation.
                if not np.isfinite(pct):
                    continue
                sign = '+' if pct > 0 else ''
                gap = 0.12 if latest_value < 0.1*upper else 0.03
                ax.annotate(sign + str(format(int(round(pct, 0)), ',')) + '%',
                            xy=(widest + gap*upper, base + row_no),
                            color='k', fontname=font.normal, fontsize=10)

    return fig, ax
def horizontal_grouped_bar_chart(data: pd.DataFrame, **kwargs: dict) -> (plt.figure, plt.axes):
    '''
    Creates a horizontal grouped bar chart.

    The number of bars in each group is determined from the number of
    columns in the input dataframe, while the number of groups is determined
    by the number of rows. The work is delegated to
    ``general_grouped_bar_chart``, to which every optional argument is
    forwarded unchanged.

    Parameters
    -----------
    Required:
        data : pd.DataFrame
            Data for the grouped bar chart.
    Optional:
        ylab : str
            Label for the y axis.
        xlab : str
            Label for the x axis.
        xmax : float
            The max value of the x axis.
        xmin : float
            The minimum value of the x axis.
            Should include this if minimum < 0.
        xinc : float
            The increment of ticks on the x axis.
        ax : plt.axes
            The axis that the plot will be located on.
        plot_size : (int, int)
            Custom plot dimensions.
        precision : int
            Decimal points in the annotations.
        percent : bool
            Flag determining whether to show percentage change between
            baseline column (assumed to be the first column) and remaining
            columns.
        additional_annotations : dict
            Dictionary with keys of type (int, int) and values of type (str),
            indicating the coordinates and annotation to be added.
        legend : list[str]
            A list of labels to be used for the legend.

    Returns
    --------
    fig
        Matplotlib fig object
    ax
        Matplotlib ax object
    '''
    # Default size: fixed width, height growing with the number of groups.
    default_size = (6.1, len(data)*1.5)
    return general_grouped_bar_chart(
        data=data,
        horizontal=True,
        param_axis='x',
        index_axis='y',
        grid_y=False,
        standard_plot_size=default_size,
        **kwargs
    )
def vertical_grouped_bar_chart(data: pd.DataFrame, **kwargs: dict) -> (plt.figure, plt.axes):
    '''
    Creates a vertical grouped bar chart.

    The number of bars in each group is determined from the number of
    columns in the input dataframe, while the number of groups is determined
    by the number of rows. The work is delegated to
    ``general_grouped_bar_chart``, to which every optional argument is
    forwarded unchanged.

    Parameters
    -----------
    Required:
        data : pd.DataFrame
            Data for the grouped bar chart.
    Optional:
        ylab : str
            Label for the y axis.
        xlab : str
            Label for the x axis.
        ymax : float
            The max value of the y axis.
        ymin : float
            The minimum value of the y axis.
            Should include this if minimum < 0.
        yinc : float
            The increment of ticks on the y axis.
        ax : plt.axes
            The axis that the plot will be located on.
        plot_size : (int, int)
            Custom plot dimensions.
        precision : int
            Decimal points in the annotations.
        percent : bool
            Flag determining whether to show percentage change between
            baseline column (assumed to be the first column) and remaining
            columns.
        additional_annotations : dict
            Dictionary with keys of type (int, int) and values of type (str),
            indicating the coordinates and annotation to be added.
        legend : list[str]
            A list of string objects to be used for the legend.

    Returns
    --------
    fig
        Matplotlib fig object
    ax
        Matplotlib ax object
    '''
    # Default size: fixed height, width growing with the number of groups.
    default_size = (len(data)*1.5, 6.1)
    return general_grouped_bar_chart(
        data=data,
        horizontal=False,
        param_axis='y',
        index_axis='x',
        grid_x=False,
        standard_plot_size=default_size,
        **kwargs
    )
def bar_chart(data_in, xlab, **kwargs):
    """Creates a horizontal bar chart

    Parameters
    -----------
    data_in : dataframe
        Data for the bar chart. The dataframe must have 2 columns, the first
        representing the y ticks, and the second representing the data. The
        dataframe is copied and not modified.
    xlab : str
        Label for the x axis.
    xmax : int, optional, default is derived from the max x value
        The max value of the x axis
    xmin : int, optional, default is 0
        First tick of the x axis
    precision : int, optional, default is 0
        Decimal places in the annotations. When smaller than 1 the data is
        cast to int first.
    xinc : int, optional
        The increment of ticks on the x axis.

    Returns
    --------
    fig
        Matplotlib fig object
    ax
        Matplotlib ax object
    """
    func()
    data = data_in.copy(deep=True)
    data.columns = ['name', 'values1']

    xmin = kwargs.get('xmin', 0)
    xmax = kwargs.get('xmax', None)
    precision = kwargs.get('precision', 0)

    xmax_given = xmax is not None
    if not xmax_given:
        xmax = data['values1'].max()

    # Default tick increment: a quarter of the x range, scaled down by powers
    # of ten until it is below 10. Unlike the other charts the check comes
    # before the first division, so small ranges are not scaled at all.
    delta = (xmax - xmin)/4
    i = 0
    while not delta < 10:
        delta /= 10
        i += 1
    xinc = kwargs.get('xinc', int(round(delta+1)*pow(10, i)))

    # A caller-supplied maximum is used as is; otherwise the axis ends on
    # the fourth tick above xmin.
    upper = xmax if xmax_given else int(4*xinc+xmin)

    ind = np.arange(len(data))

    fig, ax = plt.subplots()
    fig.set_size_inches(6.1, len(data)*0.7)
    ax.grid(color='k', linestyle='-', linewidth=0.25)
    ax.barh(ind, data['values1'], 0.75, align='center', color=colour.purple)
    ax.xaxis.set_major_formatter(mpl.ticker.StrMethodFormatter('{x:,.0f}'))
    ax.xaxis.grid(True)
    ax.yaxis.grid(False)
    ax.set_yticks(ind)
    ax.set_xlim(0, upper)
    ax.set_yticklabels(data['name'])
    ax.set_xlabel(xlab, horizontalalignment='left', x=0, labelpad=10,
                  fontname=font.normal, fontsize=10, fontweight='bold')
    ax.set_facecolor('xkcd:white')

    if precision < 1:
        data['values1'] = data['values1'].astype(int)

    for row_no, value in enumerate(data['values1']):
        text = str(format(round(value, precision), ','))
        if value < 0.1*upper:
            # Short bar: black text just right of the bar end.
            ax.annotate(text, xy=(value+0.015*upper, row_no-0.05), ha='left',
                        color='k', fontname=font.normal, fontsize=10)
        else:
            # Long bar: white text just inside the bar end.
            ax.annotate(text, xy=(value-0.015*upper, row_no-0.05), ha='right',
                        color='w', fontname=font.normal, fontsize=10)

    plt.xticks(range(xmin, upper+int(0.1*xinc), xinc), fontname=font.normal,
               fontsize=10)
    plt.yticks(fontname=font.normal, fontsize=10)

    return fig, ax
def multi_linechart(data:pd.DataFrame, ylab:str, xlab:str, **kwargs:dict) -> (plt.figure, plt.axes):
    '''
    Creates a line chart of one or more lines. The number of lines to plot is
    determined from the columns in the input dataframe.

    Parameters
    ----------
    data : pd.DataFrame
        Data for the line chart.
    ylab : str
        Label for the y axis.
    xlab : str
        Label for the x axis.
    ymax : float, optional
        The max value of the y axis.
    ymin : float, optional
        The min value of the y axis.
        Should include this if ymin < 0.
    yinc : float, optional
        The increment of ticks on the y axis.
    ax : plt.axes, optional
        The axis that the plot will be located on.
    legend : list[str], optional
        Labels to be used for the legend. No legend is drawn when omitted.
    plot_size : (int, int), optional
        The dimensions of the plot if given a custom size.
        Defaults to (6.1, 4.1).
    minor_x : bool, optional
        When set to True, a minor grid is added to the plot along x axis.
    minor_y : bool, optional
        When set to True, a minor grid is added to the plot along y axis.
    num_minor_x: int, optional
        The number of minor ticks between major ticks along the x axis.
    num_minor_y: int, optional
        The number of minor ticks between major ticks along the y axis.
    shaded_areas : dict[(str, str): (Any, Any)], optional
        Passed on to ``add_shaded_areas``. Start and end x coordinates
        indicate range of shaded region and must be specified.
        Label can be specified or left as None.
        Colour can be specified or left as None in which case light grey
        is used by default.

    Returns
    --------
    fig
        Matplotlib fig object
    ax
        Matplotlib ax object
    '''
    func()

    show_minor_x = kwargs.get('minor_x', False)
    show_minor_y = kwargs.get('minor_y', False)

    # The fourth value (annotation bound) is not needed for a line chart.
    ymax, ymin, yinc, _ = calculate_params(df=data, param_axis='y', **kwargs)

    fig, ax = plot_line_data(
        df=data,
        axis=kwargs.get('ax', None),
        legend=kwargs.get('legend', None),
    )

    set_plot_style(
        fig=fig,
        ax=ax,
        plot_size=kwargs.get('plot_size', (6.1, 4.1)),
        grid_x=True,
        grid_y=True,
        grid_minor_x=show_minor_x,
        grid_minor_y=show_minor_y,
        min_value=ymin,
        max_value=ymax,
        param_axis='y',
    )

    set_ticks(
        ax=ax,
        df=data,
        min_value=ymin,
        max_value=ymax,
        inc=yinc,
        minor_x=show_minor_x,
        minor_y=show_minor_y,
        num_minor_x=kwargs.get('num_minor_x', None),
        num_minor_y=kwargs.get('num_minor_y', None),
    )

    set_labels(ax=ax, xlab=xlab, ylab=ylab)

    add_shaded_areas(
        ax=ax,
        df=data,
        shaded_areas=kwargs.get('shaded_areas', None),
    )

    return fig, ax
def calculate_params(df:pd.DataFrame, param_axis:str, **kwargs:dict) -> (float, float, float, float):
    '''
    Checks if minimum, maximum and increment values are passed into the
    plotting function for the specified axis, and returns these. Otherwise,
    calculates them. A value passed explicitly as None is treated as not
    passed.

    Parameters
    ----------
    df : pd.DataFrame
        Data being plotted. Only inspected when no maximum is passed.
    param_axis : str
        Axis along which max, min, upper, inc values should be calculated.
        It is used as the prefix of the keyword names read below.
    xmin : float, optional
        Minimum value of x axis.
    xmax : float, optional
        Maximum value of x axis.
    xinc : float, optional
        The increment of ticks on the x axis.
    ymin : float, optional
        Minimum value of y axis
    ymax : float, optional
        Maximum value of y axis.
    yinc : float, optional
        The increment of ticks on the y axis.

    Returns
    -------
    max_value : float
        Maximum value along param_axis. Defaults to the largest value in
        ``df`` truncated to an int.
    min_value : float
        Minimum value along param_axis. Defaults to 0.
    inc : float
        Increment of ticks along param_axis.
    upper : float
        Value used for placing of annotations.
    '''
    # TODO: check whether the calculation of inc can be improved, at what
    # value does it fail, maybe print a warning if the user should specify it.
    max_value = kwargs.get(f'{param_axis}max')
    max_supplied = max_value is not None
    if not max_supplied:
        # Only look at the data when it is needed, so that a caller who
        # provides the maximum is not affected by what ``df`` contains.
        max_value = int(df.max(axis=1).max(axis=0))

    min_value = kwargs.get(f'{param_axis}min')
    if min_value is None:
        min_value = 0

    delta, i = calculate_delta(max_value, min_value)

    inc = kwargs.get(f'{param_axis}inc')
    if inc is None:
        inc = int(round(delta+1)*pow(10, i))

    # NOTE(review): stacked_chart uses the caller's maximum as ``upper`` and
    # min + 4*inc only when the maximum is derived from the data; here it is
    # the other way round. Kept as is - confirm against the consumers of
    # ``upper`` before changing it.
    if not max_supplied:
        upper = max_value
    else:
        upper = int(min_value+4*inc)

    return max_value, min_value, inc, upper
def calculate_delta(max_value:float, min_value:float) -> tuple[float, int]:
    '''
    Returns parameters used to find the size of the y or x axis increments.

    A quarter of the value range is divided by ten, at least once, until it
    is smaller than 10.

    Parameters
    ----------
    max_value : float
        Maximum value of data being plotted in non-index axis.
    min_value : float
        Minimum value of data being plotted in non-index axis.

    Returns
    -------
    float
        A quarter of the value range after scaling; always smaller than 10.
    int
        Number of times the quarter range was divided by ten (at least 1).

    Raises
    ------
    ValueError
        If the value range is NaN or infinite. Such a range can never be
        scaled below 10, so the loop would not terminate.
    '''
    delta = (max_value - min_value)/4

    # ``delta != delta`` is only true for NaN.
    if delta != delta or delta in (float('inf'), float('-inf')):
        raise ValueError(
            'Cannot derive an axis increment from a non-finite range: '
            f'max_value={max_value!r}, min_value={min_value!r}'
        )

    i = 0
    while True:
        delta /= 10
        i += 1
        if delta < 10:
            break

    return delta, i
def plot_line_data(df:pd.DataFrame, axis:plt.axes, legend:list[str]) -> (plt.figure, plt.axes):
    '''
    Plots all columns in the input dataframe as lines in one graph on the
    specified axis object. Also adds a legend if labels are provided.

    Parameters
    ----------
    df : pd.DataFrame
        Data to be plotted.
    axis : plt.axes
        Prespecified axis to be used for the plot, or None to create a new
        figure and axis.
    legend : list[str]
        List of labels to be used for the legend, or None for no legend.

    Returns
    -------
    fig
        Matplotlib fig object
    ax
        Matplotlib ax object

    Raises
    ------
    KeyError
        If ``df`` has more columns than there are entries in
        ``colour.colours_map`` (six).
    '''
    fig, ax = init_fig(axis)

    palette = colour()
    lines = []
    # Colours are keyed from 1, in column order.
    for position, col in enumerate(df.columns, start=1):
        hex_code = palette.get_colour_from_index(position)
        lines.extend(ax.plot(df[col], linewidth=3, color=hex_code))

    # Identity check: ``!= None`` would be evaluated element by element for
    # array-like legends.
    if legend is not None:
        # Anchored just outside the top right corner of the axes.
        ax.legend(handles=lines,
                  labels=legend,
                  loc='upper left',
                  bbox_to_anchor=(1.04, 1),
                  frameon=False,
                  prop=font.leg_font,
                  borderpad=0)

    return fig, ax
def plot_grouped_bar_data(df:pd.DataFrame, ax:plt.axes, legend:list[str], horizontal:bool) -> (plt.figure, plt.axes):
    '''
    Plots all columns in the input dataframe as bars in a grouped bar graph.
    Also adds a legend if a list of strings is provided.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe being plotted.
    ax : plt.axes
        Axis object to be used for the plot if specified by user, or None to
        create a new figure and axis.
    legend : list[str]
        List of labels to be used for the legend, or None for no legend.
    horizontal : bool
        Flag indicating whether this is a horizontal plot.

    Returns
    -------
    fig
        Matplotlib figure object.
    ax
        Matplotlib axis object.

    Raises
    ------
    KeyError
        If ``df`` has more columns than there are entries in
        ``colour.colours_map`` (six).
    '''
    fig, ax = init_fig(ax)

    # One bar width is left empty between neighbouring groups.
    bar_width = 1/(len(df.columns)+1)
    palette = colour()
    ind = np.arange(len(df))

    draw_bars = ax.barh if horizontal else ax.bar
    # Horizontal bars are added in reverse column order, so the first column
    # is drawn last, i.e. furthest along the index axis within each group.
    columns = reversed(df.columns) if horizontal else df.columns

    bars = []
    adjustment = 0
    for position, col in enumerate(columns, start=1):
        hex_code = palette.get_colour_from_index(position)
        bars.append(draw_bars(ind+adjustment, df[col], bar_width,
                              align='center', color=hex_code))
        adjustment += bar_width

    # Identity check: ``!= None`` would be evaluated element by element for
    # array-like legends.
    if legend is not None:
        # Undo the reversal so the handles line up with the labels, which
        # are given in column order.
        ax.legend(handles=bars[::-1] if horizontal else bars,
                  labels=legend,
                  loc='upper left',
                  bbox_to_anchor=(1.04, 1),
                  frameon=False,
                  prop=font.leg_font,
                  borderpad=2)

    return fig, ax
def init_fig(axis:plt.axes) -> (plt.figure, plt.axes):
    '''
    Sets the plot fig and axes objects to be the ones specified by the user
    or creates new ones.

    Parameters
    ----------
    axis : plt.axes
        Axis object for the plot prespecified by the user, or None.

    Returns
    -------
    fig
        Matplotlib figure object; the figure owning ``axis`` when one is
        given.
    ax
        Matplotlib axis object; ``axis`` itself when one is given.
    '''
    if axis is None:
        fig, ax = plt.subplots()
        return fig, ax

    return axis.get_figure(), axis
def set_plot_style(fig:plt.figure, ax:plt.axes, plot_size:(int, int), grid_x:bool, grid_y:bool, min_value:float, max_value:float, param_axis:str, grid_minor_x:bool = False, grid_minor_y:bool = False )-> None:
    '''
    Sets size, axis limits, background and grid for plot.

    Parameters
    ----------
    fig : plt.figure
        Figure object corresponding to plot.
    ax : plt.axes
        Axis object corresponding to plot.
    plot_size : (int, int)
        The dimensions of the plot in inches.
    grid_x : bool
        Whether there is a grid parallel to x ticks.
    grid_y : bool
        Whether there is a grid parallel to y ticks.
    min_value : float
        Minimum value of param_axis.
    max_value : float
        Maximum value of param_axis.
    param_axis : str
        Axis opposite to index axis. 'y' limits the y axis; any other value
        limits the x axis.
    grid_minor_x : bool, optional
        When set to True, a minor grid is added to the plot along x axis.
    grid_minor_y : bool, optional
        When set to True, a minor grid is added to the plot along y axis.
    '''
    fig.set_size_inches(plot_size)

    if param_axis == 'y':
        apply_limits = ax.set_ylim
    else:
        apply_limits = ax.set_xlim
    apply_limits([min_value, max_value])

    ax.set_facecolor('xkcd:white')
    set_grid(ax, grid_x, grid_y, grid_minor_x, grid_minor_y)
def set_grid(ax:plt.axes, grid_x:bool, grid_y:bool, grid_minor_x:bool, grid_minor_y:bool)-> None:
    '''
    Sets the grid for plot. Only the grids whose flag is truthy are touched.

    Parameters
    ----------
    ax : plt.axes
        Axis object corresponding to plot.
    grid_x : bool
        Flag indicating whether to add a major grid on the x axis.
    grid_y : bool
        Flag indicating whether to add a major grid on the y axis.
    grid_minor_x : bool
        When set to True, a minor grid is added to the plot along x axis.
    grid_minor_y : bool
        When set to True, a minor grid is added to the plot along y axis.
    '''
    # (axis, flag, tick level, line width); minor grids first, then major.
    requests = (
        (ax.xaxis, grid_minor_x, 'minor', 0.05),
        (ax.yaxis, grid_minor_y, 'minor', 0.05),
        (ax.xaxis, grid_x, 'major', 0.2),
        (ax.yaxis, grid_y, 'major', 0.2),
    )
    for target_axis, flag, level, width in requests:
        if flag:
            target_axis.grid(flag, which=level, color='k', linestyle='-',
                             linewidth=width)
def set_ticks(ax:plt.axes, df:pd.DataFrame, min_value:float, max_value:float, inc:float, index_axis:str='x', offset:float=0.0, minor_x:bool=False, minor_y:bool=False, num_minor_x:int=None, num_minor_y:int=None) -> None:
    '''
    Sets x and y axis tick locations and tick labels.

    Parameters
    ----------
    ax : plt.axes
        Matplotlib axes object.
    df : pd.DataFrame
        Dataset being plotted.
    min_value : float
        Minimum value along non index axis.
    max_value : float
        Maximum value along non index axis.
    inc : float
        Incrementation of ticks along non index axis.
    index_axis : str
        The axis showing the index of the DataFrame object, e.g. the x axis
        for a line chart. Defaults to 'x'; any value other than 'y' is
        treated as 'x'.
    offset : float
        Offset in the placement of ticks. Used for grouped bar charts to
        center labels. Defaults to 0.0. Not applied to a date index.
    minor_x : bool, optional
        When set to True, minor ticks are added along the x axis.
    minor_y : bool, optional
        When set to True, minor ticks are added along the y axis.
    num_minor_x: int, optional
        The number of minor ticks between major ticks along the x axis.
    num_minor_y: int, optional
        The number of minor ticks between major ticks along the y axis.
    '''
    # Checking if data being plotted is indexed by date
    # Assumption: dates are plotted on x axis
    if isinstance(df.index, pd.DatetimeIndex):
        # Label every n-th date so that roughly eight labels are shown. The
        # step must be at least 1: with fewer than 8 rows the division
        # yields 0, which is not a valid slice step.
        step = max(1, int(len(df)/8))
        locs = mpl.dates.date2num(df.index)[::step]
        label_rotation = 45
        index_labels = df.index.strftime('%Y-%m-%d')[::step]
    else:
        locs = [x+offset for x in np.arange(len(df.index))]
        label_rotation = 0
        index_labels = df.index

    ####################### Minor ticks #########################
    # AutoMinorLocator(None) lets matplotlib choose the number of minor ticks.
    if minor_x:
        ax.xaxis.set_minor_locator(mpl.ticker.AutoMinorLocator(num_minor_x))
    if minor_y:
        ax.yaxis.set_minor_locator(mpl.ticker.AutoMinorLocator(num_minor_y))

    ####################### Major ticks #########################
    index_is_y = index_axis == 'y'
    index_axis_obj = ax.yaxis if index_is_y else ax.xaxis
    value_axis_obj = ax.xaxis if index_is_y else ax.yaxis

    # Set the locations for the ticks of the two axes
    index_axis_obj.set_major_locator(mpl.ticker.FixedLocator(locs))
    value_axis_obj.set_major_formatter(
        mpl.ticker.StrMethodFormatter('{x:,.0f}'))

    # Set the formatting of the labels
    value_ticks = range(min_value, max_value + inc, inc)
    set_value_ticks = ax.set_xticks if index_is_y else ax.set_yticks
    set_value_ticks(value_ticks, labels=value_ticks, fontsize=10,
                    fontname=font.normal)

    set_index_labels = ax.set_yticklabels if index_is_y else ax.set_xticklabels
    set_index_labels(labels=index_labels, rotation=label_rotation,
                     fontsize=10)
def set_labels(ax:plt.axes, xlab:str, ylab:str) -> None:
    '''
    Set the labels of the y and x axes.

    A label is only set when the corresponding argument is not None.

    Parameters
    ----------
    ax : plt.axes
        Matplotlib axes object.
    xlab : str
        Label of x axis, or None to leave the x axis label untouched.
    ylab : str
        Label of y axis, or None to leave the y axis label untouched.
    '''
    # Styling shared by both axis labels.
    label_style = dict(
        fontsize=9,
        fontweight='bold',
        horizontalalignment='right',
        labelpad=10,
        fontname=font.normal,
    )

    if xlab is not None:
        ax.set_xlabel(xlab, x=0, **label_style)

    if ylab is not None:
        ax.set_ylabel(ylab, y=1.0, **label_style)
def add_bar_annotations(ax:plt.axes, df:pd.DataFrame, upper:float, precision:int, percent:bool, horizontal:bool, additional_annotations:dict) -> None:
    '''
    Adds bar annotations to bar charts, and other annotations if specified.

    The input DataFrame is not modified.

    Parameters
    ----------
    ax : plt.axes
        Matplotlib axes object corresponding to plot.
    df : pd.DataFrame
        Dataset being plotted.
    upper : float
        Bar value used for determining placement of annotation.
    precision : int
        Decimal points in the annotations. When smaller than 1, the values
        are cast to integers before being annotated.
    percent : bool
        Flag determining whether to show percentage change between baseline
        column (assumed to be the first column) and remaining columns.
    horizontal : bool
        Flag to indicate if this is a horizontal bar graph.
    additional_annotations : dict
        Dictionary with keys of type (int, int) and values of type (str),
        indicating the coordinates and annotation to be added. May be None.
    '''
    bar_width = 1/(len(df.columns)+1)

    if precision < 1:
        # astype returns a new DataFrame, so the caller's data keeps its
        # original values and dtypes.
        df = df.astype(int)

    annotate_bars = horizontal_bar_annotations if horizontal else vertical_bar_annotations
    annotate_bars(df, ax, bar_width, upper, precision, percent)

    # TODO: make this more customizable - add another function possibly?
    if additional_annotations is not None:
        for xy, text in additional_annotations.items():
            ax.annotate(text=text, xy=xy, ha='left', color='k', fontname=font.normal, fontsize=10)
def horizontal_bar_annotations(df:pd.DataFrame, ax:plt.axes, bar_width:float, upper:float, precision:int, percent:bool) -> None:
    '''
    Adds value annotations to horizontal grouped or regular bar charts.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe being plotted.
    ax : plt.axes
        Axis object corresponding to the plot.
    bar_width : float
        The width of each bar.
    upper : float
        Bar value used for determining placement of annotation.
    precision : int
        Decimal points in the annotations.
    percent : bool
        Flag determining whether to show percentage change between baseline
        column (assumed to be the first column) and remaining columns.
        Rows whose percentage change is not finite (e.g. a baseline of 0)
        get no percentage annotation.
    '''
    HORIZONTAL_CUTOFF = 0.2 * upper
    ANNOTATION_OFFSET = 0.015 * upper
    PERCENT_HRZNTL_OFFSET = 0.04
    PERCENT_VRTCL_OFFSET = 0.15 if len(df.columns) == 2 else -0.05

    num_columns = len(df.columns)

    # Adding annotations for values of each bar
    for k, col in enumerate(df.columns):
        # The first column gets the largest offset within each group,
        # the last column an offset of 0.
        y_base = bar_width * (num_columns - 1 - k)
        for row_pos, value in enumerate(df[col]):
            y = y_base + row_pos - 0.05
            if value < HORIZONTAL_CUTOFF:
                # Short bar: black text placed just past the end of the bar.
                xy, ha, color = (value + ANNOTATION_OFFSET, y), 'left', 'k'
            else:
                # Long bar: white text placed just before the end of the bar.
                xy, ha, color = (value - ANNOTATION_OFFSET, y), 'right', 'w'
            ax.annotate(format(round(value, precision), ','), xy=xy, ha=ha, color=color,
                        fontname=font.normal, fontsize=10)

    # Adding percentage difference between 'baseline' bar and all other bars (optional)
    if percent:
        baseline = df[df.columns[0]]
        for k, col in enumerate(reversed(df.columns[1:])):
            # percent change = 100*(col-baseline)/baseline
            change = 100 * (df[col] - baseline) / baseline
            y_base = bar_width * k
            for row_pos, (value, pct) in enumerate(zip(df[col], change)):
                if not np.isfinite(pct):
                    # A zero or missing baseline gives inf/NaN, which cannot
                    # be converted to an integer percentage.
                    continue
                sign = '+' if pct > 0 else ''
                x_shift = (4*PERCENT_HRZNTL_OFFSET if value < HORIZONTAL_CUTOFF else PERCENT_HRZNTL_OFFSET) * upper
                ax.annotate(
                    sign + format(int(round(pct)), ',') + '%',  # Rounds percentage to closest integer
                    xy=(value + x_shift, y_base + row_pos + PERCENT_VRTCL_OFFSET),  # Placement of percentage annotation
                    color='k',
                    fontname=font.normal,
                    fontsize=10
                )
def vertical_bar_annotations(df:pd.DataFrame, ax:plt.axes, bar_width:float, upper:float, precision:int, percent:bool) -> None:
    '''
    Adds value annotations to vertical grouped or regular bar charts.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe being plotted.
    ax : plt.axes
        Axis object corresponding to the plot.
    bar_width : float
        The width of each bar.
    upper : float
        Bar value used for determining placement of annotation.
    precision : int
        Decimal points in the annotations.
    percent : bool
        Flag determining whether to show percentage change between baseline
        column (assumed to be the first column) and remaining columns.
        Rows whose percentage change is not finite (e.g. a baseline of 0)
        get no percentage annotation.
    '''
    VERTICAL_CUTOFF = 0.1 * upper
    ANNOTATION_OFFSET = 0.015 * upper
    PERCENT_OFFSET = 0.03

    # Adding annotations for values of each bar
    for k, col in enumerate(df.columns):
        x_base = bar_width * k
        for row_pos, value in enumerate(df[col]):
            inside_bar = value >= VERTICAL_CUTOFF
            # Short bars get the text shifted above the bar top, tall bars below it.
            y = value + ANNOTATION_OFFSET if value < VERTICAL_CUTOFF else value - ANNOTATION_OFFSET
            ax.annotate(
                format(round(value, precision), ','),
                xy=(x_base + row_pos, y),
                ha='center',
                va='top' if inside_bar else 'center',
                color='w' if inside_bar else 'k',
                fontname=font.normal,
                fontsize=10
            )

    # Adding percentage difference between 'baseline' bar and all other bars (optional)
    if percent:
        baseline = df[df.columns[0]]
        for k, col in enumerate(df.columns[1:]):
            # percent change = 100*(col-baseline)/baseline
            change = 100 * (df[col] - baseline) / baseline
            x_base = bar_width * (k + 1)
            for row_pos, (value, pct) in enumerate(zip(df[col], change)):
                if not np.isfinite(pct):
                    # A zero or missing baseline gives inf/NaN, which cannot
                    # be converted to an integer percentage.
                    continue
                sign = '+' if pct > 0 else ''
                y_shift = (4*PERCENT_OFFSET if value < VERTICAL_CUTOFF else PERCENT_OFFSET) * upper
                ax.annotate(
                    sign + format(int(round(pct)), ',') + '%',  # Rounds percentage to closest integer
                    xy=(x_base + row_pos, value + y_shift),  # Placement of percentage annotation
                    ha='center',
                    color='k',
                    fontname=font.normal,
                    fontsize=10
                )
def add_shaded_areas(ax:plt.axes, df:pd.DataFrame, shaded_areas: dict) -> None:
    '''
    Adds shaded areas to plot if specified by user.

    Parameters
    ----------
    ax : plt.axes
        Axis object being used.
    df : pd.DataFrame
        Data being plotted. Not used by this function.
    shaded_areas : dict
        Dictionary with the following format:
        {(label, colour): (x_start, x_end)}.
        Start and end x coordinates indicate range of shaded region and
        must be specified. Label can be specified or left as None.
        Colour can be specified or left as None in which case light grey
        is used by default. When the dictionary itself is None, nothing
        is drawn.
    '''
    if shaded_areas is None:
        return

    for (label, color), location in shaded_areas.items():
        if color is None:
            color = colour.light_grey
        ax.axvspan(location[0], location[1], alpha=0.2, color=color)

        # Show the label if it is not empty
        if label is not None:
            # The label is anchored at the start of the span, at the current
            # upper y limit of the axes.
            ax.text(s=label,
                    x=location[0],
                    y=ax.get_ylim()[1],
                    ha='left',
                    va='top',
                    rotation=90,
                    fontname=font.normal,
                    fontsize=9
            )
def general_grouped_bar_chart(data:pd.DataFrame, param_axis:str, index_axis:str, standard_plot_size:(int, int), horizontal:bool, grid_x:bool=True, grid_y:bool=True, **kwargs:dict) -> (plt.figure, plt.axes):
    '''
    Creates a horizontal or vertical grouped bar chart. Number of bars in
    each group to plot is determined from the number of columns in input
    dataframe, while the number of groups is determined by the number of rows.

    All keyword arguments are also forwarded to ``calculate_params``.

    Parameters
    -----------
    data : pd.DataFrame
        Data for the grouped bar chart.
    param_axis : str
        Axis along which bars are plotted.
    index_axis : str
        Axis containing labels for bars.
    standard_plot_size : (int, int)
        The standard size depending on the type of graph (horizontal or
        vertical). Used when ``plot_size`` is not given.
    horizontal : bool
        Flag indicating whether plot is horizontal.
    grid_x : bool, optional
        Flag passed to ``set_plot_style`` for the x axis grid.
        Defaults to True.
    grid_y : bool, optional
        Flag passed to ``set_plot_style`` for the y axis grid.
        Defaults to True.
    ylab : str, optional
        Label for the y axis.
    xlab : str, optional
        Label for the x axis.
    xmax or ymax : float, optional
        The max value of the x or y axis.
    xmin or ymin : float, optional
        The minimum value of the x or y axis
        Should include this if minimum < 0.
    xinc or yinc : float, optional
        The increment of ticks on the x or y axis.
    ax : plt.axes, optional
        The axis that the plot will be located on.
    plot_size : (int, int), optional
        Custom plot dimensions.
    precision : int, optional
        Decimal points in the annotations. Defaults to -1.
    percent : bool, optional
        Flag determining whether to show percentage change between baseline
        column (assumed to be the first column) and remaining columns.
        Defaults to False.
    additional_annotations : dict, optional
        Dictionary with keys of type (int, int) and values of type (str),
        indicating the coordinates and annotation to be added. The keyword
        ``annotations`` is accepted as an alias.
    legend : list[str], optional
        A list of labels to be used for the legend.

    Returns
    --------
    fig
        Matplotlib fig object
    ax
        Matplotlib ax object
    '''
    BAR_WIDTH = 1/(len(data.columns)+1)
    # Shifts the index ticks to the middle of each group of bars.
    TICK_OFFSET = (len(data.columns)-1) * BAR_WIDTH/2

    # NOTE(review): func is defined elsewhere in this module; presumably it
    # applies the global plot styling - confirm.
    func()

    max_value, min_value, inc, upper = calculate_params(
        df=data,
        param_axis=param_axis,
        **kwargs
    )

    fig, ax = plot_grouped_bar_data(
        df=data,
        ax=kwargs.get('ax', None),
        legend=kwargs.get('legend', None),
        horizontal=horizontal
    )

    set_plot_style(
        fig=fig,
        ax=ax,
        min_value=min_value,
        max_value=max_value,
        param_axis=param_axis,
        plot_size=kwargs.get('plot_size', standard_plot_size),
        grid_x=grid_x,
        grid_y=grid_y
    )

    set_ticks(
        ax=ax,
        min_value=min_value,
        max_value=max_value,
        inc=inc,
        index_axis=index_axis,
        df=data,
        offset=TICK_OFFSET
    )

    set_labels(
        ax=ax,
        xlab=kwargs.get('xlab', None),
        ylab=kwargs.get('ylab', None)
    )

    # The documented keyword is 'additional_annotations'; 'annotations' is
    # the key that was historically read, so it stays supported as a fallback.
    additional_annotations = kwargs.get('additional_annotations')
    if additional_annotations is None:
        additional_annotations = kwargs.get('annotations')

    add_bar_annotations(
        ax=ax,
        df=data,
        upper=upper,
        horizontal=horizontal,
        precision=kwargs.get('precision', -1),
        percent=kwargs.get('percent', False),
        additional_annotations=additional_annotations
    )

    return fig, ax