Visualization & Report

Pandas

Profilers 📦

# summarytools
from summarytools import dfSummary, tabset
from sklearn.datasets import load_wine, load_iris

# Load both toy datasets as DataFrames; only the feature frames are summarised.
X1, y = load_wine(return_X_y=True, as_frame=True)
X2, y = load_iris(return_X_y=True, as_frame=True)

# multiple tabs: one HTML summary per dataset
tabset({
    'Wine': dfSummary(X1).to_html(),
    'Iris': dfSummary(X2).to_html(),
})
# ydata_profiling
import pandas as pd
from ydata_profiling import ProfileReport
from sklearn.datasets import make_classification

# Two-feature toy classification problem with 98% / 2% class weights.
X, y = make_classification(
    n_samples=10000, n_features=2, n_redundant=0,
    n_clusters_per_class=1, weights=[0.98, 0.02], flip_y=0, random_state=1,
)
X = pd.DataFrame(X)

# Profile the frame built above (the snippet previously referenced an
# undefined `df`).
profile = ProfileReport(X, title="Profiling Report")
profile.to_notebook_iframe()
# sweetviz 📦
import sweetviz as sv

# NOTE(review): `df` (with a "target" column) and `mask_array` are not defined
# in this snippet; they are expected to exist already.
# `target_feat` is documented as a single column name (str), not a list.
report = sv.analyze(df, target_feat="target")
report.show_html('sweetviz_report.html')

# Compare two sub-populations of the same frame, split by `mask_array` and
# labelled "A" and "B".
sv.compare_intra(df, mask_array, names=["A", "B"], target_feat="target")

Styler & dataframe_image

Styler

styler
# Build a styled view of `df`: colour gradient per column, two-decimal
# formatting, custom caption/header CSS, and a hidden index.
# NOTE(review): `df` and `plant_id` must already be defined.
styler = (
    df.style.background_gradient(cmap='RdYlGn', axis=0)
    # "<br>" puts the title on a second line of the HTML caption.
    .set_caption(f"{plant_id} <br>Feature Importance Metrics")
    .format("{:.2f}")
    # This second caption replaces the one set above.
    .set_caption(f"ICE classes {plant_id}")
    .set_table_styles([
        {'selector': 'caption',
         'props': "font-size: 20px; color: royalblue; background-color: gray;font-weight: bold;"},
        {'selector': 'th',
         'props': "font-size: 15px;border: 1px solid gray; background-color: black;"},
    ])
    .hide()
)
# plotly
import dataframe_image as dfi

# Render the styled table to a PNG using the matplotlib conversion backend.
dfi.export(styler, f"bqml_global_explains_{plant_id}.png",
           table_conversion='matplotlib')

# ExcelWriter
# Append to the workbook, replacing the "tables" sheet if it already exists.
# The context manager saves and closes the writer even if to_excel raises.
with pd.ExcelWriter(f"{file_name}.xlsx", engine="openpyxl", mode="a",
                    if_sheet_exists="replace") as writer:
    styler.to_excel(writer, sheet_name="tables", index=False, freeze_panes=(1, 3))

Plottable

# plottable
from plottable import Table, ColDef
from plottable.cmap import centered_cmap, normed_cmap
from plottable.plots import bar, percentile_bars, percentile_stars, progress_donut
from plottable.formatters import decimal_to_percent
from plottable.plots import circled_image
import matplotlib.pyplot as plt

# Demo frame: one row per team, indexed by "team".
# "luck", "penalty" and "coor" are random; "coor" stores the string form of a
# (0, k) tuple where k is a rounded standard-normal draw.
df = pd.DataFrame(
    {
        'team': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'],
        "judge": ["Char"] * 3 + ["Amy"] * 2 + ["Paul"] * 3,
        'points': [18, 22, 19, 14, 14, 11, 20, 28],
        'assists': [5, 7, 7, 9, 12, 9, 9, 4],
        'rebounds': [11, 8, 10, 6, 6, 5, 9, 12],
        "luck": (np.random.rand(8) - 0.5).round(2),
        "penalty": (np.random.randn(8) * 2).round(2),
        # Draw all eight offsets at once and convert each scalar: calling
        # int() on a 1-element array is deprecated in NumPy.
        "coor": [str((0, int(offset))) for offset in np.random.randn(8).round()],
    }
)
df.set_index("team", inplace=True)
# Table · index_col
from matplotlib.colors import LinearSegmentedColormap  # needed for `cmap` below

# Keyword arguments shared by every Table(...) call in these notes.
params = {
    "row_dividers": False,
    "odd_row_color": "white",
    "even_row_color": "snow",
    "textprops": {"ha": "center", "fontname": "Sans", "fontweight": "normal"},
    "cell_kw": {"edgecolor": "lightgray", "linewidth": 0.1},
    "index_col": "team",
}
# Blue -> gray -> green colormap with 100 levels.
cmap = LinearSegmentedColormap.from_list(
    name="", colors=["blue", "gray", "green"], N=100)

tab = Table(df, **params)
# row_dividers · odd_row_color · even_row_color
fig, ax = plt.subplots(figsize=(7, 8))
tab = Table(df, **params)

# column labels
tab.col_label_row.set_facecolor("k")
tab.col_label_row.set_fontcolor("w")

# index labels
tab.columns["team"].set_facecolor("lightgray")
tab.columns["team"].set_linewidth(0);
# ColDef
column_definitions = [
    ColDef("team", border="right", title="", width=0.6, textprops={"ha": "center"}),
    # Show each team's rank by points (1 = highest) instead of the raw value.
    ColDef("points", title="rank",
           formatter=lambda x: list(np.sort(-1 * df["points"])).index(-x) + 1,
           group="key stats"),
    ColDef("assists",
           cmap=centered_cmap(df["assists"], cmap="RdBu", num_stds=2.5, center=5),
           group="key stats"),
    ColDef("rebounds",
           cmap=normed_cmap(df["rebounds"], cmap="RdBu", num_stds=2.5),
           group="key stats"),
    ColDef("penalty", cmap=plt.cm.RdYlGn, textprops={"fontweight": "bold"}),
    ColDef("luck", text_cmap=cmap, formatter=lambda x: f"{x:+2.0%}", group="☯"),
]

params["column_definitions"] = column_definitions
tab = Table(df, **params)
tab.autoset_fontcolors()

# Save a throwaway plot as "1.png" (used as the image for the "judge" column
# below), then start a fresh figure for the next table.
plt.plot(np.random.rand(3))
plt.savefig("1.png")
plt.clf()
fig, ax = plt.subplots(figsize=(6, 6))
df["rand2"] = df.apply(lambda x: np.random.rand(), axis=1)
df["rand3"] = df.apply(lambda x: np.random.rand(), axis=1)
df["judge"] = "1.png"

def custom_plot_fn(ax, val):
    """Plot the tuple encoded in ``val`` on ``ax``, repeated a random number of times.

    Args:
        ax: Object exposing a ``plot`` method (the axes of one table cell).
        val: String form of a Python literal, e.g. ``"(0, 1)"``.

    Raises:
        ValueError, SyntaxError: If ``val`` is not a plain Python literal.
    """
    import ast  # local import: this snippet has no import section of its own

    # literal_eval only accepts literals, so cell text can never execute
    # arbitrary code the way eval() would.
    points = ast.literal_eval(val)
    # Multiplying a tuple by k repeats it k times (1 <= k <= 9).
    ax.plot(points * np.random.randint(1, 10))

# Additional columns rendered through plot functions rather than plain text.
extra_definitions = [
    ColDef("judge", plot_fn=circled_image),
    ColDef(
        "luck",
        textprops={"ha": "center", "bbox": {"boxstyle": "circle", "pad": 0.15}},
        cmap=normed_cmap(df["luck"], cmap="RdBu", num_stds=2.5),
    ),
    ColDef("coor", plot_fn=custom_plot_fn),
    ColDef(
        "rand2",
        plot_fn=bar,
        plot_kw={
            "cmap": plt.cm.RdYlGn,
            "plot_bg_bar": True,
            "height": 0.5,
            "lw": 0.5,
            "annotate": False,
            "formatter": decimal_to_percent,
        },
    ),
    ColDef(
        "rand3",
        plot_fn=progress_donut,
        plot_kw={"is_pct": True, "formatter": "{:.0%}"},
    ),
]
# Extend the existing list in place, exactly as `+=` did.
params["column_definitions"].extend(extra_definitions)

tab = Table(df, **params)

Matplotlib

rcParams & cm & colors

# rcParams
import matplotlib.pyplot as plt
from matplotlib import rcParams

# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# 3.8 removed the old names; use whichever this installation provides.
_whitegrid = "seaborn-v0_8-whitegrid"
plt.style.use(_whitegrid if _whitegrid in plt.style.available else "seaborn-whitegrid")
plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams["font.size"] = "17"
plt.rcParams['lines.linewidth'] = 2
plt.rcParams['lines.markersize'] = 1
plt.rcParams["figure.figsize"] = (20, 10)

import seaborn as sns
sns.set_style('whitegrid')
# matplotlib.cm
import matplotlib.cm as cm

# Calling a colormap returns RGBA rows; alpha is forced to 0.5 here.
print(cm.RdYlGn([-100, 2, 3, 100], alpha=0.5))
plt.imshow(np.array([[100, 1, 2, 3, -100]]), cmap=cm.RdYlGn_r)

# colors.to_hex
import matplotlib.colors as colors

colors.to_hex((1, 0.5, 1, 0.5), keep_alpha=True)  # '#ff80ff80'
colors.to_hex("red", keep_alpha=False)  # '#ff0000'

pyplot 📦

# subplots · suptitle
fig, axes = plt.subplots(2, 3)  # 2 rows, 3 columns
fig.suptitle("2 by 3")

def f(ax):
    """Draw a flat two-point line at y=2 on ``ax`` and give the axes a title."""
    flat_line = [2, 2]
    ax.plot(flat_line)
    ax.set_title("small title")

# Fill only the top-left and bottom-right panels of the 2x3 grid.
for row, col in ((0, 0), (1, 2)):
    f(axes[row, col])
plt.tight_layout()
# Functional
x = range(1, 10)
y = np.random.randn(9)

plt.bar(x, y)
# Mathtext label placed at data coordinates (3, -1.2).
plt.text(3, -1.2, r"$\exp(-t)$", fontdict={"color": "royalblue"})

plt.xlabel("x", fontdict={"weight": "bold", "color": "darkred"})
# Ticks at 1, 3, ..., 9 are relabelled as 11, 13, ..., 19.
plt.xticks(range(1, 10, 2), range(11, 20, 2), rotation=45)
plt.ylabel("y", fontdict={'family': "serif"})
plt.title("this is title", fontdict={'size': 25});

plt.savefig("plot.png", dpi=60, bbox_inches='tight')
Object Oriented
fig = plt.figure()
# add_axes takes [left, bottom, width, height] in figure fractions.
ax = fig.add_axes([0, 0, 0.6, 0.6])

# Draw on the axes object directly, in keeping with the object-oriented style
# (the pyplot call targeted this same axes implicitly).
ax.plot(y, "r-.")

ax.set_ylim([-2, 2])  # y axis range
ax.set_xticks(np.arange(0, 8, 2))  # tick locations
# Label tick i with i asterisks.
ax.set_xticklabels([i * "*" for i in np.arange(0, 8, 2)],
                   rotation=45,
                   fontsize="large")
ax.set_title("Plotting")

ax.set_xlabel("X-axis")
ax.set_ylabel("Y-axis")
ax.annotate("Annotation",
            xy=(0, 0), xytext=(0.5, 0.5),
            arrowprops=dict(facecolor='black', shrink=0.05))

# Small inset axes in the upper right.
ax2 = fig.add_axes([0.7, 0.7, 0.1, 0.1])
# grid() with no arguments only toggles; pass False so the grid is turned off
# whatever the active style is.
ax2.grid(False)
# plot
x = range(1, 10)
y = np.random.randn(9)

# ds = drawstyle, lw = linewidth, mew = markeredgewidth, ls = linestyle.
plt.plot(x, y, ds='steps', lw=5, marker="+", mew=20)
plt.plot(x, y, ls='--', marker="1", mew=20, lw=5)
# bar
x = range(1, 10)
y = np.random.randn(9)

# One random width per bar; two colours are supplied for the nine bars.
wide = np.random.uniform(0, 0.5, 9)
color = ['r', 'g']

plt.bar(x, y, width=wide, color=color, align='center')
# imshow
x = np.linspace(-8, 8, 8)
xs, ys = np.meshgrid(x, x)
# Distance of every grid point from the origin.
z = np.sqrt(xs**2 + ys**2)

plt.imshow(z, cmap=plt.cm.rainbow)
plt.colorbar()

Seaborn 📦

Google Colab 🔗
# pairwise n^2
df = sns.load_dataset("penguins")
corr = df.corr(numeric_only=True)
sns.pairplot(df)
sns.clustermap(corr, annot=True, cmap="RdYlGn")
sns.heatmap(corr, annot=True, cmap="RdYlGn")

# FacetGrid 2^4
tips = sns.load_dataset("tips")
# One panel per (sex, time) combination.
g = sns.FacetGrid(tips, col="time", row="sex")
g.map(sns.scatterplot, "total_bill", "tip")
# boxplot n
sns.boxplot(data=df, log_scale=True)

# histplot
sns.histplot(np.random.randn(100), bins="auto", kde=True)

# regplot
x = np.random.randn(100)
# y is a noisy linear function of x (slope 3).
sns.regplot(x=x,
            y=3 * x + np.random.randn(100),
            color="darkgray",
            scatter=True)

Celluloid & Imageio 📦

# Camera
from celluloid import Camera

fig = plt.figure()
camera = Camera(fig)
# One frame per iteration: a horizontal line at height i.
for i in range(10):
    plt.plot([i] * 10)
    camera.snap()
animation = camera.animate()
animation.save('celluloid_minimal.gif', writer="pillow")
# Imageio
import imageio

# Keep only NumPy-integer years, in ascending order; any other entry of
# p_year is skipped. isinstance also accepts integer widths other than int64.
years = sorted(x for x in df.p_year.unique() if isinstance(x, np.integer))
# One pre-rendered frame per year.
images = [imageio.imread(f"plots/year_{year}.png") for year in years]
imageio.mimsave("weather_stations.gif", images, duration=5)

Plotly

Favorites

# pandas.DataFrame.plot
import plotly.graph_objects as go

fig = df.plot(x="A", y=["B", "C"], backend="plotly",
              color="anomaly", kind="scatter",
              # facet_row="data_source",
              facet_col="Hour", facet_col_wrap=4,
              width=3000, height=1000)
# Keep only the text after "=" in each facet annotation.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))

fig.add_hline(y=0, line_dash="dash", line_color="black")

trace = go.Scatter(x=df["windSpd_binned"], y=df["mw"],
                   mode="lines", line=dict(color="black", width=2),
                   name="Average")
# Add the trace to every subplot without a legend entry ...
trace.update(legendgroup="Average", showlegend=False)
fig.add_trace(trace, row="all", col="all", exclude_empty_subplots=True)
# ... then once more, with the legend shown, on a single subplot, so the
# "Average" legend group gets exactly one entry.
trace.update(legendgroup="Average", showlegend=True)
fig.add_trace(trace, row=len(df["year"].unique()), col=len(df["month"].unique()))
fig.show()
# secondary_y
from plotly.subplots import make_subplots  # not imported anywhere else in these notes

fig0 = make_subplots(specs=[[{"secondary_y": True}]])

# line
# NOTE(review): `customdata` is the column list defined in the hovertemplate
# snippet; it must exist before this cell runs.
fig0.add_trace(
    go.Scatter(x=df["datetime_lcl"], y=df["predicted_mw"], mode='lines',
               name='predicted_mw', customdata=df[customdata],
               line_color='red', opacity=0.25),
    secondary_y=False)

fig0.add_trace(
    go.Scatter(x=df["datetime_lcl"], y=df["radSolar"], mode='lines',
               name='radSolar', opacity=0.25),
    secondary_y=True)

fig0.update_layout(title=f"{plant_id} actual vs predict by datetime_lcl",
                   xaxis_title="datetime_lcl")
fig0.update_yaxes(title_text="power", secondary_y=False)
fig0.update_yaxes(title_text="radSolar", secondary_y=True)
fig0.show()
# hovertemplate
# Columns exposed to the hover text; referenced below by position as
# %{customdata[i]}.
customdata = ["HE", 'baseline_prediction_value', 'top_feature', 'top_feature_value',
              'second_feature', 'second_feature_value']

fig0.add_trace(go.Scatter(x=df["datetime_lcl"], y=df["predicted_mw"],
                          mode='lines', customdata=df[customdata]))
# Plotly hover text is HTML-like, so lines are joined with "<br>".
fig0.update_traces(
    hovertemplate="<br>".join([
        "datetime: %{x}",
        "HE: %{customdata[0]}",
        "---------------------",
        "mw_pred: %{y}",
        "baseline_prediction_value: %{customdata[1]}",
        "-----------------",
        "top_feature: %{customdata[2]}",
        "top_feature_value: %{customdata[3]}",
        "second_feature: %{customdata[4]}",
        "second_feature_value: %{customdata[5]}",
    ]),
)

GeoSpatial Data

Geopandas & h3Pandas & Folium

# GeoDataFrame
import geopandas as gpd
import h3pandas  # never referenced by name; presumably provides the `.h3` accessor used below
import folium

data = {
    "City": ["Buenos Aires", "Brasilia"],
    "Country": ["Argentina", "Brazil"],
    "Latitude": [-34.58, -34.78],
    "Longitude": [-58.66, -58.91],
    "Value": np.random.randint(0, 100, 2),
}

df = pd.DataFrame(data)
# Point geometry is built as (x=Longitude, y=Latitude) and tagged as WGS84.
gdf = gpd.GeoDataFrame(
    data,
    geometry=gpd.points_from_xy(data["Longitude"], data["Latitude"]),
    crs="EPSG:4326",
)

fig, axes = plt.subplots(1, 3, figsize=(18, 9))
# Panel 0: the raw points.
gdf.plot(column="Value", ax=axes[0])
# Panel 1: the resolution-8 H3 cell containing each point.
df.h3.geo_to_h3(8, lng_col="Longitude", lat_col="Latitude") \
    .h3.h3_to_geo_boundary() \
    .plot(ax=axes[1], column="Value")
# Panel 2: values smoothed over neighbouring cells with weights 0.75**i.
gdf.h3.geo_to_h3(8)[["Value"]].h3.k_ring_smoothing(
    weights=[0.75**i for i in range(5)]).plot(column="Value", ax=axes[2])
# folium · iframe · popup
# NOTE(review): `map` shadows the builtin; the name is kept because later
# snippets reference it.
map = folium.Map(location=df[["Latitude", "Longitude"]].mean(), zoom_start=11)
map.add_child(folium.LatLngPopup())

# Layer that starts hidden and can be toggled via the LayerControl below.
group1 = folium.FeatureGroup(name="group1", show=False)

df_agg = gdf.h3.geo_to_h3(8)[["Value"]].h3.k_ring_smoothing(
    weights=[0.75**i for i in range(5)]).reset_index()

cp = folium.Choropleth(
    geo_data=df_agg[["h3_hex_ring", "geometry"]],
    data=df_agg, columns=["h3_hex_ring", "Value"],
    key_on="feature.properties.h3_hex_ring",
    fill_color="Reds", fill_opacity=0.7, line_opacity=0.2,
    legend_name="Value", highlight=True,
)
# Remove the choropleth's 'color_map*' children (presumably its colour-bar
# legend). Iterate over a snapshot of the keys: deleting from a mapping while
# iterating it directly can raise RuntimeError.
for key in list(cp._children):
    if key.startswith('color_map'):
        del cp._children[key]
cp.add_to(map)


for i, (lat, lng) in enumerate(zip(data["Latitude"], data["Longitude"])):
    iframe = folium.IFrame(html="Amenities", width="auto", height="auto")
    popup = folium.Popup(iframe, sticky=True)
    if i == 0:
        # First point: circle marker inside the toggleable group.
        folium.CircleMarker(location=[lat, lng], color="blue", radius=10,
                            weight=2, popup=popup).add_to(group1)
    else:
        # Remaining points: a "⚡" text icon placed directly on the map.
        folium.Marker(location=[lat, lng], popup=popup,
                      icon=folium.DivIcon(html="⚡")).add_to(map)

folium.PolyLine(list(zip(data["Latitude"], data["Longitude"])),
                color="royalblue").add_to(group1)

group1.add_to(map)
map.add_child(folium.LayerControl())

map

Folium

# Image in Popup
import base64

# Read and base64-encode the image; the context manager closes the file.
with open('1.png', 'rb') as image_file:
    encoded = base64.b64encode(image_file.read())
# Inline the image as a data URI. The tag must be "<img" with no space after
# "<", otherwise it is not parsed as an element.
html = "<img src='data:image/png;base64,{}'>".format
popup = folium.Popup(
    folium.IFrame(
        html=html(encoded.decode('UTF-8')),
        width=700, height="50%", figsize=(12, 9)),
    max_height="60%")
    
# Fullscreen
from folium.plugins import Fullscreen

# Add a fullscreen toggle button to the map built earlier.
Fullscreen().add_to(map)