Visualization & Report
Pandas
Profilers 📦
# summarytools
from summarytools import dfSummary, tabset
from sklearn.datasets import load_wine, load_iris

# Only the feature frames are summarised; `y` is rebound by the second load
# and is not used in this snippet.
X1, y = load_wine(return_X_y=True, as_frame=True)
X2, y = load_iris(return_X_y=True, as_frame=True)

# multiple tabs
tabset({
    'Wine': dfSummary(X1).to_html(),
    'Iris': dfSummary(X2).to_html(),
})
# ydata_profiling
import pandas as pd
from ydata_profiling import ProfileReport
from sklearn.datasets import make_classification

# Two-feature toy classification problem with 98% / 2% class weights.
X, y = make_classification(n_samples=10000, n_features=2, n_redundant=0,
                           n_clusters_per_class=1, weights=[0.98, 0.02],
                           flip_y=0, random_state=1)
X = pd.DataFrame(X)
# Profile the frame built above; no `df` is defined in this snippet.
profile = ProfileReport(X, title="Profiling Report")
profile.to_notebook_iframe()
# sweetviz 📦
import sweetviz as sv

# `target_feat` is a single column name (a string), not a list of names.
report = sv.analyze(df, target_feat="target")
report.show_html('sweetviz_report.html')

# Split `df` in two with `mask_array` (defined elsewhere) and compare the
# halves, labelled "A" and "B".
sv.compare_intra(df,
                 mask_array, names=["A", "B"],
                 target_feat="target")
Styler & dataframe_image
Styler
styler
# Styled view of `df`: per-column colour gradient, two-decimal numbers, a
# styled caption and header cells, then hide() with its default arguments.
# set_caption() is called once only: a second call replaces the first, so the
# "ICE classes ..." caption is the one that was ever rendered.
styler = (
    df.style.background_gradient(cmap='RdYlGn', axis=0)
    .format("{:.2f}")
    .set_caption(f"ICE classes {plant_id}")
    .set_table_styles([
        {'selector': 'caption', 'props': "font-size: 20px; color: royalblue; background-color: gray;font-weight: bold;"},
        {'selector': 'th', 'props': "font-size: 15px;border: 1px solid gray; background-color: black;"},
    ])
    .hide()
)
# plotly
import dataframe_image as dfi

# Render the styled table to a PNG using the matplotlib converter.
dfi.export(styler, f"bqml_global_explains_{plant_id}.png",
           table_conversion='matplotlib')

# ExcelWriter
# mode="a" appends to an existing workbook and if_sheet_exists="replace"
# overwrites the "tables" sheet.  The context manager saves and closes the
# workbook even when to_excel() raises.
with pd.ExcelWriter(f"{file_name}.xlsx", engine="openpyxl", mode="a",
                    if_sheet_exists="replace") as writer:
    styler.to_excel(writer, sheet_name="tables", index=False, freeze_panes=(1, 3))
Plottable
# plottable
from plottable import Table, ColDef
from plottable.cmap import centered_cmap, normed_cmap
from plottable.plots import bar, percentile_bars, percentile_stars, progress_donut
from plottable.formatters import decimal_to_percent
from plottable.plots import circled_image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Demo frame: eight teams with fixed stats plus a few random columns.
df = pd.DataFrame({
    'team': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'],
    "judge": ["Char"] * 3 + ["Amy"] * 2 + ["Paul"] * 3,
    'points': [18, 22, 19, 14, 14, 11, 20, 28],
    'assists': [5, 7, 7, 9, 12, 9, 9, 4],
    'rebounds': [11, 8, 10, 6, 6, 5, 9, 12],
    "luck": (np.random.rand(8) - 0.5).round(2),
    "penalty": (np.random.randn(8) * 2).round(2),
    # One "(0, k)" string per row, k being a rounded standard-normal draw.
    # A scalar draw is rounded directly: int() on a 1-element array is
    # deprecated in recent NumPy.
    "coor": [str((0, int(round(np.random.randn())))) for _ in range(8)],
})
df.set_index("team", inplace=True)
# Table index_col
from matplotlib.colors import LinearSegmentedColormap

# Keyword arguments shared by every Table(...) call below.
params = {
    "row_dividers": False, "odd_row_color": "white", "even_row_color": "snow",
    "textprops": {"ha": "center", "fontname": "Sans", "fontweight": "normal"},
    "cell_kw": {"edgecolor": "lightgray", "linewidth": 0.1},
    "index_col": "team",
}
# Blue -> gray -> green colormap with 100 levels.
cmap = LinearSegmentedColormap.from_list(
    name="", colors=["blue", "gray", "green"], N=100)
tab = Table(df, **params)

# row_dividers odd_row_color even_row_color
fig, ax = plt.subplots(figsize=(7, 8))
tab = Table(df, **params)
# column labels
tab.col_label_row.set_facecolor("k")
tab.col_label_row.set_fontcolor("w")
# index labels
tab.columns["team"].set_facecolor("lightgray")
tab.columns["team"].set_linewidth(0);
# ColDef
# Negated points sorted ascending, i.e. best score first.  Computed once here
# rather than on every formatter call; position + 1 is the rank shown.
_points_desc = list(np.sort(-1 * df["points"]))

column_definitions = [
    ColDef("team", border="right", title="", width=0.6, textprops={"ha": "center"}),
    ColDef("points", title="rank", formatter=lambda x: _points_desc.index(-x) + 1, group="key stats"),
    ColDef("assists", cmap=centered_cmap(df["assists"], cmap="RdBu", num_stds=2.5, center=5), group="key stats"),
    ColDef("rebounds", cmap=normed_cmap(df["rebounds"], cmap="RdBu", num_stds=2.5), group="key stats"),
    ColDef("penalty", cmap=plt.cm.RdYlGn, textprops={"fontweight": "bold"}),
    ColDef("luck", text_cmap=cmap, formatter=lambda x: f"{x:+2.0%}", group="☯"),
]
params["column_definitions"] = column_definitions
tab = Table(df, **params)
tab.autoset_fontcolors()

# Save a small random line plot as "1.png", then clear the figure.
plt.plot(np.random.rand(3))
plt.savefig("1.png")
plt.clf()

fig, ax = plt.subplots(figsize=(6, 6))
# Two random columns, one fresh draw per row.
df["rand2"] = df.apply(lambda x: np.random.rand(), axis=1)
df["rand3"] = df.apply(lambda x: np.random.rand(), axis=1)
# Every "judge" cell now holds the path of the image saved above.
df["judge"] = "1.png"
def custom_plot_fn(ax, val):
    """Plot the sequence encoded in the cell text ``val`` on ``ax``.

    ``val`` is the string form of a Python literal (the "coor" column holds
    strings such as ``"(0, 1)"``).  The parsed value is repeated a random
    1-9 times before being handed to ``ax.plot``.

    Raises:
        ValueError, SyntaxError: if ``val`` is not a plain Python literal.
    """
    import ast

    # literal_eval accepts literals only, so arbitrary expressions in a cell
    # are rejected instead of executed (eval would run them).
    ax.plot(ast.literal_eval(val) * np.random.randint(1, 10))
# Extra column definitions: an image column, a circled "luck" cell, the custom
# plot for "coor", a bar for "rand2" and a progress donut for "rand3".
luck_circle_props = {"ha": "center", "bbox": {"boxstyle": "circle", "pad": 0.15}}
bar_plot_kw = {
    "cmap": plt.cm.RdYlGn,
    "plot_bg_bar": True,
    "height": 0.5,
    "lw": 0.5,
    "annotate": False,
    "formatter": decimal_to_percent,
}
donut_plot_kw = {"is_pct": True, "formatter": "{:.0%}"}

# extend() mutates the existing list in place, exactly as `+=` does.
params["column_definitions"].extend([
    ColDef("judge", plot_fn=circled_image),
    ColDef("luck", textprops=luck_circle_props,
           cmap=normed_cmap(df["luck"], cmap="RdBu", num_stds=2.5)),
    ColDef("coor", plot_fn=custom_plot_fn),
    ColDef("rand2", plot_fn=bar, plot_kw=bar_plot_kw),
    ColDef("rand3", plot_fn=progress_donut, plot_kw=donut_plot_kw),
])
tab = Table(df, **params)
Matplotlib
rcParams & cm & colors
# rcParams
import matplotlib.pyplot as plt
from matplotlib import rcParams

# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old names; fall back to the old name on older
# installations.
_whitegrid_style = 'seaborn-v0_8-whitegrid'
if _whitegrid_style not in plt.style.available:
    _whitegrid_style = 'seaborn-whitegrid'
plt.style.use(_whitegrid_style)

plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams["font.size"] = "17"
plt.rcParams['lines.linewidth'] = 2
plt.rcParams['lines.markersize'] = 1
plt.rcParams["figure.figsize"] = (20, 10)

import seaborn as sns
sns.set_style('whitegrid')
# matplotlib.cm
import matplotlib.cm as cm

# Calling a colormap returns RGBA values; here with alpha forced to 0.5.
print(cm.RdYlGn([-100, 2, 3, 100], alpha=0.5))
plt.imshow(np.array([[100, 1, 2, 3, -100]]), cmap=cm.RdYlGn_r)

# colors.to_hex
import matplotlib.colors as colors

colors.to_hex((1, 0.5, 1, 0.5), keep_alpha=True)  # '#ff80ff80'
colors.to_hex("red", keep_alpha=False)  # '#ff0000'
pyplot 📦
# subplots suptitle
fig, axes = plt.subplots(2, 3)  # 2 ROW 3 COL
fig.suptitle("2 by 3")


def f(ax):
    """Draw a flat two-point line on ``ax`` and give it a small title."""
    ax.plot([2, 2])
    ax.set_title("small title")


# Only the top-left and bottom-right axes are drawn on.
f(axes[0, 0])
f(axes[1, 2])
plt.tight_layout()
# Functional
x = range(1, 10)
y = np.random.randn(9)
plt.bar(x, y)
# Mathtext label placed at data coordinates (3, -1.2).
plt.text(3, -1.2,
         r"$\exp(-t)$",
         fontdict={"color": "royalblue"})
plt.xlabel("x",
           fontdict={"weight": "bold",
                     "color": "darkred"})
# Ticks at 1, 3, ..., 9 relabelled as 11, 13, ..., 19.
plt.xticks(range(1, 10, 2),
           range(11, 20, 2),
           rotation=45)
plt.ylabel("y",
           fontdict={'family': "serif"})
plt.title("this is title",
          fontdict={'size': 25});
plt.savefig("plot.png", dpi=60, bbox_inches='tight')
Object Oriented
fig = plt.figure()
# add_axes takes [left, bottom, width, height] in figure fractions.
ax = fig.add_axes([0, 0, 0.6, 0.6])
# Draw through the Axes object rather than the implicit pyplot state.
ax.plot(y, "r-.")
ax.set_ylim([-2, 2])  # y axis range
ax.set_xticks(np.arange(0, 8, 2))  # ticks locs
ax.set_xticklabels([i * "*" for i in np.arange(0, 8, 2)],
                   rotation=45,
                   fontsize="large")
ax.set_title("Plotting")
ax.set_xlabel("X-axis")
ax.set_ylabel("Y-axis")
ax.annotate("Annotation",
            xy=(0, 0), xytext=(0.5, 0.5),
            arrowprops=dict(facecolor='black',
                            shrink=0.05))
ax2 = fig.add_axes([0.7, 0.7, 0.1, 0.1])
# A bare grid() call toggles the grid, so whether it ends up off depends on
# the active style; pass False to turn it off explicitly.
ax2.grid(False)  # turn off grid
# plot
x = range(1, 10)
y = np.random.randn(9)
# ds = drawstyle, lw = linewidth, mew = markeredgewidth.
plt.plot(x, y, ds='steps', lw=5, marker="+", mew=20)
plt.plot(x, y, ls='--', marker="1", mew=20, lw=5)

# bar
x = range(1, 10)
y = np.random.randn(9)
wide = np.random.uniform(0, 0.5, 9)  # one width per bar
color = ['r', 'g']
plt.bar(x, y, width=wide, color=color, align='center')

# imshow
x = np.linspace(-8, 8, 8)
xs, ys = np.meshgrid(x, x)
z = np.sqrt(xs**2 + ys**2)  # distance of each grid point from the origin
plt.imshow(z, cmap=plt.cm.rainbow)
plt.colorbar()
Seaborn 📦
Google
Colab 🔗
# pairwise n^2
df = sns.load_dataset("penguins")
corr = df.corr(numeric_only=True)
sns.pairplot(df)
sns.clustermap(corr, annot=True, cmap="RdYlGn")
sns.heatmap(corr, annot=True, cmap="RdYlGn")

# FacetGrid 2^4
tips = sns.load_dataset("tips")
# One panel per (sex, time) combination.
g = sns.FacetGrid(tips, col="time", row="sex")
g.map(sns.scatterplot, "total_bill", "tip")

# boxplot n
sns.boxplot(data=df, log_scale=True)

# histplot
sns.histplot(np.random.randn(100), bins="auto", kde=True)

# regplot
x = np.random.randn(100)
sns.regplot(x=x,
            y=3 * x + np.random.randn(100),
            color="darkgray",
            scatter=True)
Celluloid & Imageio📦
# Camera
from celluloid import Camera

fig = plt.figure()
camera = Camera(fig)
# One snapshot per frame: draw, then snap.
for i in range(10):
    plt.plot([i] * 10)
    camera.snap()
animation = camera.animate()
animation.save('celluloid_minimal.gif', writer="pillow")
# Imageio
import imageio

# Keep only integer-typed years, in ascending order.  isinstance() also
# accepts NumPy integer widths other than int64.
years = sorted(x for x in df.p_year.unique() if isinstance(x, np.integer))
# One pre-rendered frame per year.
images = [imageio.imread(f"plots/year_{year}.png") for year in years]
imageio.mimsave("weather_stations.gif", images, duration=5)
Plotly
Favorites
# pandas.DataFrame.plot
fig = df.plot(x="A", y=["B", "C"], backend="plotly",
              color="anomaly", kind="scatter",
              # facet_row="data_source",
              facet_col="Hour", facet_col_wrap=4,
              width=3000, height=1000)
# Keep only the text after the last "=" in each facet annotation.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig.add_hline(y=0, line_dash="dash", line_color="black")

import plotly.graph_objects as go

trace = go.Scatter(x=df["windSpd_binned"], y=df["mw"],
                   mode="lines", line=dict(color="black", width=2),
                   name="Average")
# Add the line to every non-empty subplot with its legend entry hidden ...
trace.update(legendgroup="Average", showlegend=False)
fig.add_trace(trace, row="all", col="all", exclude_empty_subplots=True)
# ... then once more, to a single subplot, with the legend entry shown.
trace.update(legendgroup="Average", showlegend=True)
fig.add_trace(trace, row=len(df["year"].unique()), col=len(df["month"].unique()))
fig.show()
# secondary_y
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Columns passed as customdata; the hover template below indexes them by
# position, so the order matters.  Defined before the first trace uses it.
customdata = ["HE", 'baseline_prediction_value', 'top_feature', 'top_feature_value', 'second_feature', 'second_feature_value']

fig0 = make_subplots(specs=[[{"secondary_y": True}]])
# line
fig0.add_trace(go.Scatter(x=df["datetime_lcl"], y=df["predicted_mw"], mode='lines', name='predicted_mw', customdata=df[customdata], line_color='red', opacity=0.25),
               secondary_y=False)
fig0.add_trace(go.Scatter(x=df["datetime_lcl"], y=df["radSolar"], mode='lines', name='radSolar', opacity=0.25),
               secondary_y=True)
fig0.update_layout(title=f"{plant_id} actual vs predict by datetime_lcl", xaxis_title="datetime_lcl")
fig0.update_yaxes(title_text="power", secondary_y=False)
fig0.update_yaxes(title_text="radSolar", secondary_y=True)
fig0.show()

# hovertemplate
fig0.add_trace(go.Scatter(x=df["datetime_lcl"], y=df["predicted_mw"], mode='lines', customdata=df[customdata]))
# NOTE(review): the parts are joined with a single space; if each entry was
# meant to be on its own line in the hover box, Plotly needs "<br>" - confirm.
fig0.update_traces(hovertemplate=" ".join([
    "datetime: %{x}", "HE: %{customdata[0]}",
    "---------------------",
    "mw_pred: %{y}", "baseline_prediction_value: %{customdata[1]}",
    "-----------------",
    "top_feature: %{customdata[2]}",
    "top_feature_value: %{customdata[3]}",
    "second_feature: %{customdata[4]}",
    "second_feature_value: %{customdata[5]}"
]), )
GeoSpatial Data
Geopandas & h3Pandas & Folium
# GeoDataFrame
import geopandas as gpd
import h3pandas  # not referenced by name; the `.h3` accessor used below comes from it
import folium

data = {
    "City": ["Buenos Aires", "Brasilia", ],
    "Country": ["Argentina", "Brazil", ],
    "Latitude": [-34.58, -34.78, ],
    "Longitude": [-58.66, -58.91, ],
    "Value": np.random.randint(0, 100, 2)
}
df = pd.DataFrame(data)
# points_from_xy takes x (longitude) first, then y (latitude).  Index the dict
# directly so a missing key fails here instead of passing None along.
gdf = gpd.GeoDataFrame(
    data, geometry=gpd.points_from_xy(data["Longitude"], data["Latitude"]))
gdf = gdf.set_crs("EPSG:4326")

# Three panels: raw points, H3 cells at resolution 8, k-ring smoothed cells.
fig, axes = plt.subplots(1, 3, figsize=(18, 9))
gdf.plot(column="Value", ax=axes[0])
df.h3.geo_to_h3(8, lng_col="Longitude", lat_col="Latitude").h3.h3_to_geo_boundary().plot(ax=axes[1], column="Value")
gdf.h3.geo_to_h3(8)[["Value"]].h3.k_ring_smoothing(
    weights=[0.75**i for i in range(5)]).plot(column="Value", ax=axes[2])
# folium iframe popup
# NOTE: `map` shadows the builtin; the name is kept because later snippets
# refer to it.
map = folium.Map(location=df[["Latitude", "Longitude"]].mean(), zoom_start=11)
map.add_child(folium.LatLngPopup())
group1 = folium.FeatureGroup(name="group1", show=False)
df_agg = gdf.h3.geo_to_h3(8)[["Value"]].h3.k_ring_smoothing(
    weights=[0.75**i for i in range(5)]).reset_index()
cp = folium.Choropleth(
    geo_data=df_agg[["h3_hex_ring", "geometry"]],
    data=df_agg, columns=["h3_hex_ring", "Value"],
    key_on="feature.properties.h3_hex_ring",
    fill_color="Reds", fill_opacity=0.7, line_opacity=0.2,
    legend_name="Value", highlight=True,
)
# Remove the choropleth's children keyed "color_map..." (presumably its colour
# legend).  Iterate over a snapshot of the keys: deleting from a mapping while
# iterating it directly is an error in general.
for key in list(cp._children):
    if key.startswith('color_map'):
        del cp._children[key]
cp.add_to(map)

for i, (lat, lng) in enumerate(zip(data["Latitude"], data["Longitude"])):
    iframe = folium.IFrame(html="Amenities", width="auto", height="auto")
    popup = folium.Popup(iframe, sticky=True)
    if i == 0:
        # First point: circle marker inside the toggleable feature group.
        folium.CircleMarker(location=[lat, lng], color="blue", radius=10, weight=2, popup=popup, ).add_to(group1)
    else:
        # Remaining points: text-icon marker placed directly on the map.
        folium.Marker(location=[lat, lng], popup=popup, icon=folium.DivIcon(html=f"""⚡""")).add_to(map)
folium.PolyLine(list(zip(data["Latitude"], data["Longitude"])), color="royalblue").add_to(group1)
group1.add_to(map)
map.add_child(folium.LayerControl())
map
Folium
# Image in Popup
import base64

# Read the PNG inside a context manager so the file handle is closed.
with open('1.png', 'rb') as image_file:
    encoded = base64.b64encode(image_file.read())
# Bound `format` of an <img> tag template with a base64 data URI; call it
# with the encoded text.  No space after "<", or it is not parsed as a tag.
html = "<img src='data:image/png;base64,{}'>".format
popup = folium.Popup(
    folium.IFrame(
        html=html(encoded.decode('UTF-8')),
        width=700, height="50%", figsize=(12, 9)),
    max_height="60%")
# Fullscreen
from folium.plugins import Fullscreen

# Attach the fullscreen plugin to the map built earlier.
Fullscreen().add_to(map)