import lux
import pandas as pd
from pycaret.datasets import get_data
from lux.vis.VisList import VisList
# Use the Lux widget view as the default dataframe display
lux.config.default_display = "lux"
# Log the installed version of the key library for reproducibility
from importlib.metadata import version
print(f"lux-api=={version('lux-api')}")
# Output: lux-api==0.3.2
# Load a pre-packaged sample dataset (from pycaret.datasets) for testing
df = get_data("diabetes", verbose=False)
# Display the dataframe to get Lux's general-purpose plots
# NOTE - Lux offers an option to swap the standard pandas dataframe display for its EDA plots display
# NOTE - on large data Lux automatically takes a random sample; set lux.config.sampling = False to disable that
df
# Set a user intent column so Lux produces recommended plots, i.e. plots of that column pivoted against the other variables
# NOTE - the Enhance tab adds one more attribute to the current selection, highlighting how additional variables affect the relationship
# NOTE - the Filter tab adds a filter to the current selection while keeping the attributes on the X and Y axes fixed
# NOTE - the Generalize tab removes an attribute to display a more general trend
df.intent = ["Class variable"]  # a list, so multiple columns can be specified
df
# Build a specific list of plots from an explicit intent specification
VisList(
    ["Number of times pregnant=?", "Age (years)"],
    df,
)
# Select plots to be exported
# NOTE - plots are chosen by selecting them in the Lux GUI and then clicking the export button
df
# Retrieve the selected plots as Python variables
# NOTE(review): takes the first exported plot; presumably this fails if nothing was selected and exported in the widget above - confirm
vis = df.exported[0]
vis
# Print the source code that regenerates the selected plot, using the matplotlib backend
print(vis.to_code("matplotlib"))
# Output:
# import matplotlib.pyplot as plt
# plt.rcParams.update(
#     {
#         "axes.titlesize": 20,
#         "axes.titleweight": "bold",
#         "axes.labelweight": "bold",
#         "axes.labelsize": 16,
#         "legend.fontsize": 14,
#         "legend.title_fontsize": 15,
#         "xtick.labelsize": 13,
#         "ytick.labelsize": 13,
#     }
# )
# import numpy as np
# from math import nan
# df = pd.DataFrame({'Number of times pregnant': {3: 1, 5: 2, 0: 0, 7: 3, 9: 4, 1: 0, 11: 5, 13: 6, 2: 1, 6: 3}, 'Class variable': {3: 0, 5: 0, 0: 0, 7: 0, 9: 0, 1: 1, 11: 0, 13: 0, 2: 1, 6: 1}, 'Record': {3: 106.0, 5: 84.0, 0: 73.0, 7: 48.0, 9: 45.0, 1: 38.0, 11: 36.0, 13: 34.0, 2: 29.0, 6: 27.0}})
# fig, ax = plt.subplots()
# bars = df['Number of times pregnant']
# measurements = df['Record']
# ax.barh(['1', '2', '0', '3', '4', '5', '6'],[106.0, 84.0, 73.0, 48.0, 45.0, 36.0, 34.0], label='0')
# ax.barh(['0', '1', '3'],[38.0, 29.0, 27.0], label='1')
# ax.legend(title='Class variable', bbox_to_anchor=(1.05, 1), loc='upper left', ncol=1, frameon=False)
# ax.set_xlabel('Record')
# ax.set_ylabel('Number of times pregnant')
# fig
# Write the Lux report for this dataframe to a standalone HTML file
df.save_as_html("lux_report.html")
# Output: Saved HTML to lux_report.html