import pandas as pd
from pycaret.datasets import get_data

from pandas_profiling import ProfileReport


# Log the installed version of the profiling library so the run is reproducible
from importlib.metadata import version

print(f"pandas-profiling=={version('pandas-profiling')}")

pandas-profiling==3.0.0


# Load the bundled 'diabetes' sample dataset to use as test data
# (verbose=True is passed so that a preview of the data is shown on load)
df = get_data(dataset='diabetes', verbose=True)


# Build a detailed EDA report on the data: single-variable and pairwise
# distributions, correlations and missing values.
# NOTE - pass minimal=True when the dataset is large, to skip expensive computations
# NOTE - pass explorative=True when the data contains text, image or file columns
# NOTE - pass sensitive=True when the data is confidential, so that the report
#        shows aggregate information instead of individual values (see the
#        pandas-profiling documentation on sensitive data)
profile = ProfileReport(
    df=df,
    title="Pandas Profiling Report - Diabetes",
)


# Display the report inline in the Jupyter notebook
profile.to_notebook_iframe() # output is embedded in, and saved with, the notebook
# Alternative: profile.to_widgets() # only available in an interactive session


# Export the report as a standalone HTML file
profile.to_file(output_file="pandas_profiling_report.html")

	Number of times pregnant	Plasma glucose concentration a 2 hours in an oral glucose tolerance test	Diastolic blood pressure (mm Hg)	Triceps skin fold thickness (mm)	2-Hour serum insulin (mu U/ml)	Body mass index (weight in kg/(height in m)^2)	Diabetes pedigree function	Age (years)	Class variable
0	6	148	72	35	0	33.6	0.627	50	1
1	1	85	66	29	0	26.6	0.351	31	0
2	8	183	64	0	0	23.3	0.672	32	1
3	1	89	66	23	94	28.1	0.167	21	0
4	0	137	40	35	168	43.1	2.288	33	1

(1) Import libraries

(2) Get data

(3) Perform EDA

(4) Save report