## (1) Import libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from pycaret.datasets import get_data
import sklearn.model_selection

import autokeras as ak

In [2]:
# Log the installed AutoKeras release next to the results so the run
# can be tied to a specific library version
from importlib.metadata import version

autokeras_version = version('autokeras')
print('autokeras==%s' % autokeras_version)

autokeras==1.0.15


## (2) Get data

In [3]:
# Tip: get_data('index') returns the list of all pre-packaged datasets

# Load the pre-packaged dataset used for testing in this notebook
dataset_name = 'diamond'
data = get_data(dataset_name)

Unnamed: 0,Carat Weight,Cut,Color,Clarity,Polish,Symmetry,Report,Price
0,1.1,Ideal,H,SI1,VG,EX,GIA,5169
1,0.83,Ideal,H,VS1,ID,ID,AGSL,3470
2,0.85,Ideal,H,SI1,EX,EX,GIA,3183
3,0.91,Ideal,E,SI1,VG,VG,GIA,4370
4,0.83,Ideal,G,SI1,EX,EX,GIA,3171


In [4]:
# Split data into features (X) and target (y).
# The target is selected by name instead of by position so the split stays
# correct even if the column order of the dataset changes.
target_column = 'Price'
X = data.drop(columns=target_column)
y = data[target_column]

# Specify types of features - can only be either categorical or numerical
# This notebook declares a type for every feature column in one dictionary
# Categorical columns may hold string values
column_types = {
    'Carat Weight': 'numerical',
    'Cut': 'categorical',
    'Color': 'categorical',
    'Clarity': 'categorical',
    'Polish': 'categorical',
    'Symmetry': 'categorical',
    'Report': 'categorical'
}

# Fail early if a feature column has no declared type, or if a declared
# column does not exist in X (e.g. after a typo or a dataset change)
assert set(column_types) == set(X.columns), (
    'column_types does not match the feature columns: %s'
    % sorted(set(column_types) ^ set(X.columns))
)

# Split data into training and testing data
# test_size=0.25 is the scikit-learn default, written out so the split
# ratio is visible; random_state keeps the split reproducible
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, test_size=0.25, random_state=6
)

# Every row must land in exactly one of the two splits
assert len(X_train) + len(X_test) == len(X)
assert len(y_train) + len(y_test) == len(y)

display(X)
display(y)

Unnamed: 0,Carat Weight,Cut,Color,Clarity,Polish,Symmetry,Report
0,1.10,Ideal,H,SI1,VG,EX,GIA
1,0.83,Ideal,H,VS1,ID,ID,AGSL
2,0.85,Ideal,H,SI1,EX,EX,GIA
3,0.91,Ideal,E,SI1,VG,VG,GIA
4,0.83,Ideal,G,SI1,EX,EX,GIA
...,...,...,...,...,...,...,...
5995,1.03,Ideal,D,SI1,EX,EX,GIA
5996,1.00,Very Good,D,SI1,VG,VG,GIA
5997,1.02,Ideal,D,SI1,EX,EX,GIA
5998,1.27,Signature-Ideal,G,VS1,EX,EX,GIA


0        5169
1        3470
2        3183
3        4370
4        3171
        ...  
5995     6250
5996     5328
5997     6157
5998    11206
5999    30507
Name: Price, Length: 6000, dtype: int64

## (3) Setup model object

In [5]:
# Configure the AutoKeras structured-data regressor that drives the search.
# Feature names come from X and their types from column_types; the model is
# scored with mean squared error and seeded for repeatability.
feature_names = list(X.columns)
automl = ak.StructuredDataRegressor(
    column_names=feature_names,
    column_types=column_types,
    loss="mean_squared_error",
    max_trials=100,
    overwrite=True,
    project_name='diamond',
    seed=6,
)

## (4) Fit model

In [6]:
# Run the model search on the training split to find the best model
automl.fit(x=X_train, y=y_train)

Trial 52 Complete [00h 08m 02s]
val_loss: 6535161.5

Best val_loss So Far: 1611195.375
Total elapsed time: 04h 22m 12s
INFO:tensorflow:Oracle triggered exit
Epoch 1/642
Epoch 2/642
Epoch 3/642
Epoch 4/642
Epoch 5/642
Epoch 6/642
Epoch 7/642
Epoch 8/642
Epoch 9/642
Epoch 10/642
Epoch 11/642
Epoch 12/642
Epoch 13/642
Epoch 14/642
Epoch 15/642
Epoch 16/642
Epoch 17/642
Epoch 18/642
Epoch 19/642
Epoch 20/642
Epoch 21/642
Epoch 22/642
Epoch 23/642
Epoch 24/642
Epoch 25/642
Epoch 26/642
Epoch 27/642
Epoch 28/642
Epoch 29/642
Epoch 30/642
Epoch 31/642
Epoch 32/642
Epoch 33/642
Epoch 34/642
Epoch 35/642
Epoch 36/642
Epoch 37/642
Epoch 38/642
Epoch 39/642
Epoch 40/642
Epoch 41/642
Epoch 42/642
Epoch 43/642
Epoch 44/642
Epoch 45/642
Epoch 46/642
Epoch 47/642
Epoch 48/642
Epoch 49/642
Epoch 50/642
Epoch 51/642
Epoch 52/642
Epoch 53/642
Epoch 54/642
Epoch 55/642
Epoch 56/642
Epoch 57/642
Epoch 58/642
Epoch 59/642
Epoch 60/642
Epoch 61/642
Epoch 62/642
Epoch 63/642
Epoch 64/642
Epoch 65/642
Epoch 6

## (5) Generate predictions & test results

In [8]:
# Generate predictions for the held-out rows with the best model
predicted_y = automl.predict(X_test)

# Score the best model on both splits; each result is printed right after
# it is computed so the output order matches the evaluation order
train_metrics = automl.evaluate(x=X_train, y=y_train)
print("Train loss and mean_squared_error:", train_metrics)

test_metrics = automl.evaluate(x=X_test, y=y_test)
print("Test loss and mean_squared_error:", test_metrics)

Train loss and mean_squared_error: [916079.8125, 916079.8125]
Test loss and mean_squared_error: [1784458.125, 1784458.125]


## (6) Get best model

In [10]:
# Extract the best model from the AutoKeras search object.
# Per the original note this returns a standard TensorFlow (Keras) model,
# which is why summary() and save() can be called on it directly below.
best_model = automl.export_model()

# Check model structure: prints each layer with its output shape and
# parameter count
best_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 7)]               0         
_________________________________________________________________
multi_category_encoding (Mul (None, 7)                 0         
_________________________________________________________________
normalization (Normalization (None, 7)                 15        
_________________________________________________________________
dense (Dense)                (None, 1024)              8192      
_________________________________________________________________
re_lu (ReLU)                 (None, 1024)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               262400    
_________________________________________________________________
re_lu_1 (ReLU)               (None, 256)               0     

In [12]:
# Save the exported model to the relative path 'best_model'
# (the recorded log shows assets being written to best_model/assets).
# NOTE(review): the model summary lists a multi_category_encoding layer, so
# reloading presumably requires AutoKeras custom objects (e.g. passing
# custom_objects=ak.CUSTOM_OBJECTS to the Keras load function) - confirm
# against the AutoKeras export documentation.
best_model.save('best_model')

INFO:tensorflow:Assets written to: best_model/assets
