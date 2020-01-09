Discover, triage, and prioritize Python errors in real-time
sklearn.datasets
# importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
# importing dataset from sklearn
from sklearn.datasets import load_boston
# NOTE: `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this call only works with older scikit-learn releases.
boston_data = load_boston()
# initializing dataset
# Wrap the raw feature matrix in a DataFrame; no column names are set yet.
data_ = pd.DataFrame(boston_data.data)
### Top five rows of dataset
data_.head()
# Adding features names to the dataframe
data_.columns = boston_data.feature_names
# Preview again, this time with the named feature columns.
data_.head()
Pre-processing the data
# Attach the regression target to the frame as a new PRICE column.
data_['PRICE'] = boston_data.target
# Count the missing values in every column.
data_.isna().sum()
# Summarize column dtypes and non-null counts to spot non-numeric columns.
data_.info()
Creating model
# Separate the feature matrix (every column except PRICE) from the
# target vector (the PRICE column).
X = data_.drop(['PRICE'], axis=1)
y = data_['PRICE']
# splitting into training and testing set
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
# Report the shape of each partition as a quick sanity check.
print("X training shape : ", X_train.shape)
print("X test shape : ", X_test.shape)
print("y training shape : ", y_train.shape)
print("y test shape : ", y_test.shape)
# Build a random-forest regressor with default settings and train it on the
# training partition. `fit` returns the fitted estimator itself, so the
# construction and training steps can be chained.
from sklearn.ensemble import RandomForestRegressor
classifier = RandomForestRegressor().fit(X_train, y_train)
# Score the fitted model on the same data it was trained on.
prediction = classifier.predict(X_train)
# The mean squared error feeds both the MSE and RMSE lines, so compute it once.
train_mse = metrics.mean_squared_error(y_train, prediction)
print("r^2 : ", metrics.r2_score(y_train, prediction))
print("Mean Absolute Error: ", metrics.mean_absolute_error(y_train, prediction))
print("Mean Squared Error: ", train_mse)
print("Root Mean Squared Error : ", np.sqrt(train_mse))
# Model evaluation for testing data
# Score the fitted model on the held-out partition it has not seen.
prediction_test = classifier.predict(X_test)
print("r^2 : ", metrics.r2_score(y_test, prediction_test))
print("Mean Absolute Error : ", metrics.mean_absolute_error(y_test, prediction_test))
print("Mean Squared Error : ", metrics.mean_squared_error(y_test, prediction_test))
# This value is the square root of the mean *squared* error, so it is
# labelled RMSE.
print("Root Mean Squared Error : ", np.sqrt(metrics.mean_squared_error(y_test, prediction_test)))
The `pickle` serialization and deserialization mechanism saves the machine learning model object as a byte stream and restores it again. The model will be saved under the `model` folder.
# saving the model
import os
import pickle

# open() does not create missing directories, so make sure the output
# folder exists before writing into it.
os.makedirs('model', exist_ok=True)
with open('model/model.pkl', 'wb') as file:
    pickle.dump(classifier, file)
# saving the columns
# Persist the feature names in training order next to the model.
model_columns = list(X.columns)
with open('model/model_columns.pkl', 'wb') as file:
    pickle.dump(model_columns, file)
>> pip install Flask
from flask import Flask

app = Flask(__name__)


@app.route('/', methods=['GET', 'POST'])
def main():
    """Return the plain-text landing message for the site root.

    The route accepts both GET and POST and answers them identically.
    """
    return "Boston House Price Prediction"


if __name__ == "__main__":
    # Start the server only when this file is executed directly.
    app.run()
Running the app
>> export FLASK_APP=app.py
>> export FLASK_ENV=development
>> flask run
The `app.py` file should look like:
from flask import render_template, request, jsonify
import flask
import numpy as np
import traceback
import pickle
import pandas as pd
# App definition
# Reach the Flask class through the already-imported `flask` module.
app = flask.Flask(__name__, template_folder='templates')
# importing models
# NOTE(security): unpickling can execute arbitrary code, so these files must
# come from a trusted source.
with open('webapp/model/model.pkl', 'rb') as f:
    classifier = pickle.load(f)
with open('webapp/model/model_columns.pkl', 'rb') as f:
    model_columns = pickle.load(f)
@app.route('/')
def welcome():
    """Return the plain-text banner served at the site root."""
    banner = "Boston Housing Price Prediction"
    return banner
@app.route('/predict', methods=['POST', 'GET'])
def predict():
    """Predict with the loaded model for the data posted as JSON.

    Any request that is not a POST gets the plain-text placeholder
    ``"Prediction page"``. A POST has its JSON body turned into a DataFrame,
    aligned to ``model_columns`` and passed to ``classifier.predict``.

    Returns:
        For POST, a JSON object ``{"prediction": "<list rendered as text>"}``
        on success, or ``{"trace": "<formatted traceback>"}`` if anything
        raised while handling the request.
    """
    # Flask also routes HEAD to a view that accepts GET, so treat every
    # non-POST method alike instead of falling through and returning None.
    if flask.request.method != 'POST':
        return "Prediction page"

    try:
        json_ = request.json
        print(json_)
        query_ = pd.get_dummies(pd.DataFrame(json_))
        # Line the input up with the stored column list: absent columns are
        # filled with 0 and columns not in the list are dropped.
        query = query_.reindex(columns=model_columns, fill_value=0)
        # tolist() yields plain Python floats, so the text form stays a simple
        # list of numbers on every NumPy version.
        prediction = classifier.predict(query).tolist()
        return jsonify({
            "prediction": str(prediction)
        })
    except Exception:
        # NOTE(security): the traceback is sent to the client, which is handy
        # while developing but exposes internals if deployed as-is.
        return jsonify({
            "trace": traceback.format_exc()
        })
if __name__ == "__main__":
    # Launch the app's server only when this file is run as a script,
    # not when it is imported.
    app.run()