Hi,
I am trying to predict the survivor for the Titanic Data Set using ANN and I am getting the accuracy of around 60 % . Below is my code :-
#!/usr/bin/env python
coding: utf-8
In[1]:
import pandas as pd
import numpy as np
In[2]:
train_data = pd.read_csv(’/cxldata/datasets/project/titanic/train.csv’)
test_data = pd.read_csv(’/cxldata/datasets/project/titanic/test.csv’)
In[3]:
train_data.info()
In[4]:
test_data
# Droping the traget column
In[5]:
train_data_final = train_data.drop([“Survived”], axis = 1)
y = train_data[“Survived”]
In[6]:
train_data_final.shape
# Replacing Null values with median and coverting Categorical columns using one hot coding
In[7]:
from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import FeatureUnion
In[8]:
class DataFrameSelector(BaseEstimator, TransformerMixin):
def init(self, attribute_names):
self.attribute_names = attribute_names
def fit(self, X, y=None):
return self
def transform(self, X):
return X[self.attribute_names]
In[9]:
num_pipeline = Pipeline([
(“select_numeric”, DataFrameSelector([“Age”, “SibSp”, “Parch”, “Fare”])),
(“imputer”, SimpleImputer(strategy=“median”)),
])
In[10]:
num_pipeline.fit_transform(train_data_final)
In[11]:
cat_pipeline = Pipeline([
(“select_cat”, DataFrameSelector([“Pclass”, “Sex”, “Embarked”])),
(“imputer”, SimpleImputer(strategy=“most_frequent”)),
(“cat_encoder”, OneHotEncoder(sparse=False)),
])
In[12]:
cat_pipeline.fit_transform(train_data_final)
In[13]:
preprocess_pipeline = FeatureUnion(transformer_list=[
(“num_pipeline”, num_pipeline),
(“cat_pipeline”, cat_pipeline),
])
In[14]:
X = preprocess_pipeline.fit_transform(train_data_final)
In[15]:
X
# Importing keras and coverting NP array to tenserflow array
In[16]:
from sklearn.model_selection import train_test_split
In[17]:
import tensorflow as tf
from tensorflow import keras
# Splitting the Train data into train set and validate set
In[18]:
X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=42)
In[19]:
X_train = tf.convert_to_tensor(X_train)
In[20]:
X_valid = tf.convert_to_tensor(X_valid)
In[21]:
y_train = tf.convert_to_tensor(y_train)
In[22]:
y_valid = tf.convert_to_tensor(y_valid)
In[23]:
X_train.shape[1:]
In[24]:
np.random.seed(42) # generating random see
tf.random.set_seed(42) # setting random seed|
tf.keras.backend.clear_session()
In[25]:
tf.keras.backend.clear_session()
model = keras.models.Sequential([
keras.layers.Dropout(.1, input_shape=X_train.shape[1:]),
keras.layers.Dense(60, activation=“relu”),
keras.layers.Dense(1)
])
In[26]:
model.compile(loss=“mean_squared_error”, optimizer=keras.optimizers.SGD(lr=1e-4), metrics=[“accuracy”]) # compiling the model
In[27]:
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))
In[28]:
mse_test = model.evaluate(X_valid, y_valid)
In[29]:
model.summary()
Please let me know what I need to do so that the accuracy can increase