Sunday, January 02, 2022

Kaggle (Titanic ML competition) - Kapil Sharma (Class-103/S2022)


import numpy as np # linear algebra

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.ensemble import RandomForestClassifier


# Read the competition's training and test CSV files (in that order) from
# the Kaggle input directory, using pandas' default parsing options.
train_data, test_data = map(
    pd.read_csv,
    (
        "/kaggle/input/titanic/train.csv",
        "/kaggle/input/titanic/test.csv",
    ),
)

# Notebook-style preview of the first rows of the training frame; as a bare
# expression its result is discarded when this runs as a plain script.
train_data.head()


# Survival rate by sex, computed from the "Survived" column of the training
# set. Rows and column are selected in a single .loc call (no chained
# indexing), and the rate is the vectorised Series.mean() rather than a
# Python-level sum()/len(); an empty group therefore yields NaN instead of
# raising ZeroDivisionError.
# NOTE(review): the printed values are fractions, not percentages, despite
# the "%" in the labels (assuming "Survived" is coded 0/1 -- confirm).
women = train_data.loc[train_data.Sex == 'female', "Survived"]
rate_women = women.mean()

print("% of women who survived:", rate_women)

# Notebook-style look at the male passengers' rows; as a bare expression its
# result is discarded when this runs as a plain script.
train_data[train_data.Sex == 'male']

men = train_data.loc[train_data.Sex == 'male', "Survived"]
rate_men = men.mean()

print("% of men who survived:", rate_men)


# Label column the classifier is trained to predict.
target = train_data["Survived"]

# Input columns used by the model.
features = ["Sex", "Parch", "SibSp"]

# One-hot encode the non-numeric feature columns of each split.
X = pd.get_dummies(train_data[features])
X_test = pd.get_dummies(test_data[features])

# get_dummies derives its output columns from the values present in each
# frame, so the two encodings can differ in column set or order when a
# category is missing from one split. Align the test matrix to the training
# columns (absent dummies become 0, extra ones are dropped) so the model
# always predicts on the layout it was fitted on. When the columns already
# match this is a no-op.
X_test = X_test.reindex(columns=X.columns, fill_value=0)

# Random forest of 100 trees, each limited to depth 5; the fixed
# random_state keeps the fitted model reproducible between runs.
model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=1)
model.fit(X, target)
predictions = model.predict(X_test)

# Write the submission file: one row per test passenger, without the
# DataFrame index column.
output = pd.DataFrame({"PassengerId": test_data.PassengerId, "Survived": predictions})
output.to_csv("my_submission.csv", index=False)

# Notebook-style preview of the submission; as a bare expression its result
# is discarded when this runs as a plain script.
output.head()


No comments: