import pandas as pd
import numpy as np

# Load the crop recommendation dataset and preview its first rows.
crop = pd.read_csv("Crop_recommendation.csv")
crop.head()

# Print a concise summary of the frame (columns, dtypes, non-null counts).
crop.info()

# Count of missing values in each column.
crop.isna().sum()

# Count of rows that are exact duplicates of an earlier row.
crop.duplicated().sum()

# Summary statistics of the numeric columns
# (the 25% / 50% / 75% rows are percentiles).
crop.describe()

# Data correlation: restrict the frame to its numeric columns first, then
# compute the pairwise correlation matrix on that subset.
numeric_columns = crop.select_dtypes(include="number")
corr = numeric_columns.corr()
corr




import matplotlib.pyplot as plt
import seaborn as sns

# Correlation heatmap. It gets its own figure so that, when this file is run
# as a plain script, the distribution plot below is not drawn on the same axes.
plt.figure()
sns.heatmap(corr, annot=True, cmap='coolwarm')

# Number of samples available for each crop label.
crop['label'].value_counts()

# Distribution of the 'N' column. `sns.distplot` is deprecated in seaborn;
# `histplot` with a KDE overlay on a density scale is its documented
# replacement (`cut=3` matches the old KDE extent).
plt.figure()
sns.histplot(crop['N'], kde=True, stat='density', kde_kws=dict(cut=3))
plt.show()





# Encoding: give every crop name an integer code, numbered from 1 in the
# order the names are listed here.
crop_dict = {
    crop_name: code
    for code, crop_name in enumerate(
        [
            'rice',
            'maize',
            'jute',
            'cotton',
            'coconut',
            'papaya',
            'orange',
            'apple',
            'muskmelon',
            'watermelon',
            'grapes',
            'mango',
            'banana',
            'pomegranate',
            'lentil',
            'blackgram',
            'mungbean',
            'mothbeans',
            'pigeonpeas',
            'kidneybeans',
            'chickpea',
            'coffee',
        ],
        start=1,
    )
}

# Numeric target column; the column name 'crop_num' is arbitrary.
crop['crop_num'] = crop['label'].map(crop_dict)

# Number of samples per encoded class.
crop['crop_num'].value_counts()

# Remove the text label in place. The feature frame below is built by dropping
# only 'crop_num', so 'label' has to be gone first or it would stay among the
# features.
crop.drop(columns='label', inplace=True)
crop.head(500)

# Target vector and feature matrix (every remaining column except the target).
y = crop['crop_num']
x = crop.drop(columns='crop_num')



# Train / test split followed by two feature-scaling passes.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# Hold out 20% of the rows for testing; the fixed seed keeps the split the
# same from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

x_train.shape

x_test.shape

# Min-max scaling: learn the per-feature range from the training rows only,
# then apply that same mapping to both splits.
ms = MinMaxScaler()
ms.fit(x_train)
x_train = ms.transform(x_train)
x_test = ms.transform(x_test)

# Standardization: fitted on the already min-max-scaled training rows, then
# applied to both splits.
sc = StandardScaler()
sc.fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)







# Training Models


from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score

# Candidate classifiers, each with its default settings, keyed by the name
# that is printed next to its score.
models = {
    'Logistic Regression': LogisticRegression(),
    'Support Vector Machine': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Bagging': BaggingClassifier(),
    'AdaBoost': AdaBoostClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'Extra Trees': ExtraTreeClassifier(),
    'Naive Bayes': GaussianNB(),
    'Random Forest': RandomForestClassifier()
}

# Fit every candidate on the training split and report its test accuracy.
for model_name, classifier in models.items():
    classifier.fit(x_train, y_train)
    ypred = classifier.predict(x_test)
    score = accuracy_score(y_test, ypred)

    print(f"{model_name}  with accuracy : {score}")


# Train a separate random forest; this is the instance the prediction helper
# and the pickle step further down refer to as `rfc`.
rfc = RandomForestClassifier().fit(x_train, y_train)
ypred = rfc.predict(x_test)
accuracy_score(y_test, ypred)






# Predictive System


def recommendation(N,P,k,temperature,humidity,ph,rainfal):
    """Predict the encoded crop label for one set of growing conditions.

    The seven arguments are placed in a single row in the order
    ``N, P, k, temperature, humidity, ph, rainfal``, passed through the
    module-level scalers ``ms`` and then ``sc``, and classified by the
    module-level model ``rfc``.

    Returns:
        A one-element NumPy array holding the predicted label, so callers
        read the label with ``result[0]``.
    """
    features = np.array([[N, P, k, temperature, humidity, ph, rainfal]])

    # When `ms` was fitted on a DataFrame, scikit-learn stores the column
    # names and warns on every call that passes a bare ndarray. Re-attach the
    # stored names (when present) so the input matches what the scaler saw.
    feature_names = getattr(ms, "feature_names_in_", None)
    if feature_names is not None:
        features = pd.DataFrame(features, columns=feature_names)

    transformed_features = ms.transform(features)
    transformed_features = sc.transform(transformed_features)

    # reshape(1, -1) turns the prediction into a single-row 2-D array; taking
    # row 0 hands back that row as a one-element array, not a bare scalar.
    prediction = rfc.predict(transformed_features).reshape(1, -1)

    return prediction[0]


# The .reshape(1, -1) call reshapes the prediction array into a 2-dimensional array with 1 row and as many columns as necessary to fit the data; indexing that result with [0] then returns the single row.
  








# N = 40
# P = 50
# k = 50
# temperature = 40.0
# humidity = 20.0
# ph = 100.0
# rainfall = 100.0


# N = 30
# P = 10
# k = 100
# temperature = 100.0
# humidity = 210.0
# ph = 100.0
# rainfall = 23.0

# Example growing conditions to run through the recommender.
N = 30
P = 20
k = 150
temperature = 23      # Best for apple
humidity = 60
ph = 5.5
rainfall = 900

predict = recommendation(N,P,k,temperature,humidity,ph,rainfall)

# Reverse lookup from the integer code to a display name.
# NOTE: this rebinds `crop_dict`, which earlier held the name -> code mapping.
crop_dict = {1: "Rice", 2: "Maize", 3: "Jute", 4: "Cotton", 5: "Coconut", 6: "Papaya", 7: "Orange",
                 8: "Apple", 9: "Muskmelon", 10: "Watermelon", 11: "Grapes", 12: "Mango", 13: "Banana",
                 14: "Pomegranate", 15: "Lentil", 16: "Blackgram", 17: "Mungbean", 18: "Mothbeans",
                 19: "Pigeonpeas", 20: "Kidneybeans", 21: "Chickpea", 22: "Coffee"}

# Keep the result in its own name: assigning it to `crop` would replace the
# dataset DataFrame with a string and break any later use of `crop`.
recommended_crop = crop_dict.get(predict[0])
if recommended_crop is not None:
    print("{} is a best crop to be cultivated ".format(recommended_crop))
else:
    print("Sorry are not able to recommend a proper crop for this environment")







import pickle
# Save the trained random forest to 'model.pkl'. The `with` block makes sure
# the file is flushed and closed even if pickling raises.
with open('model.pkl', 'wb') as model_file:           # wb = write binary
    pickle.dump(rfc, model_file)
# NOTE: `rfc` was trained on features transformed by `ms` and then `sc`.
# Those fitted scalers are not saved here, so code that loads 'model.pkl'
# needs them from elsewhere to prepare its inputs the same way.