"""Crop recommendation: explore the data, train classifiers, predict, and save.

Notebook-style script. It loads ``Crop_recommendation.csv``, explores it,
encodes the crop label as an integer, scales the features (MinMax followed
by Standard scaling), compares several scikit-learn classifiers, trains a
random forest, runs one sample prediction, and pickles the trained forest
to ``model.pkl``.
"""
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import ExtraTreeClassifier

# ---------------------------------------------------------------------------
# Load and inspect the data
# ---------------------------------------------------------------------------
crop = pd.read_csv("Crop_recommendation.csv")
crop.head()
crop.info()

# Check whether any null values are present.
crop.isnull().sum()

# Check whether any duplicate rows are present.
crop.duplicated().sum()

# Summary statistics for the numeric columns only
# (the "25%" row is the 25th percentile).
crop.describe()

# ---------------------------------------------------------------------------
# Explore correlations
# ---------------------------------------------------------------------------
# Only numeric columns can take part in the correlation computation, so the
# text 'label' column is excluded first.
numeric_columns = crop.select_dtypes(include=['number'])

# Compute the correlation matrix.
corr = numeric_columns.corr()
corr

sns.heatmap(corr, annot=True, cmap='coolwarm')

crop['label'].value_counts()

# sns.distplot is deprecated; histplot with a KDE overlay on a density scale
# is the documented replacement.
sns.histplot(crop['N'], kde=True, stat="density")
plt.show()

# ---------------------------------------------------------------------------
# Encode the target label as an integer
# ---------------------------------------------------------------------------
crop_dict = {
    'rice': 1,
    'maize': 2,
    'jute': 3,
    'cotton': 4,
    'coconut': 5,
    'papaya': 6,
    'orange': 7,
    'apple': 8,
    'muskmelon': 9,
    'watermelon': 10,
    'grapes': 11,
    'mango': 12,
    'banana': 13,
    'pomegranate': 14,
    'lentil': 15,
    'blackgram': 16,
    'mungbean': 17,
    'mothbeans': 18,
    'pigeonpeas': 19,
    'kidneybeans': 20,
    'chickpea': 21,
    'coffee': 22,
}
# The column name 'crop_num' is arbitrary; any name would do.
crop['crop_num'] = crop['label'].map(crop_dict)
crop['crop_num'].value_counts()

# Drop the text label so that `x` below holds only the numeric feature columns.
crop.drop('label', axis=1, inplace=True)
crop.head(500)  # preview only; not needed for training

x = crop.drop('crop_num', axis=1)
y = crop['crop_num']

# ---------------------------------------------------------------------------
# Train / test split
# ---------------------------------------------------------------------------
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)
x_train.shape
x_test.shape

# ---------------------------------------------------------------------------
# Feature scaling: MinMax first, then standardization.
# Both scalers are fitted on the training split only and reused, in the same
# order, for the test split and inside recommendation().
# ---------------------------------------------------------------------------
ms = MinMaxScaler()
x_train = ms.fit_transform(x_train)
x_test = ms.transform(x_test)

sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

# ---------------------------------------------------------------------------
# Train and compare several models
# ---------------------------------------------------------------------------
models = {
    'Logistic Regression': LogisticRegression(),
    'Support Vector Machine': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Bagging': BaggingClassifier(),
    'AdaBoost': AdaBoostClassifier(),
    'Gradient Boosting': GradientBoostingClassifier(),
    'Extra Trees': ExtraTreeClassifier(),
    'Naive Bayes': GaussianNB(),
    'Random Forest': RandomForestClassifier(),
}

# md = model
for name, md in models.items():
    md.fit(x_train, y_train)
    ypred = md.predict(x_test)
    print(f"{name} with accuracy : {accuracy_score(y_test,ypred)}")

# Final model used by the predictive system below.
rfc = RandomForestClassifier()
rfc.fit(x_train, y_train)
ypred = rfc.predict(x_test)
accuracy_score(y_test, ypred)


# ---------------------------------------------------------------------------
# Predictive system
# ---------------------------------------------------------------------------
def recommendation(N, P, k, temperature, humidity, ph, rainfal):
    """Predict the encoded crop number for one set of measurements.

    The seven values are assembled, in the order given, into a single-row
    feature array, passed through the module-level scalers ``ms`` and then
    ``sc`` (the same order used for training), and classified with the
    module-level random forest ``rfc``.

    Returns:
        A NumPy array of length 1 holding the predicted ``crop_num`` value;
        index it with ``[0]`` to get the scalar class.
    """
    features = np.array([[N, P, k, temperature, humidity, ph, rainfal]])
    transformed_features = ms.transform(features)
    transformed_features = sc.transform(transformed_features)
    # reshape(1, -1) makes the prediction a 2-D array with a single row, so
    # prediction[0] below is that row (a length-1 array).
    prediction = rfc.predict(transformed_features).reshape(1, -1)
    return prediction[0]


# Alternative sample inputs kept for manual experimentation:
# N = 40
# P = 50
# k = 50
# temperature = 40.0
# humidity = 20.0
# ph = 100.0
# rainfall = 100.0

# N = 30
# P = 10
# k = 100
# temperature = 100.0
# humidity = 210.0
# ph = 100.0
# rainfall = 23.0

N = 30
P = 20
k = 150
temperature = 23  # Best for apple
humidity = 60
ph = 5.5
rainfall = 900

predict = recommendation(N, P, k, temperature, humidity, ph, rainfall)

# Reverse lookup: encoded number -> display name.
# NOTE: this rebinds `crop_dict`, replacing the name -> number mapping above.
crop_dict = {
    1: "Rice",
    2: "Maize",
    3: "Jute",
    4: "Cotton",
    5: "Coconut",
    6: "Papaya",
    7: "Orange",
    8: "Apple",
    9: "Muskmelon",
    10: "Watermelon",
    11: "Grapes",
    12: "Mango",
    13: "Banana",
    14: "Pomegranate",
    15: "Lentil",
    16: "Blackgram",
    17: "Mungbean",
    18: "Mothbeans",
    19: "Pigeonpeas",
    20: "Kidneybeans",
    21: "Chickpea",
    22: "Coffee",
}

if predict[0] in crop_dict:
    # Use a separate name so the `crop` DataFrame is not overwritten.
    best_crop = crop_dict[predict[0]]
    print("{} is a best crop to be cultivated ".format(best_crop))
else:
    print("Sorry are not able to recommend a proper crop for this environment")

# ---------------------------------------------------------------------------
# Persist the trained model
# ---------------------------------------------------------------------------
# 'wb' = write binary. The context manager guarantees the file is flushed and
# closed even if pickling fails.
# NOTE(review): only `rfc` is saved. Anything that loads 'model.pkl' must also
# apply the fitted `ms` and `sc` scalers to its inputs, and those are not
# persisted here -- consider saving them alongside the model.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(rfc, model_file)