In [58]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


data = pd.read_csv("irisdata.csv")

print(data)
     sepal-length  sepal-width  petal-length  petal-width          species
0             5.1          3.5           1.4          0.2      Iris-setosa
1             4.9          3.0           1.4          0.2      Iris-setosa
2             4.7          3.2           1.3          0.2      Iris-setosa
3             4.6          3.1           1.5          0.2      Iris-setosa
4             5.0          3.6           1.4          0.2      Iris-setosa
5             5.4          3.9           1.7          0.4      Iris-setosa
6             4.6          3.4           1.4          0.3      Iris-setosa
7             5.0          3.4           1.5          0.2      Iris-setosa
8             4.4          2.9           1.4          0.2      Iris-setosa
9             4.9          3.1           1.5          0.1      Iris-setosa
10            5.4          3.7           1.5          0.2      Iris-setosa
11            4.8          3.4           1.6          0.2      Iris-setosa
12            4.8          3.0           1.4          0.1      Iris-setosa
13            4.3          3.0           1.1          0.1      Iris-setosa
14            5.8          4.0           1.2          0.2      Iris-setosa
15            5.7          4.4           1.5          0.4      Iris-setosa
16            5.4          3.9           1.3          0.4      Iris-setosa
17            5.1          3.5           1.4          0.3      Iris-setosa
18            5.7          3.8           1.7          0.3      Iris-setosa
19            5.1          3.8           1.5          0.3      Iris-setosa
20            5.4          3.4           1.7          0.2      Iris-setosa
21            5.1          3.7           1.5          0.4      Iris-setosa
22            4.6          3.6           1.0          0.2      Iris-setosa
23            5.1          3.3           1.7          0.5      Iris-setosa
24            4.8          3.4           1.9          0.2      Iris-setosa
25            5.0          3.0           1.6          0.2      Iris-setosa
26            5.0          3.4           1.6          0.4      Iris-setosa
27            5.2          3.5           1.5          0.2      Iris-setosa
28            5.2          3.4           1.4          0.2      Iris-setosa
29            4.7          3.2           1.6          0.2      Iris-setosa
30            4.8          3.1           1.6          0.2      Iris-setosa
31            5.4          3.4           1.5          0.4      Iris-setosa
32            5.2          4.1           1.5          0.1      Iris-setosa
33            5.5          4.2           1.4          0.2      Iris-setosa
34            4.9          3.1           1.5          0.1      Iris-setosa
35            5.0          3.2           1.2          0.2      Iris-setosa
36            5.5          3.5           1.3          0.2      Iris-setosa
37            4.9          3.1           1.5          0.1      Iris-setosa
38            4.4          3.0           1.3          0.2      Iris-setosa
39            5.1          3.4           1.5          0.2      Iris-setosa
40            5.0          3.5           1.3          0.3      Iris-setosa
41            4.5          2.3           1.3          0.3      Iris-setosa
42            4.4          3.2           1.3          0.2      Iris-setosa
43            5.0          3.5           1.6          0.6      Iris-setosa
44            5.1          3.8           1.9          0.4      Iris-setosa
45            4.8          3.0           1.4          0.3      Iris-setosa
46            5.1          3.8           1.6          0.2      Iris-setosa
47            4.6          3.2           1.4          0.2      Iris-setosa
48            5.3          3.7           1.5          0.2      Iris-setosa
49            5.0          3.3           1.4          0.2      Iris-setosa
50            7.0          3.2           4.7          1.4  Iris-versicolor
51            6.4          3.2           4.5          1.5  Iris-versicolor
52            6.9          3.1           4.9          1.5  Iris-versicolor
53            5.5          2.3           4.0          1.3  Iris-versicolor
54            6.5          2.8           4.6          1.5  Iris-versicolor
55            5.7          2.8           4.5          1.3  Iris-versicolor
56            6.3          3.3           4.7          1.6  Iris-versicolor
57            4.9          2.4           3.3          1.0  Iris-versicolor
58            6.6          2.9           4.6          1.3  Iris-versicolor
59            5.2          2.7           3.9          1.4  Iris-versicolor
60            5.0          2.0           3.5          1.0  Iris-versicolor
61            5.9          3.0           4.2          1.5  Iris-versicolor
62            6.0          2.2           4.0          1.0  Iris-versicolor
63            6.1          2.9           4.7          1.4  Iris-versicolor
64            5.6          2.9           3.6          1.3  Iris-versicolor
65            6.7          3.1           4.4          1.4  Iris-versicolor
66            5.6          3.0           4.5          1.5  Iris-versicolor
67            5.8          2.7           4.1          1.0  Iris-versicolor
68            6.2          2.2           4.5          1.5  Iris-versicolor
69            5.6          2.5           3.9          1.1  Iris-versicolor
70            5.9          3.2           4.8          1.8  Iris-versicolor
71            6.1          2.8           4.0          1.3  Iris-versicolor
72            6.3          2.5           4.9          1.5  Iris-versicolor
73            6.1          2.8           4.7          1.2  Iris-versicolor
74            6.4          2.9           4.3          1.3  Iris-versicolor
75            6.6          3.0           4.4          1.4  Iris-versicolor
76            6.8          2.8           4.8          1.4  Iris-versicolor
77            6.7          3.0           5.0          1.7  Iris-versicolor
78            6.0          2.9           4.5          1.5  Iris-versicolor
79            5.7          2.6           3.5          1.0  Iris-versicolor
80            5.5          2.4           3.8          1.1  Iris-versicolor
81            5.5          2.4           3.7          1.0  Iris-versicolor
82            5.8          2.7           3.9          1.2  Iris-versicolor
83            6.0          2.7           5.1          1.6  Iris-versicolor
84            5.4          3.0           4.5          1.5  Iris-versicolor
85            6.0          3.4           4.5          1.6  Iris-versicolor
86            6.7          3.1           4.7          1.5  Iris-versicolor
87            6.3          2.3           4.4          1.3  Iris-versicolor
88            5.6          3.0           4.1          1.3  Iris-versicolor
89            5.5          2.5           4.0          1.3  Iris-versicolor
90            5.5          2.6           4.4          1.2  Iris-versicolor
91            6.1          3.0           4.6          1.4  Iris-versicolor
92            5.8          2.6           4.0          1.2  Iris-versicolor
93            5.0          2.3           3.3          1.0  Iris-versicolor
94            5.6          2.7           4.2          1.3  Iris-versicolor
95            5.7          3.0           4.2          1.2  Iris-versicolor
96            5.7          2.9           4.2          1.3  Iris-versicolor
97            6.2          2.9           4.3          1.3  Iris-versicolor
98            5.1          2.5           3.0          1.1  Iris-versicolor
99            5.7          2.8           4.1          1.3  Iris-versicolor
100           6.3          3.3           6.0          2.5   Iris-virginica
101           5.8          2.7           5.1          1.9   Iris-virginica
102           7.1          3.0           5.9          2.1   Iris-virginica
103           6.3          2.9           5.6          1.8   Iris-virginica
104           6.5          3.0           5.8          2.2   Iris-virginica
105           7.6          3.0           6.6          2.1   Iris-virginica
106           4.9          2.5           4.5          1.7   Iris-virginica
107           7.3          2.9           6.3          1.8   Iris-virginica
108           6.7          2.5           5.8          1.8   Iris-virginica
109           7.2          3.6           6.1          2.5   Iris-virginica
110           6.5          3.2           5.1          2.0   Iris-virginica
111           6.4          2.7           5.3          1.9   Iris-virginica
112           6.8          3.0           5.5          2.1   Iris-virginica
113           5.7          2.5           5.0          2.0   Iris-virginica
114           5.8          2.8           5.1          2.4   Iris-virginica
115           6.4          3.2           5.3          2.3   Iris-virginica
116           6.5          3.0           5.5          1.8   Iris-virginica
117           7.7          3.8           6.7          2.2   Iris-virginica
118           7.7          2.6           6.9          2.3   Iris-virginica
119           6.0          2.2           5.0          1.5   Iris-virginica
120           6.9          3.2           5.7          2.3   Iris-virginica
121           5.6          2.8           4.9          2.0   Iris-virginica
122           7.7          2.8           6.7          2.0   Iris-virginica
123           6.3          2.7           4.9          1.8   Iris-virginica
124           6.7          3.3           5.7          2.1   Iris-virginica
125           7.2          3.2           6.0          1.8   Iris-virginica
126           6.2          2.8           4.8          1.8   Iris-virginica
127           6.1          3.0           4.9          1.8   Iris-virginica
128           6.4          2.8           5.6          2.1   Iris-virginica
129           7.2          3.0           5.8          1.6   Iris-virginica
130           7.4          2.8           6.1          1.9   Iris-virginica
131           7.9          3.8           6.4          2.0   Iris-virginica
132           6.4          2.8           5.6          2.2   Iris-virginica
133           6.3          2.8           5.1          1.5   Iris-virginica
134           6.1          2.6           5.6          1.4   Iris-virginica
135           7.7          3.0           6.1          2.3   Iris-virginica
136           6.3          3.4           5.6          2.4   Iris-virginica
137           6.4          3.1           5.5          1.8   Iris-virginica
138           6.0          3.0           4.8          1.8   Iris-virginica
139           6.9          3.1           5.4          2.1   Iris-virginica
140           6.7          3.1           5.6          2.4   Iris-virginica
141           6.9          3.1           5.1          2.3   Iris-virginica
142           5.8          2.7           5.1          1.9   Iris-virginica
143           6.8          3.2           5.9          2.3   Iris-virginica
144           6.7          3.3           5.7          2.5   Iris-virginica
145           6.7          3.0           5.2          2.3   Iris-virginica
146           6.3          2.5           5.0          1.9   Iris-virginica
147           6.5          3.0           5.2          2.0   Iris-virginica
148           6.2          3.4           5.4          2.3   Iris-virginica
149           5.9          3.0           5.1          1.8   Iris-virginica
In [60]:
# Dimensions of dataset
dimensions = (data.shape)

print(dimensions)
(150, 5)
In [61]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal-length  150 non-null    float64
 1   sepal-width   150 non-null    float64
 2   petal-length  150 non-null    float64
 3   petal-width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
In [62]:
#Peek at the first 5 Rows
print(data.head())

print("\n")
#Peek at the Last 5 Rows
print(data.tail())
   sepal-length  sepal-width  petal-length  petal-width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


     sepal-length  sepal-width  petal-length  petal-width         species
145           6.7          3.0           5.2          2.3  Iris-virginica
146           6.3          2.5           5.0          1.9  Iris-virginica
147           6.5          3.0           5.2          2.0  Iris-virginica
148           6.2          3.4           5.4          2.3  Iris-virginica
149           5.9          3.0           5.1          1.8  Iris-virginica
In [63]:
# Check for missing values
missing_values = data.isnull().sum()
print("Missing Values:")
print(missing_values)

# Check for duplicate rows
duplicates = data.duplicated().sum()
print("\nNumber of Duplicate Rows:", duplicates)

# Data Cleansing: Remove Duplicate Rows
dataset = data.drop_duplicates()


# Display the first few rows of the cleaned dataset
print("\nCleaned Dataset (First few rows):")
print(dataset.head())

print("\n")

# # Set display options to show all rows and columns
# pd.set_option('display.max_rows', None)
# pd.set_option('display.max_columns', None)

# # Print the entire DataFrame
# print(data)
Missing Values:
sepal-length    0
sepal-width     0
petal-length    0
petal-width     0
species         0
dtype: int64

Number of Duplicate Rows: 3

Cleaned Dataset (First few rows):
   sepal-length  sepal-width  petal-length  petal-width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


In [64]:
#Statistical summary of attributes
data.describe()
Out[64]:
sepal-length sepal-width petal-length petal-width
count 150.000000 150.000000 150.000000 150.000000
mean 5.843333 3.054000 3.758667 1.198667
std 0.828066 0.433594 1.764420 0.763161
min 4.300000 2.000000 1.000000 0.100000
25% 5.100000 2.800000 1.600000 0.300000
50% 5.800000 3.000000 4.350000 1.300000
75% 6.400000 3.300000 5.100000 1.800000
max 7.900000 4.400000 6.900000 2.500000
In [67]:
# Group the data by the "species" column and calculate summary statistics
species_summary = data.groupby("species").describe()

# Print the summary statistics
print(species_summary)
                sepal-length                                              \
                       count   mean       std  min    25%  50%  75%  max   
species                                                                    
Iris-setosa             50.0  5.006  0.352490  4.3  4.800  5.0  5.2  5.8   
Iris-versicolor         50.0  5.936  0.516171  4.9  5.600  5.9  6.3  7.0   
Iris-virginica          50.0  6.588  0.635880  4.9  6.225  6.5  6.9  7.9   

                sepal-width                                                \
                      count   mean       std  min    25%  50%    75%  max   
species                                                                     
Iris-setosa            50.0  3.418  0.381024  2.3  3.125  3.4  3.675  4.4   
Iris-versicolor        50.0  2.770  0.313798  2.0  2.525  2.8  3.000  3.4   
Iris-virginica         50.0  2.974  0.322497  2.2  2.800  3.0  3.175  3.8   

                petal-length                                               \
                       count   mean       std  min  25%   50%    75%  max   
species                                                                     
Iris-setosa             50.0  1.464  0.173511  1.0  1.4  1.50  1.575  1.9   
Iris-versicolor         50.0  4.260  0.469911  3.0  4.0  4.35  4.600  5.1   
Iris-virginica          50.0  5.552  0.551895  4.5  5.1  5.55  5.875  6.9   

                petal-width                                            
                      count   mean       std  min  25%  50%  75%  max  
species                                                                
Iris-setosa            50.0  0.244  0.107210  0.1  0.2  0.2  0.3  0.6  
Iris-versicolor        50.0  1.326  0.197753  1.0  1.2  1.3  1.5  1.8  
Iris-virginica         50.0  2.026  0.274650  1.4  1.8  2.0  2.3  2.5  
In [71]:
# Set the style for Seaborn
sns.set(style="whitegrid")

# Create subplots for univariate plots
plt.figure(figsize=(12, 6))
plt.subplot(2, 2, 1)
sns.histplot(data['sepal-length'], kde=True, color='b')
plt.title('Sepal Length')

plt.subplot(2, 2, 2)
sns.histplot(data['sepal-width'], kde=True, color='g')
plt.title('Sepal Width')

plt.subplot(2, 2, 3)
sns.histplot(data['petal-length'], kde=True, color='r')
plt.title('Petal Length')

plt.subplot(2, 2, 4)
sns.histplot(data['petal-width'], kde=True, color='purple')
plt.title('Petal Width')

plt.tight_layout()
plt.show()
In [70]:
# Create subplots for histograms
plt.figure(figsize=(12, 6))

plt.subplot(2, 2, 1)
plt.hist(data['sepal-length'], bins=15, color='b', alpha=0.7)
plt.title('Sepal Length Histogram')
plt.xlabel('Sepal Length')
plt.ylabel('Frequency')

plt.subplot(2, 2, 2)
plt.hist(data['sepal-width'], bins=15, color='g', alpha=0.7)
plt.title('Sepal Width Histogram')
plt.xlabel('Sepal Width')
plt.ylabel('Frequency')

plt.subplot(2, 2, 3)
plt.hist(data['petal-length'], bins=15, color='r', alpha=0.7)
plt.title('Petal Length Histogram')
plt.xlabel('Petal Length')
plt.ylabel('Frequency')

plt.subplot(2, 2, 4)
plt.hist(data['petal-width'], bins=15, color='purple', alpha=0.7)
plt.title('Petal Width Histogram')
plt.xlabel('Petal Width')
plt.ylabel('Frequency')

plt.tight_layout()
plt.show()
In [72]:
# Load the Iris dataset
iris = sns.load_dataset("iris")

# Create pair plots for multivariate analysis
sns.set(style="whitegrid")
sns.pairplot(iris, hue="species", size=3)
plt.show()
C:\Users\tommy\anaconda3\Lib\site-packages\seaborn\axisgrid.py:2095: UserWarning: The `size` parameter has been renamed to `height`; please update your code.
  warnings.warn(msg, UserWarning)
In [103]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, plot_tree

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Create a decision tree classifier
clf = DecisionTreeClassifier()
clf.fit(X, y)

# Visualize the decision tree
plt.figure(figsize=(12, 8))
plot_tree(clf, filled=True, feature_names=iris.feature_names, class_names=list(iris.target_names))
plt.title("Decision Tree for Iris Dataset")
plt.show()
In [105]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets

# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data[:, :2]  # Use only the first two features
y = iris.target

# Create a meshgrid for the feature space
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02), np.arange(y_min, y_max, 0.02))

# Create a K-NN classifier
knn = neighbors.KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)

# Predict for each point in the mesh
Z = knn.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Create a color map for the plot
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

# Plot the decision boundaries
plt.figure()
plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

# Plot the training points
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=20)
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.title('K-Nearest Neighbors (K-NN) Decision Boundaries (k=5)')

plt.show()
In [107]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
import seaborn as sns

# Load the iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Use only the first two features (sepal length and sepal width)
X = X[:, :2]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Naive Bayes model
gnb = GaussianNB()

# Train the model
gnb.fit(X_train, y_train)

# Generate some data points for visualization
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02), np.arange(y_min, y_max, 0.02))

# Predict the class probabilities for each point in the mesh
Z = gnb.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Create a contour plot
plt.contourf(xx, yy, Z, cmap='YlGnBu', alpha=0.8)
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=iris.target_names[y], palette="Set1")
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
plt.title('Naive Bayes Classification')
plt.show()
In [108]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import seaborn as sns

# Load the iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Use only the first two features (sepal length and sepal width)
X = X[:, :2]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest classifier with multiple decision trees
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the Random Forest classifier
rf_classifier.fit(X_train, y_train)

# Generate some data points for visualization
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02), np.arange(y_min, y_max, 0.02))

# Predict the class probabilities for each point in the mesh
Z = rf_classifier.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Create a contour plot
plt.contourf(xx, yy, Z, cmap='YlGnBu', alpha=0.8)
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=iris.target_names[y], palette="Set1")
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
plt.title('Random Forests Classification')
plt.show()
In [109]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
import seaborn as sns

# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data[:, :2]  # Consider only the first two features for visualization
y = iris.target

# Split the data into a training and testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create SVM and Logistic Regression classifiers
svm_classifier = SVC(kernel='linear', C=1, random_state=42)
logistic_regression = LogisticRegression(multi_class='multinomial', solver='lbfgs', C=1, random_state=42)

# Fit the models to the training data
svm_classifier.fit(X_train, y_train)
logistic_regression.fit(X_train, y_train)

# Create a meshgrid for visualization
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02), np.arange(y_min, y_max, 0.02))

# Predict the class labels for the meshgrid points
Z_svm = svm_classifier.predict(np.c_[xx.ravel(), yy.ravel()])
Z_svm = Z_svm.reshape(xx.shape)
Z_logistic = logistic_regression.predict(np.c_[xx.ravel(), yy.ravel()])
Z_logistic = Z_logistic.reshape(xx.shape)

# Create subplots to visualize both models
plt.figure(figsize=(12, 5))

# SVM decision boundary
plt.subplot(1, 2, 1)
plt.contourf(xx, yy, Z_svm, cmap=plt.cm.YlGnBu, alpha=0.8)
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=iris.target_names[y], palette="Set1")
plt.title("SVM Decision Boundary")
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])

# Multinomial Logistic Regression decision boundary
plt.subplot(1, 2, 2)
plt.contourf(xx, yy, Z_logistic, cmap=plt.cm.YlGnBu, alpha=0.8)
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=iris.target_names[y], palette="Set1")
plt.title("Multinomial Logistic Regression Decision Boundary")
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])

plt.tight_layout()
plt.show()
In [ ]:
 
In [ ]: