Title: Classifying and predicting wine quality using the Support Vector Machine.

In [ ]:
#Import the dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

Load the Dataset

In [ ]:
# Load the red-wine quality dataset (1599 samples, 11 physicochemical features + 'quality').
# NOTE(review): "/content/..." is a Colab-local absolute path — TODO: parameterize for portability.
df = pd.read_csv("/content/winequality-red.csv")
df.head()
Out[ ]:
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality
0 7.4 0.70 0.00 1.9 0.076 11.0 34.0 0.9978 3.51 0.56 9.4 5
1 7.8 0.88 0.00 2.6 0.098 25.0 67.0 0.9968 3.20 0.68 9.8 5
2 7.8 0.76 0.04 2.3 0.092 15.0 54.0 0.9970 3.26 0.65 9.8 5
3 11.2 0.28 0.56 1.9 0.075 17.0 60.0 0.9980 3.16 0.58 9.8 6
4 7.4 0.70 0.00 1.9 0.076 11.0 34.0 0.9978 3.51 0.56 9.4 5
In [ ]:
# Inspect dtypes and non-null counts: 1599 rows, 12 columns, no missing values
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed acidity         1599 non-null   float64
 1   volatile acidity      1599 non-null   float64
 2   citric acid           1599 non-null   float64
 3   residual sugar        1599 non-null   float64
 4   chlorides             1599 non-null   float64
 5   free sulfur dioxide   1599 non-null   float64
 6   total sulfur dioxide  1599 non-null   float64
 7   density               1599 non-null   float64
 8   pH                    1599 non-null   float64
 9   sulphates             1599 non-null   float64
 10  alcohol               1599 non-null   float64
 11  quality               1599 non-null   int64  
dtypes: float64(11), int64(1)
memory usage: 150.0 KB
In [ ]:
# Distinct values per column — 'quality' has only 6 distinct labels, confirming a classification target
df.nunique()
Out[ ]:
fixed acidity            96
volatile acidity        143
citric acid              80
residual sugar           91
chlorides               153
free sulfur dioxide      60
total sulfur dioxide    144
density                 436
pH                       89
sulphates                96
alcohol                  65
quality                   6
dtype: int64

Distribution of classes

In [ ]:
# Label wines by their quality class. The original cell filtered on
# 'residual sugar' (== 2.0 vs == 9.0) while labelling the points
# "good quality" / "bad quality" — the filters did not match the labels.
# Filter on the actual target column instead; cap each class at 200
# points to keep the scatter readable.
good_q = df[df['quality'] >= 7][:200]
bad_q = df[df['quality'] <= 4][:200]

# Scatter plot: good-quality wines in red, bad-quality in blue, sharing one Axes
axes = good_q.plot(kind='scatter', x="pH", y="total sulfur dioxide", color='red', label='good quality')
bad_q.plot(kind='scatter', x="pH", y="total sulfur dioxide", color='blue', label='bad quality', ax=axes)
/usr/local/lib/python3.9/dist-packages/pandas/plotting/_matplotlib/core.py:1114: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  scatter = ax.scatter(
Out[ ]:
<Axes: xlabel='pH', ylabel='total sulfur dioxide'>

Identify unwanted columns

In [ ]:
df.dtypes

# All 11 predictors are float64 and the target 'quality' is int64;
# every column is a usable feature, so nothing needs to be dropped.
Out[ ]:
fixed acidity           float64
volatile acidity        float64
citric acid             float64
residual sugar          float64
chlorides               float64
free sulfur dioxide     float64
total sulfur dioxide    float64
density                 float64
pH                      float64
sulphates               float64
alcohol                 float64
quality                   int64
dtype: object

Split data into Train/Test

In [ ]:
# Separate predictors from the target: the 11 physicochemical columns
# become the feature matrix x; the integer quality score is the label vector y.
x = df.drop(columns="quality")
y = df["quality"]
In [ ]:
from sklearn.model_selection import train_test_split
# Hold out 20% for testing; random_state pinned so the split is reproducible
x_train,x_test,y_train,y_test= train_test_split(x, y, test_size =0.2, random_state=1)
In [ ]:
# Sanity check: 80% of 1599 rows -> 1279 training samples, 11 features
x_train.shape
Out[ ]:
(1279, 11)
In [ ]:
# Labels align with the training features: 1279 entries
y_train.shape
Out[ ]:
(1279,)
In [ ]:
# 20% hold-out: 320 test samples, 11 features
x_test.shape
Out[ ]:
(320, 11)
In [ ]:
# 320 test labels, matching x_test
y_test.shape
Out[ ]:
(320,)

Modeling (SVM using scikit-learn)

In [ ]:
from sklearn import svm

# Linear-kernel SVM classifier. `gamma` is only used by the 'rbf', 'poly'
# and 'sigmoid' kernels, so the original gamma='auto' was a no-op with
# kernel='linear' and has been removed. C=1.5 regularizes slightly less
# than the default C=1.0.
# NOTE(review): features are unscaled — SVMs are scale-sensitive, so a
# StandardScaler pipeline would likely improve accuracy; left unchanged
# here to preserve the reported results.
classifier = svm.SVC(kernel='linear', C=1.5)

# Fit on the training split, then predict the held-out test split
classifier.fit(x_train, y_train)
y_predict = classifier.predict(x_test)
In [ ]:
# Print the first 40 true and predicted quality labels for a quick eyeball comparison
print ('actual values:', y_test.values[:40])
print ('predicted values:', y_predict[:40])
actual values: [5 6 6 6 6 6 6 5 5 5 6 6 6 6 6 5 6 5 5 5 6 6 5 6 6 6 6 6 6 7 6 6 5 6 5 6 5
 7 6 5]
predicted values: [5 5 6 6 6 6 6 5 6 5 6 5 5 6 6 5 6 5 5 5 5 6 5 6 5 6 6 6 5 5 5 6 5 6 5 6 5
 5 6 5]
In [ ]:
# Put actual vs. predicted labels side by side for inspection.
# (The original cell was a botched paste: the same three statements were
# duplicated and a junk, unused variable `dfmsdfms` was created — deduplicated.)
dfms = pd.DataFrame(data={"actual values": y_test, "predicted values": y_predict})

# Signed error per sample: 0 = correct, -1/+1 = off by one quality grade, etc.
dfms["differences"] = dfms["predicted values"] - dfms["actual values"]
dfms
Out[ ]:
actual values predicted values differences
75 5 5 0
1283 6 5 -1
408 6 6 0
1281 6 6 0
1118 6 6 0
... ... ... ...
890 5 6 1
146 5 5 0
1551 5 5 0
1209 7 6 -1
1220 6 6 0

320 rows × 3 columns

Evaluation (Results)

In [ ]:
from sklearn.metrics import classification_report
# Per-class precision/recall/F1. Minority classes (3, 4, 7, 8) score 0.0 because
# the model never predicts them — that is what triggers the UndefinedMetricWarning
# below; pass `zero_division=0` to classification_report to silence it explicitly.
print(classification_report(y_test,y_predict))
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00        13
           5       0.62      0.78      0.69       140
           6       0.56      0.60      0.58       134
           7       0.00      0.00      0.00        30
           8       0.00      0.00      0.00         2

    accuracy                           0.59       320
   macro avg       0.20      0.23      0.21       320
weighted avg       0.51      0.59      0.55       320

/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))

Saving and Loading the model

In [ ]:
#save the existing model to file
import pickle

filename='WINE_QUALITY_MODEL.pkl'
pickle.dump(classifier, open(filename,"wb"))
In [ ]:
#Load a saved model
# Reload the saved model. Reuse `filename` (defined at save time) instead of the
# hardcoded "/content/..." absolute path, so save and load stay consistent and
# the notebook is portable off Colab. Also close the handle via a context manager.
# SECURITY NOTE: pickle.load executes arbitrary code — only load trusted files.
with open(filename, "rb") as f:
    loaded_model = pickle.load(f)
In [ ]:
#Make predictions

# Predict the quality of a single new wine sample.
# Build a one-row DataFrame whose columns match the training features. The
# original wrapped the list in a column-vector DataFrame, converted it to a
# numpy array and reshaped it to (1, 11) — which worked, but produced the
# "X does not have valid feature names" warning because the names were lost.
feature_values = [5.6, 1.30, 0.006, 12.5, 21.8, 6, 5.9, 0.9, 1.5, 16.8, 5.9]
input_data = pd.DataFrame([feature_values], columns=x.columns)

prediction = loaded_model.predict(input_data)

# NOTE(review): treating quality >= 5 as "good" is a generous cutoff — many
# analyses of this dataset use quality >= 7; confirm the intended threshold.
if prediction[0] >= 5:
    print('The wine is of a good quality')
else:
    print('The wine is of a bad quality')
The wine is of a good quality
/usr/local/lib/python3.9/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but SVC was fitted with feature names
  warnings.warn(