PYTHON 25
AnalysisPCA.py Guest on 12th August 2020 07:47:37 AM
  1. # Load the Pandas libraries with alias 'pd'
  2. import pandas as pd
  3. from keras.models import Sequential
  4. from keras.layers import Dense, Activation, Dropout
  5. import matplotlib.pyplot as plt
  6. import csv
  7. import numpy
  8. from keras.models import model_from_json
  9. from sklearn.metrics import roc_curve, auc
  10. from sklearn import datasets, linear_model
  11. from sklearn.metrics import mean_squared_error, r2_score
  12. from sklearn.decomposition import PCA
  13. import time
  14. from mpl_toolkits.mplot3d import Axes3D
  15.  
  16. # Read data from file 'filename.csv'
  17. # (in the same directory that your python process is based)
  18. # Control delimiters, rows, column names with read_csv (see later)
  19. #data = pd.read_csv("BlackFriday.csv")
  20. data = pd.read_csv("RedWineQuality.csv")
  21.  
  22. # Preview the first 5 lines of the loaded data
  23. print(data.head())
  24.  
  25. label_qual = data['quality']
  26. label = []
  27.  
  28. for i in range(0,len(label_qual)):
  29.   if(label_qual[i]>4):
  30.     label.append(1)
  31.   else:
  32.     label.append(0)
  33.  
  34. label_train = label_qual[:len(label)//2]
  35. label_test = label_qual[len(label)//2:]
  36.  
  37. x_values = data.drop(['quality'], axis=1)
  38.  
  39. x_values_train = x_values[:len(x_values)//2]
  40. x_values_test = x_values[len(x_values)//2:]
  41.  
  42. pca = PCA(n_components=3)
  43. principalComponents = pca.fit_transform(x_values)
  44. principalDf = pd.DataFrame(data = principalComponents
  45.              , columns = ['principal component 1', 'principal component 2','principal component 3'])
  46.  
  47. finalDf = pd.concat([principalDf, data[['quality']]], axis = 1)
  48.  
  49. fig = plt.figure(figsize = (8,8))
  50. ax = fig.add_subplot(1,1,1, projection='3d')
  51. ax.set_xlabel('Principal Component 1', fontsize = 15)
  52. ax.set_ylabel('Principal Component 2', fontsize = 15)
  53. ax.set_zlabel('Principal Component 3', fontsize = 15)
  54. ax.set_title('3 component PCA', fontsize = 20)
  55. targets = [4, 5, 6]
  56. colors = ['r', 'g', 'b']
  57. for target, color in zip(targets,colors):
  58.     indicesToKeep = finalDf['quality'] == target
  59.     ax.scatter(finalDf.loc[indicesToKeep, 'principal component 1']
  60.                , finalDf.loc[indicesToKeep, 'principal component 2']
  61.                , finalDf.loc[indicesToKeep, 'principal component 3']
  62.                , c = color
  63.                , s = 50)
  64.  
  65.                
  66. ax.legend(targets)
  67. ax.grid()
  68.  
  69.  
  70. fig.canvas.draw()
  71. fig.savefig("filename.png")
  72.  
  73. fig1 = plt.figure(figsize = (8,8))
  74. ax = fig1.add_subplot(1,1,1)
  75. ax.set_xlabel('Principal Component 1', fontsize = 15)
  76. targets = [4, 5, 6]
  77.  
  78. ax.plot(finalDf.loc[:,'principal component 1']
  79.         ,finalDf.loc[:,'quality'], 'bo')
  80.  
  81. ax.grid()
  82.  
  83. fig1.canvas.draw()
  84. fig1.savefig("filename.png")
  85.  
  86. print(pca.explained_variance_ratio_)

Paste is for source code and general debugging text.

Login or Register to edit, delete and keep track of your pastes and more.

Raw Paste

Login or Register to edit or fork this paste. It's free.