Commit 685c940e authored by Wolf's avatar Wolf
Browse files

Upload New File

parent 34c27045
# load libraries
import numpy as np
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score,f1_score,roc_auc_score
# load data set (one example per row; last column is the label, the other columns are features)
data = np.loadtxt('crimerate_binary.csv', delimiter=',')
n, p = data.shape
# split data into a training set and a testing set
size_train = int(0.75 * n)  # we use first 75% data for training, the rest for testing
sample_train = data[:size_train, :-1]
label_train = data[:size_train, -1]
sample_test = data[size_train:, :-1]
label_test = data[size_train:, -1]
# ------------------------------------
# Spectral-based anomaly detection
# tutorial slides, page 95 - 100
# ------------------------------------
# step 1. choose number of PC components and construct a PCA model
num_component = 2
model = PCA(n_components=num_component)
# step 2. estimate PCA model using ONLY NORMAL examples (rows whose label is 0)
model.fit(sample_train[label_train == 0, :])
# the following code trains the model using both normal and abnormal examples
# model.fit(sample_train)
# step 3. use the PCA model to project testing examples onto low dimensional feature space
sample_test_pca = model.transform(sample_test)
# step 4. reconstruct testing examples from the low dimensional space back to the original space
sample_test_recovered = model.inverse_transform(sample_test_pca)
# step 5. compute reconstruction error (Euclidean norm of each row's residual) and use it as anomaly score
dif = sample_test - sample_test_recovered
adscore = np.linalg.norm(dif, axis=1)  # avoids **(1/2), which is **0 under Python 2 integer division
# evaluate AUC score on the continuous scores (no threshold involved)
auc_score = roc_auc_score(label_test, adscore)
# to get detection error and f1-score, we need to threshold anomalous score
# the tutorial notes the range of adscore as [0.6, 3]; check adscore.min()/adscore.max() if the setup changes
threshold = 1.5
# keep adscore intact and store the 0/1 predictions separately, so a new threshold can be re-applied safely
label_pred = (adscore > threshold).astype(float)
# step 6. evaluate detection error and f1-score
# (detection error = 1 - accuracy)
err = 1 - accuracy_score(label_test, label_pred)
f1score = f1_score(label_test, label_pred)
# step 7. print results
print('\nSpectral-based Approach (PCA)')
print('Detection Error = %.4f' % err)
print('F1 Score = %.4f' % f1score)
print('AUC Score = %.4f' % auc_score)
# -----------
# Assignment
# -----------
# 1. train PCA model using both normal and abnormal examples (in step 2, replace the model.fit call on normal-only examples with the commented-out model.fit(sample_train)), what do you observe?
# 2. play with different number of PC components (the num_component variable in step 1), what do you observe?
# 3. play with different thresholds (the threshold variable), what do you observe?
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment