Commit 890f7779 authored by Wolf
Browse files

Upload New File

parent 8060935c
# load libraries
import numpy as np
from sklearn import linear_model
from sklearn.metrics import accuracy_score,f1_score,roc_auc_score
# ----------------------------------------
# data preparation
# ----------------------------------------
# read the comma-separated data set; it must be 2-D (rows = examples)
data = np.loadtxt('crimerate_binary.csv', delimiter=',')
n, p = data.shape
# ordered split (no shuffling): the leading 75% of the rows form the
# training set, the remaining rows form the testing set
size_train = int(0.75 * n)
# the last column is the label, every other column is a feature
sample_train, label_train = data[:size_train, :-1], data[:size_train, -1]
sample_test, label_test = data[size_train:, :-1], data[size_train:, -1]
# ----------------------------------------
# classification-based anomaly detection
# tutorial slides, page 49 - 59
# a Logistic Regression model does the detection; its predictions are
# used directly, so no thresholding of the output is needed (page 54)
# ----------------------------------------
# steps 1 + 2. pick the classifier (logistic regression) and train it on
# the training examples (fit returns the fitted estimator itself)
model = linear_model.LogisticRegression(C=1).fit(sample_train, label_train)
# step 3. predict, for every test example, whether it is normal or anomaly
label_pred = model.predict(sample_test)
# detection error and f1-score come straight from the predicted labels
err = 1 - accuracy_score(label_test, label_pred)
f1score = f1_score(label_test, label_pred)
# AUC needs a continuous anomalous score instead of hard labels; use the
# predicted probability in column 1 of predict_proba (the second class in
# model.classes_ -- presumably the anomaly label, confirm against the data)
adscore = model.predict_proba(sample_test)[:, 1]
auc_score = roc_auc_score(label_test, adscore)
# step 4. report the three metrics with four decimal places
print('\nClassification-based Approach (Logistic Regression Model)')
for template, value in (
    ('Detection Error = %.4f', err),
    ('F1 Score = %.4f', f1score),
    ('AUC Score = %.4f', auc_score),
):
    print(template % value)
# -----------
# Assignment
# -----------
# play with different values of the hyper-parameter C (passed to
# LogisticRegression in step 1 above), what do you observe?
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment