Anomaly Detection by PCA in PyOD
Libraries¶
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# PyOD
from pyod.utils.data import generate_data, get_outliers_inliers
from pyod.models.pca import PCA
from pyod.utils.data import evaluate_print
from pyod.utils.example import visualize
Create a dataset¶
# Build a synthetic, labelled data set with 5 features (training split only).
# random_state pins the generator so the data -- and every plot and score
# derived from it further down -- is identical on each run of the notebook.
X_train, y_train = generate_data(
    behaviour='new',
    n_features=5,
    train_only=True,
    random_state=42,
)

# Wrap the features in a DataFrame (columns get the default integer labels)
# and attach the ground-truth labels as column 'y' so seaborn can use them.
df_train = pd.DataFrame(X_train)
df_train['y'] = y_train
df_train.head()

# Plot feature columns 0 and 1, coloured by the ground-truth label.
sns.scatterplot(x=0, y=1, hue='y', data=df_train);
plt.title('Ground Truth');
Train an unsupervised PCA detector¶
# Create the PCA detector from pyod.models.pca (not scikit-learn's PCA),
# using its default settings.
clf = PCA()
# Fit on the feature matrix only: y_train is not passed, so the ground-truth
# labels play no part in training.
clf.fit(X_train)
Evaluate training score¶
# Read the results the fitted detector stores for the training data.
# NOTE(review): per the PyOD API these should be binary labels
# (0 = inlier, 1 = outlier) and raw outlier scores (higher = more abnormal)
# -- confirm against the installed PyOD version.
# y_train_pred is not used in the lines visible here.
y_train_pred = clf.labels_
y_train_scores = clf.decision_scores_
# Plot feature columns 0 and 1 of df_train, coloured by the per-sample score
# on the diverging 'RdBu_r' palette.
sns.scatterplot(x=0, y=1, hue=y_train_scores, data=df_train, palette='RdBu_r');
plt.title('Anomaly Scores by PCA');