$$ \mathcal{D} = \{(x_i, y_i)\}_{i=1}^N, \quad x_i \in \mathbb{R}^d, \quad y_i \in \{1,\dots,K\} $$
$$ \mu_k = \frac{1}{n_k}\sum_{i:\, y_i = k} x_i, \quad n_k = |\{i : y_i = k\}| $$
$$ \Sigma_k = \frac{1}{n_k}\sum_{i:\, y_i = k}(x_i - \mu_k)(x_i - \mu_k)^T$$
$$ \pi_k = \frac{n_k}{N}$$
$$\Sigma = \sum_{k=1}^K \pi_k \Sigma_k$$
$$\Sigma^{-1}$$
$$w_k = \Sigma^{-1} \mu_k$$
$$ b_k =-\frac12 \mu_k^T \Sigma^{-1} \mu_k + \ln(\pi_k) $$
$$ \delta_k(x) = x^T \cdot w_k + b_k = x^T \Sigma^{-1} \mu_k - \frac12 \mu_k^T \Sigma^{-1} \mu_k + \ln(\pi_k) $$
$$ \hat y = \arg\max_k \delta_k(x) $$
$$ P(y=k \mid x) = \frac{\exp(\delta_k(x))} {\sum_{j=1}^K \exp(\delta_j(x))}$$
| x1 | x2 | y |
|---|---|---|
| 1 | 2 | c1 |
| 2 | 1 | c1 |
| 2 | 2 | c1 |
| 6 | 5 | c2 |
| 7 | 6 | c2 |
| 6 | 6 | c2 |
| 3 | 7 | c3 |
| 4 | 8 | c3 |
| 3 | 8 | c3 |

| c1 | | c2 | | c3 | |
|---|---|---|---|---|---|
| x1 | x2 | x1 | x2 | x1 | x2 |
| 1 | 2 | 6 | 5 | 3 | 7 |
| 2 | 1 | 7 | 6 | 4 | 8 |
| 2 | 2 | 6 | 6 | 3 | 8 |
| 2 | 3 | 7 | 5 | | |
| | | 6 | 4 | | |
$$\mu_1 = \left(\frac{1 + 2 + 2 + 2}{4}, \frac{2 + 1 + 2 + 3}{4}\right) = \left(1.75, 2\right)$$
$$\mu_2 = \left(\frac{6 + 7 + 6 + 7 + 6}{5}, \frac{5 + 6 + 6 + 5 + 4}{5}\right) = \left(6.4, 5.2\right)$$
$$\mu_3 = \left(\frac{3 + 4 + 3}{3}, \frac{7 + 8 + 8}{3}\right) = \left(3.3333, 7.6667\right)$$
$$X_k - \mu_k$$

| c1 | | c2 | | c3 | |
|---|---|---|---|---|---|
| x1 | x2 | x1 | x2 | x1 | x2 |
| $X_1 - \mu_1$ | | $X_2 - \mu_2$ | | $X_3 - \mu_3$ | |
| -0.75 | 0 | -0.4 | -0.2 | -0.3333 | -0.6667 |
| 0.25 | -1 | 0.6 | 0.8 | 0.6667 | 0.3333 |
| 0.25 | 0 | -0.4 | 0.8 | -0.3333 | 0.3333 |
| 0.25 | 1 | 0.6 | -0.2 | | |
| | | -0.4 | -1.2 | | |
$$\begin{aligned} \Sigma_1 = \frac{1}{4} \begin{bmatrix} -0.75 & 0.25 & 0.25 & 0.25 \\ 0 & -1 & 0 & 1 \\ \end{bmatrix} \begin{bmatrix} -0.75 & 0 \\ 0.25 & -1 \\ 0.25 & 0 \\ 0.25 & 1 \end{bmatrix} &= \begin{bmatrix} 0.1875 & 0 \\ 0 & 0.5 \end{bmatrix} \end{aligned}$$
$$\begin{aligned} \Sigma_2 = \begin{bmatrix} 0.24 & 0.12 \\ 0.12 & 0.56 \end{bmatrix} \end{aligned}$$
$$\begin{aligned} \Sigma_3 = \begin{bmatrix} 0.2222 & 0.1111 \\ 0.1111 & 0.2222 \end{bmatrix} \end{aligned}$$
$$\Sigma = \frac{4}{12} \begin{bmatrix}0.1875 & 0 \\ 0 & 0.5 \end{bmatrix} + \frac{5}{12} \begin{bmatrix}0.24 & 0.12 \\ 0.12 & 0.56 \end{bmatrix} + \frac{3}{12} \begin{bmatrix}0.2222 & 0.1111 \\ 0.1111 & 0.2222 \end{bmatrix} = \begin{bmatrix}0.2181 & 0.07778 \\ 0.07778 & 0.4556 \end{bmatrix} $$
$$\Sigma^{-1} = \begin{bmatrix} 4.8834 & -0.83375 \\ -0.8337 & 2.3375 \end{bmatrix}$$
$$w_1 = \begin{bmatrix} 4.8834 & -0.83375 \\ -0.8337 & 2.3375 \end{bmatrix} * \begin{bmatrix} 1.75 \\ 2 \end{bmatrix} = \begin{bmatrix} 6.8784 \\ 3.2159 \end{bmatrix} $$
$$w_2 = \begin{bmatrix} 4.8834 & -0.83375 \\ -0.8337 & 2.3375 \end{bmatrix} * \begin{bmatrix} 6.4 \\ 5.2 \end{bmatrix} = \begin{bmatrix} 26.9181 \\ 6.8189 \end{bmatrix} $$
$$w_3 = \begin{bmatrix} 4.8834 & -0.83375 \\ -0.8337 & 2.3375 \end{bmatrix} * \begin{bmatrix} 3.3333 \\ 7.6667 \end{bmatrix} = \begin{bmatrix} 9.8859 \\ 15.1414 \end{bmatrix} $$
$$ b_1 =-\frac12 \begin{bmatrix} 1.75 & 2 \end{bmatrix} * \begin{bmatrix} 4.8834 & -0.83375 \\ -0.8337 & 2.3375 \end{bmatrix} * \begin{bmatrix} 1.75 \\ 2 \end{bmatrix} + \log(\frac{4}{12}) = -10.3331 $$
$$ b_2 =-\frac12 \begin{bmatrix} 6.4 & 5.2 \end{bmatrix} * \begin{bmatrix} 4.8834 & -0.83375 \\ -0.8337 & 2.3375 \end{bmatrix} * \begin{bmatrix} 6.4 \\ 5.2 \end{bmatrix} + \log(\frac{5}{12}) = -104.7425 $$
$$ b_3 =-\frac12 \begin{bmatrix} 3.3333 & 7.6667 \end{bmatrix} * \begin{bmatrix} 4.8834 & -0.83375 \\ -0.8337 & 2.3375 \end{bmatrix} * \begin{bmatrix} 3.3333 \\ 7.6667 \end{bmatrix} + \log(\frac{3}{12}) = -75.9049 $$
$$ \delta_k(x) = x^T \Sigma^{-1} \mu_k - \frac12 \mu_k^T \Sigma^{-1} \mu_k + \log(\pi_k) $$
Dự đoán: x = (3, 4)
$$ \delta_1(x) = \begin{bmatrix} 3 & 4 \end{bmatrix} * \begin{bmatrix} 6.8784 \\ 3.2159 \end{bmatrix} -10.3331 = 23.1657$$
$$ \delta_2(x) = \begin{bmatrix} 3 & 4 \end{bmatrix} * \begin{bmatrix} 26.9181 \\ 6.8189 \end{bmatrix} - 104.7425 = 3.2873$$
$$ \delta_3(x) = \begin{bmatrix} 3 & 4 \end{bmatrix} \begin{bmatrix} 9.8859 \\ 15.1414 \end{bmatrix} - 75.9049 = 14.3184$$
$$ P(y=k \mid x) = \frac{\exp(\delta_k(x))} {\sum_{j=1}^K \exp(\delta_j(x))}$$
$$ P(y=c_1 \mid x) = \frac{\exp(23.1657)} {\exp(23.1657) + \exp(3.2873) + \exp(14.3184)} = 0.9999$$
$$ P(y=c_2 \mid x) = \frac{\exp(3.2873)} {\exp(23.1657) + \exp(3.2873) + \exp(14.3184)} = 2.3275 \times 10^{-9}$$
$$ P(y=c_3 \mid x) = \frac{\exp(14.3184)} {\exp(23.1657) + \exp(3.2873) + \exp(14.3184)} = 0.0001$$
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import numpy as np
import pandas as pd
# Training set for the worked example: 12 two-dimensional points split
# across three classes (c1: 4 points, c2: 5 points, c3: 3 points).
# Dict insertion order fixes the row order of the resulting frame.
points_by_class = {
    'c1': [(1, 2), (2, 1), (2, 2), (2, 3)],
    'c2': [(6, 5), (7, 6), (7, 5), (6, 6), (6, 4)],
    'c3': [(3, 7), (4, 8), (3, 8)],
}

# Flatten to one (x1, x2, class) record per sample.
rows = [
    (x1, x2, label)
    for label, points in points_by_class.items()
    for x1, x2 in points
]
df = pd.DataFrame(rows, columns=['x1', 'x2', 'class'])

# Feature matrix (one row per sample) and the matching class labels.
feature_columns = ['x1', 'x2']
X = df[feature_columns].values
y = df['class'].values
# Fit scikit-learn's LDA on the full training set using the least-squares
# solver, so the result can be compared with the hand calculation above.
lda = LinearDiscriminantAnalysis(
    solver='lsqr',
    n_components=2,
    store_covariance=True,
)
lda.fit(X, y)

# Fitted linear coefficients and intercepts, one row/entry per class
# (presumably the counterparts of w_k and b_k in the derivation).
weights, intercepts = lda.coef_, lda.intercept_
print(weights, intercepts)

# Classify the query point x = (3, 4) and show its class probabilities.
query_point = [[3, 4]]
print(lda.predict(query_point), lda.predict_proba(query_point))