import numpy as np
import matplotlib.pyplot as plt

def get_dataset(a, b, n=100, bruit=5, xmax=10):
    """Draw ``n`` noisy samples scattered around the line ``y = a * x + b``.

    Args:
        a: Slope of the underlying line.
        b: Intercept of the underlying line.
        n: Number of samples to draw.
        bruit: Noise amplitude; the noise added to each ``y`` is uniform
            in ``[0, bruit)``, so it is always non-negative.
        xmax: Abscissas are drawn uniformly in ``[0, xmax)``.

    Returns:
        A tuple ``(x, y)`` of two arrays of shape ``(n, 1)``.
    """
    shape = (n, 1)
    # Abscissas are drawn first, then the noise: the order of the two draws
    # determines the values obtained under a fixed random seed.
    x = xmax * np.random.rand(*shape)
    noise = bruit * np.random.rand(*shape)
    y = (a * x + b) + noise
    return x, y

# Draw a noisy sample around the line y = 2x + 10 and display it.
data_x, data_y = get_dataset(2, 10)
plt.scatter(data_x, data_y)
plt.show()

# Reference fit: degree-1 least-squares polynomial. polyfit expects 1-D
# inputs (hence the [:,0] column selection) and returns the coefficients
# highest degree first, i.e. (slope, intercept).
a, b = np.polyfit(data_x[:,0], data_y[:,0], 1)
print("a :",a," b :",b)
plt.scatter(data_x, data_y)
# The two extreme abscissas are enough to draw the fitted straight line.
r = np.array([np.min(data_x), np.max(data_x)])
plt.plot(r, a*r + b, c='red')
plt.show()

a : 2.073236306628855  b : 12.22713619269508

# Mean squared error of the polyfit line (a, b) over the whole dataset.
predictions = a * data_x  + b
errors = (data_y - predictions) ** 2
cost = np.mean(errors)
print("Perte (moyenne quadratique des erreurs) :", cost)

Perte (moyenne quadratique des erreurs) : 2.073923560509483

def gradient_descent(X, Y, a_init = 0, b_init = 0, lr = 0.01, epochs = 1000):
    """Fit ``y = a * x + b`` by batch gradient descent on the squared error.

    Args:
        X: Array of abscissas.
        Y: Array of targets. It should have the same shape as ``X``;
            otherwise NumPy broadcasting applies to ``a * X + b - Y``.
        a_init: Starting value of the slope.
        b_init: Starting value of the intercept.
        lr: Learning rate (step size applied to each gradient).
        epochs: Number of update steps.

    Returns:
        A tuple ``(a, b, cost_history)``. ``cost_history`` holds
        ``epochs + 1`` values: the cost before the first update, then the
        cost after each update. Every entry uses the same ``1 / m``
        normalisation, so the values are directly comparable.
    """
    a, b = a_init, b_init
    m = len(Y)

    def _cost(residuals):
        # Sum of squared residuals divided by the number of rows of Y.
        return (1 / m) * np.sum(residuals ** 2)

    error = (a * X + b) - Y
    # The initial cost uses the same 1/m scale as the per-epoch costs
    # (it previously used 1/(2m), which made the first point of the curve
    # half as large as it should be).
    cost_history = [_cost(error)]
    for _ in range(epochs):
        # Partial derivatives of (1 / (2m)) * sum(error ** 2) with respect
        # to a and b; both are evaluated before either parameter moves.
        a_gradient = (1 / m) * np.sum(error * X)
        b_gradient = (1 / m) * np.sum(error)
        a = a - lr * a_gradient
        b = b - lr * b_gradient
        # The residuals of the updated line are reused by the next epoch.
        error = (a * X + b) - Y
        cost_history.append(_cost(error))
    return a, b, cost_history

# Estimate slope and intercept with the hand-written gradient descent,
# using its default starting point, learning rate and number of epochs.
a_est, b_est, cost_history = gradient_descent(data_x, data_y)
print('a par gradient : ', a_est, 'b par gradient :', b_est)

a par gradient :  2.1896795944921226 b par gradient : 11.468248819621769

# Overlay both fitted lines on the data: the gradient-descent estimate
# (dashed orange) and the polyfit reference (red).
plt.scatter(data_x, data_y)
r = np.array([np.min(data_x), np.max(data_x)])
plt.plot(r, a_est*r + b_est, c='orange', linestyle="dashed", label='RL par descente de gradient')
plt.plot(r, a*r + b, c='red', label='Régression linéaire (optimale)')
plt.legend()
plt.show()

# Last recorded cost, then the whole cost history with one point per entry.
print("Perte finale (moyenne quadratique des erreurs) :", cost_history[-1])
plt.scatter(list(range(len(cost_history))), cost_history, s=10)
plt.show()

Perte finale (moyenne quadratique des erreurs) : 2.2346248773108623

import numpy as np
import matplotlib.pyplot as plt

# premiere version de la fonction get_dataset_binaire
def get_dataset_binaire(n=50):
    """Build two separable point clouds for binary classification.

    Class 0 lies in the unit square ``[0, 1) x [0, 1)`` and class 1 in
    ``[-1, 0) x [-1, 0)``.

    Args:
        n: Number of points per class.

    Returns:
        A tuple ``(x, y, labels)``: ``x`` and ``y`` have shape
        ``(2 * n, 1)`` and ``labels`` is a float array of shape ``(2 * n,)``
        holding ``n`` zeros followed by ``n`` ones.
    """
    # Four uniform draws, consumed in the order x1, y1, x2, y2.
    x1, y1, x2, y2 = (np.random.rand(n, 1) for _ in range(4))
    x = np.vstack([x1, x2 - 1])
    y = np.vstack([y1, y2 - 1])
    labels = np.repeat([0.0, 1.0], n)
    return x, y, labels

# Generate the two-class dataset and colour each point by its class label z.
data_x, data_y, z = get_dataset_binaire()
plt.scatter(data_x, data_y, c=z, cmap=plt.cm.Spectral)
plt.show()

# version finale de la fonction get_dataset_binaire
def get_dataset_binaire(n=50, bruit=0, xmax=2):
    """Build two square point clouds, optionally overlapping.

    Each class is drawn uniformly in a square of side ``xmax``. With
    ``bruit = 0`` class 0 fills ``[0, xmax)`` on both axes and class 1
    fills ``[-xmax, 0)``; a positive ``bruit`` slides the two squares
    towards each other by ``bruit / 2 * xmax`` each.

    Args:
        n: Number of points per class.
        bruit: Overlap factor between the two squares.
        xmax: Side length of each square.

    Returns:
        A tuple ``(x, y, labels)``: ``x`` and ``y`` have shape
        ``(2 * n, 1)`` and ``labels`` is a float array of shape ``(2 * n,)``
        holding ``n`` zeros followed by ``n`` ones.
    """
    shift_class0 = bruit / 2 * xmax
    origin_class1 = -xmax * (1 - bruit / 2)

    # The draw order (x1, y1, x2, y2) is kept so that a seeded generator
    # yields the same points.
    x1 = np.random.rand(n, 1) * xmax - shift_class0
    y1 = np.random.rand(n, 1) * xmax - shift_class0
    x2 = origin_class1 + np.random.rand(n, 1) * xmax
    y2 = origin_class1 + np.random.rand(n, 1) * xmax

    labels = np.repeat([0.0, 1.0], n)
    return np.vstack([x1, x2]), np.vstack([y1, y2]), labels

# Same scatter plot with overlapping classes (bruit = .5) and squares of
# side 4.
data_x, data_y, z = get_dataset_binaire(bruit = .5, xmax = 4)
plt.scatter(data_x, data_y, c=z, cmap=plt.cm.Spectral)
plt.show()

def init_variables():
    """Create the initial parameters of a two-input neuron.

    Returns:
        A tuple ``(weights, bias)``: ``weights`` is an array of two values
        drawn from the standard normal distribution and ``bias`` is the
        integer ``0``.
    """
    return np.random.normal(size=2), 0

data_x, data_y, z = get_dataset_binaire(xmax = 4) # Noise-free dataset used to test the model
# Fresh random weights and a zero bias for the neuron.
w, b = init_variables()
print(w, b)

[ 0.36145548 -0.48962197] 0

def pre_activation(features, weights, bias):
    """Compute the linear part of a neuron: ``features . weights + bias``.

    Args:
        features: A single feature vector or a matrix with one sample per
            row.
        weights: Weight vector, one entry per feature.
        bias: Scalar offset added to every weighted sum.

    Returns:
        The weighted sum plus bias: a scalar for a single feature vector,
        or one value per row for a feature matrix.
    """
    weighted_sum = np.dot(features, weights)
    return weighted_sum + bias

# Stack the two coordinate columns into a feature matrix with one row per
# point, then apply the linear part of the neuron to every row.
z1 = pre_activation(np.column_stack((data_x,data_y)),w,b)

# Sanity check: recompute the first pre-activation by hand and compare.
print("Comparaison du premier résultat obtenu avec pre-activation (z1[0]) et le détail de ce calcul : ", z1[0]) 
print(data_x[0], '*', w[0], '+', data_y[0], '*', w[1], '+', b, ' = ', 
      data_x[0] * w[0] + data_y[0] * w[1] + b)

Comparaison du premier résultat obtenu avec pre-activation (z1[0]) et le détail de ce calcul :  -0.1925033891049781
[2.25661875] * 0.36145548077204015 + [2.05907958] * -0.4896219723591414 + 0  =  [-0.19250339]

def activation(z):
    """Apply the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    Args:
        z: Scalar or array of pre-activation values.

    Returns:
        Values in the interval (0, 1), with the same shape as ``z``.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

# Squash every pre-activation through the sigmoid.
z2 = activation(z1)
print("Valeur obtenue après transformation par la sigmoide de la valeur z1[0] affichée plus haut :", z2[0])
print("Prédiction associée après arrondi :", np.round(z2[0]))
print("Valeur réelle :", z[0])
# Evaluate the predictions: rounding the sigmoid output gives the predicted
# class, which is compared with the true label z. The cost below is also
# computed on the rounded predictions, not on the raw sigmoid outputs.
print("Précision (% de bonnes prédictions après activation et arrondi) : ", np.sum(z == np.round(z2)) / len(z) * 100, "%")
print("Coût :" , np.mean((np.round(z2) - z)**2))

Valeur obtenue après transformation par la sigmoide de la valeur z1[0] affichée plus haut : 0.4520222228880919
Prédiction associée après arrondi : 0.0
Valeur réelle : 0.0
Précision (% de bonnes prédictions après activation et arrondi) :  69.0 %
Coût : 0.31

def train(features, z, weights, bias, epochs=100, lr=0.1):
    """Train a single sigmoid neuron by batch gradient descent.

    The loss is the squared error between the sigmoid output and the
    target. The gradient is summed (not averaged) over all samples and
    applied once per epoch.

    Args:
        features: Feature matrix with one sample per row (2-D array).
        z: Target labels, one per row of ``features`` (1-D array).
        weights: Weight vector, one entry per feature. It is updated IN
            PLACE (``weights -= ...``), so the caller's array holds the
            trained weights afterwards. Assumed to be a NumPy float array.
        bias: Initial bias. The trained bias is not returned and the
            caller's variable is not modified.
        epochs: Number of passes over the dataset (default 100).
        lr: Learning rate (default 0.1).

    Returns:
        The predicted class (0. or 1.) for every row of ``features`` once
        training is finished. The accuracy is printed as a side effect.
    """
    for _ in range(epochs):
        # Forward pass on the whole batch with the current parameters.
        z2 = activation(pre_activation(features, weights, bias))
        # Derivative of the squared error with respect to the
        # pre-activation: (output - target) * sigmoid'(z1), where
        # sigmoid'(z1) = z2 * (1 - z2).
        delta = (z2 - z) * z2 * (1 - z2)
        # Sum over the samples; same batch gradient as accumulating the
        # per-sample contributions one by one.
        gradient_w = np.dot(delta, features)
        gradient_b = np.sum(delta)
        # Parameter update.
        weights -= lr * gradient_w
        bias -= lr * gradient_b
    # Final predictions and accuracy report.
    z2 = activation(pre_activation(features, weights, bias))
    predictions = np.round(z2)
    print("Précision : " , np.mean(predictions == z))
    return predictions

# Train the neuron on the noise-free dataset and show its final predictions.
pred = train(np.column_stack((data_x,data_y)), z, w, b)
print(pred)

Précision :  1.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1.]

# Same experiment on overlapping classes (bruit = .5), starting again from
# freshly initialised weights and bias.
data_x, data_y, z = get_dataset_binaire(xmax = 4, bruit = .5)
w, b = init_variables()
pred = train(np.column_stack((data_x,data_y)), z, w, b)

Précision :  0.91

import numpy as np
import matplotlib.pyplot as plt

def get_dataset_xor(n=50):
    """Build an XOR-like dataset made of four unit-square point clouds.

    Class 0 fills the quadrants where both coordinates have the same sign
    (``[0, 1) x [0, 1)`` and ``[-1, 0) x [-1, 0)``); class 1 fills the two
    quadrants where the signs differ.

    Args:
        n: Number of points per quadrant.

    Returns:
        A tuple ``(x, y, labels)``: ``x`` and ``y`` have shape
        ``(4 * n, 1)`` and ``labels`` is a float array of shape ``(4 * n,)``
        holding ``2 * n`` zeros followed by ``2 * n`` ones.
    """
    # (x shift, y shift) of each quadrant, in stacking order: the first two
    # belong to class 0 and the last two to class 1.
    quadrant_shifts = [(0, 0), (-1, -1), (0, -1), (-1, 0)]
    xs, ys = [], []
    for shift_x, shift_y in quadrant_shifts:
        # x is drawn before y for every quadrant.
        xs.append(np.random.rand(n, 1) + shift_x)
        ys.append(np.random.rand(n, 1) + shift_y)
    labels = np.repeat([0.0, 1.0], 2 * n)
    return np.vstack(xs), np.vstack(ys), labels

# Generate and display the XOR dataset, coloured by class label.
data_x, data_y, z = get_dataset_xor()
plt.scatter(data_x, data_y, c=z, cmap=plt.cm.Spectral)
plt.show()

# Train the single-neuron model on the XOR data, starting from freshly
# initialised weights and bias.
w, b = init_variables()
pred = train(np.column_stack((data_x,data_y)), z, w, b)

Précision :  0.545

pip install tensorflow

import tensorflow as tf

# Small fully connected network: 2 inputs -> 6 hidden sigmoid neurons ->
# 1 sigmoid output. The input shape is declared with an explicit Input
# object instead of the `input_shape=` argument on the first Dense layer,
# which Keras 3 flags as deprecated for Sequential models.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(2,)),  # two features per sample: the x and y coordinates
    tf.keras.layers.Dense(6, activation='sigmoid'),  # 6 hidden neurons
    tf.keras.layers.Dense(1, activation='sigmoid')  # 1 output neuron
])

# Binary classification set-up: cross-entropy loss, accuracy reported as metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train silently (verbose=0) for 1000 epochs on the stacked (x, y) features.
model.fit(np.column_stack((data_x,data_y)), z, epochs=1000, verbose=0)

<keras.src.callbacks.history.History at 0x22ccb24ebd0>

# Evaluate on the same data the model was trained on; with the metrics
# configured at compile time, evaluate returns the loss and the accuracy.
loss, accuracy = model.evaluate(np.column_stack((data_x,data_y)), z, verbose=0)
print(f"Perte : {loss:.4f} - Précision : {accuracy:.4f}")

Perte : 0.5612 - Précision : 0.7700

# Sigmoid outputs of the network for every training point.
predictions = model.predict(np.column_stack((data_x,data_y)))
# Threshold at 0.5 to turn the outputs into 0/1 class labels for printing.
print("Prédictions :", (predictions > 0.5).astype(int).flatten())
# Colour each point by its rounded (predicted) class.
plt.scatter(data_x, data_y, c=np.round(predictions), cmap=plt.cm.Spectral)
plt.show()

7/7 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step 
Prédictions : [0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 1 1 0
 0 1 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0
 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1]

fig, ax = plt.subplots()
# Evaluate the model on a regular 100 x 100 grid covering [-1, 1] x [-1, 1].
xx, yy = np.meshgrid(np.linspace(-1, 1, 100), np.linspace(-1, 1, 100))
# One (x, y) row per grid node, in the two-column layout the model expects.
grid = np.c_[xx.ravel(), yy.ravel()]
# Reshape the flat predictions back onto the grid for contour plotting.
predictions = model.predict(grid).reshape(xx.shape)

# NOTE(review): the axes were created at the top of this cell, so this
# clear() looks redundant here - confirm before removing.
ax.clear()
# Filled contours of the model output, with the true labels drawn on top.
ax.contourf(xx, yy, predictions, alpha=0.5, cmap='coolwarm')
ax.scatter(data_x, data_y, c=z, s=20, edgecolors='k', cmap='coolwarm')
ax.set_title("Frontière de décision XOR")

plt.show()

313/313 ━━━━━━━━━━━━━━━━━━━━ 0s 810us/step

Introduction IA : Deep learning, réseau de neurones et TensorFlow

La descente de gradient

Réseau à un neurone : perceptron

Réseau à N neurones : résoudre le XOR