import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input

# Synthetic data following y = 3x + 2 plus Gaussian noise (std 0.5).
np.random.seed(0)  # fixed seed so the noise draw is repeatable
X = np.linspace(0, 10, 100)[:, np.newaxis]  # 100 evenly spaced points, as a column
noise = np.random.normal(loc=0, scale=0.5, size=(100, 1))
y = 3 * X + 2 + noise

# Scatter plot of the noisy samples.
fig, ax = plt.subplots()
ax.plot(X, y, 'o', markersize=3)
plt.show()

# Keras model: a single linear neuron, i.e. y = a * x + b.
# The input shape is declared with an explicit Input layer: passing
# `input_shape` to Dense is deprecated and raises a UserWarning.
model = Sequential([
    Input(shape=(1,)),
    Dense(units=1, activation='linear'),
])

C:\Users\Serge\anaconda3\Lib\site-packages\keras\src\layers\core\dense.py:87: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)

# Plain SGD minimising the mean absolute error.
model.compile(loss='mae', optimizer='sgd')
# Training: 200 epochs, no progress output.
model.fit(x=X, y=y, verbose=0, epochs=200)

<keras.src.callbacks.history.History at 0x1947a3eeb10>

# Read the fitted parameters back from the first layer of the model.
dense_layer = model.layers[0]
weights, bias = dense_layer.get_weights()
a = weights[0, 0]  # slope
b = bias[0]        # intercept
print(f"Pente (a) estimée (proche de 3) : {a:.4f}")
print(f"Intercept (b) estimé (proche de 2) : {b:.4f}")

Pente (a) estimée (proche de 3) : 3.0041
Intercept (b) estimé (proche de 2) : 1.9644

# Model output on the training inputs, drawn over the raw samples.
predictions = model.predict(X)
fig, ax = plt.subplots()
ax.plot(X, y, 'o', markersize=3)  # observed points
ax.plot(X, predictions)           # fitted line
plt.show()

4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 15ms/step

# Loss of the fitted model on the training data (the model was compiled with loss='mae').
score = model.evaluate(x=X, y=y, verbose=0)
print(f'loss  (MAE)    : {score:5.4f}')

loss  (MAE)    : 0.4066

# Total sum of squares of the target (denominator of R²).
centered = y - np.mean(y)
vary = np.sum(centered**2)

# Residuals of the network predictions, then MAE and residual sum of squares.
residuals = y - predictions
error_moy = np.abs(residuals).sum() / len(y)
rss = (residuals**2).sum()

print("MAE :", error_moy)
print("R2 :", 1 - (rss / vary))

MAE : 0.40664207402808117
R2 : 0.9966167200359299

# Reference fit: least-squares line from NumPy.
z = np.polyfit(X.ravel(), y.ravel(), deg=1)
print("Les paramètres du polynome :", z)  # slope first, then intercept
slope, intercept = z
y_pred = intercept + slope * X
residuals = y - y_pred
rss = (residuals**2).sum()
print("R2 :", 1 - (rss / vary))

Les paramètres du polynome : [2.98513366 2.10423572]
R2 : 0.9966836581389286

# Synthetic data following y = 3*x1 + 6*x2 + 2 plus Gaussian noise (std 0.5).
np.random.seed(0)
x1 = np.linspace(0, 10, 100)[:, np.newaxis]  # evenly spaced column
x2 = np.random.rand(100, 1) * 10             # uniform draws scaled to [0, 10)
X = np.concatenate((x1, x2), axis=1)         # two feature columns side by side
noise = np.random.normal(0, 0.5, size=(100, 1))
y = 3 * x1 + 6 * x2 + 2 + noise

# Keras model: one linear neuron fed with the two features (x1, x2).
# The input shape is declared with an explicit Input layer: passing
# `input_shape` to Dense is deprecated and raises a UserWarning.
model = Sequential([
    Input(shape=(2,)),
    Dense(units=1, activation='linear'),
])
model.compile(optimizer='sgd', loss='mse')
model.fit(X, y, epochs=200, verbose=0)

<keras.src.callbacks.history.History at 0x1947d107190>

# Fitted parameters of the single neuron: one weight per feature plus a bias.
dense_layer = model.layers[0]
weights, bias = dense_layer.get_weights()
a = weights[0, 0]  # slope for x1
b = weights[1, 0]  # slope for x2
c = bias[0]        # intercept
print(f"Pente (a) estimée (proche de 3) : {a:.4f}")
print(f"Pente (b) estimée (proche de 6) : {b:.4f}")
print(f"Intercept (c) estimé (proche de 2) : {c:.4f}")

Pente (a) estimée (proche de 3) : 2.9685
Pente (b) estimée (proche de 6) : 5.9776
Intercept (c) estimé (proche de 2) : 2.1146

# Data from a cubic: y = 1 + 2x - 0.5x^2 + 0.1x^3 + Gaussian noise (std 0.5).
np.random.seed(0)

X = np.linspace(-3, 3, 200)[:, np.newaxis]  # 200 points on [-3, 3], as a column

noise = np.random.normal(0, 0.5, size=(200, 1))
y = 1 + 2 * X - 0.5 * X**2 + 0.1 * X**3 + noise

# Polynomial features: columns x, x^2, x^3.
X_poly = np.concatenate((X, X**2, X**3), axis=1)

# Model: a single linear neuron over the three polynomial features.
# The input shape is declared with an Input layer (passing `input_shape`
# to Dense is deprecated).
model = Sequential([Input(shape=(3,)), Dense(1, activation='linear')])
# The features are unscaled (x**3 reaches 27 on [-3, 3]); with the default
# SGD step size the updates overshoot and the loss diverges instead of
# converging. Adam adapts the step per parameter, which keeps training
# stable without rescaling the inputs; more epochs give it time to settle.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss='mse')
# Training
model.fit(X_poly, y, epochs=500, verbose=0)
# Extract the coefficients: one weight per feature, plus the bias.
weights, bias = model.layers[0].get_weights()

beta_1, beta_2, beta_3 = weights.flatten()
beta_0 = bias[0]

# Display
print(f"β0 (intercept) : {beta_0:.3f}")
print(f"β1 (x)         : {beta_1:.3f}")
print(f"β2 (x²)        : {beta_2:.3f}")
print(f"β3 (x³)        : {beta_3:.3f}")

β0 (intercept) : -9.535
β1 (x)         : 16.391
β2 (x²)        : 4.327
β3 (x³)        : 12.200

# Loss of the single-neuron polynomial model on its training data.
loss = model.evaluate(x=X_poly, y=y, verbose=0)
print(loss)

22295.36328125

# Network: a 3-feature input, two hidden ReLU layers of 12 neurons each,
# and a single-unit output layer that produces the prediction.
model = Sequential([
    Input((3,), name="InputLayer"),                   # only fixes the input dimension (3 variables)
    Dense(12, activation='relu', name='Dense_n1'),    # first hidden layer, 12 neurons
    Dense(12, activation='relu', name='Dense_n2'),    # second hidden layer, 12 neurons
    Dense(1, name='Output'),                          # one output value per sample
])

model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae', 'mse'],
)

model.summary()

Model: "sequential_3"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ Dense_n1 (Dense)                     │ (None, 12)                  │              48 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ Dense_n2 (Dense)                     │ (None, 12)                  │             156 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ Output (Dense)                       │ (None, 1)                   │              13 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘

 Total params: 217 (868.00 B)

 Trainable params: 217 (868.00 B)

 Non-trainable params: 0 (0.00 B)

# Train silently and keep the History object returned by fit.
history = model.fit(x=X_poly, y=y, verbose=0, epochs=100)

# evaluate returns the loss followed by the compiled metrics.
score = model.evaluate(x=X_poly, y=y, verbose=0)

print(f'loss      : {score[0]:5.4f}')
print(f'mae       : {score[1]:5.4f}')
print(f'mse       : {score[2]:5.4f}')

loss      : 0.4707
mae       : 0.5575
mse       : 0.4707

# Single test point: noise-free value of the cubic versus the network output.
X1 = np.array([[-1.6342]])
y1 = 1 + 2 * X1 - 0.5 * X1**2 + 0.1 * X1**3
X_poly1 = np.concatenate((X1, X1**2, X1**3), axis=1)
predictions = model.predict(X_poly1)

print("Prediction :", predictions[0][0])
print("Reality    :", y1[0][0])

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 81ms/step
Prediction : -3.348153
Reality    : -4.0401358473688

# Total sum of squares of the target (denominator of R²).
centered = y - np.mean(y)
vary = np.sum(centered**2)
y_pred = model.predict(X_poly)

# Residuals of the network, then MAE and residual sum of squares.
residuals = y - y_pred
error_moy = np.abs(residuals).sum() / len(y)
rss = (residuals**2).sum()

print("MAE :", error_moy)
print("R2 :", 1 - (rss / vary))

7/7 ━━━━━━━━━━━━━━━━━━━━ 0s 4ms/step 
MAE : 0.557464841609098
R2 : 0.9781682824716684

import matplotlib.pyplot as plt

# Per-epoch training loss recorded by fit, with epochs numbered from 1.
loss_valeurs = history.history['loss']
epoches = range(1, len(loss_valeurs) + 1)

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(epoches, loss_valeurs, 'b-')
ax.set_title("Fonction de Loss pendant l'entraînement")
ax.set_xlabel('Époques')
ax.set_ylabel('Erreur (Loss)')
ax.grid(True)

plt.show()

# Reference fit: least-squares cubic from NumPy.
z = np.polyfit(X.ravel(), y.ravel(), deg=3)
print("Les paramètres du polynome :", z)  # highest degree first

Les paramètres du polynome : [ 0.08049601 -0.49523311  2.09081018  1.02101087]

# Evaluate the fitted cubic; polyfit orders coefficients from highest degree down.
c3, c2, c1, c0 = z
y_pred = c0 + c1 * X + c2 * X**2 + c3 * X**3

residuals = y - y_pred
error_moy = np.abs(residuals).sum() / len(y)  # mean absolute error
rss = (residuals**2).sum()                    # residual sum of squares

print(error_moy)
print("R2 :", 1 - (rss / vary))

0.4155176840113402
R2 : 0.988246588509612

import pandas as pd

# Path of the comma-separated data file; the first rows are shown as a check.
adresse = "boston.csv"
df = pd.read_csv(adresse, sep=',')
df.head()

# Shuffle the rows, then split them 70 % / 30 % into training and test sets.
# A fixed random_state makes the split reproducible from run to run.
data = df.sample(frac=1., axis=0, random_state=0)
data_train = data.sample(frac=0.7, axis=0, random_state=0)
data_test  = data.drop(data_train.index)
# Separate the features from the target column 'medv'.
x_train = data_train.drop('medv',  axis=1)
y_train = data_train['medv']
x_test  = data_test.drop('medv',   axis=1)
y_test  = data_test['medv']

# Standardise the features using the training-set statistics only, so no
# information from the test set leaks into the scaling.
mean = x_train.mean()
# A column that is constant in the training set has std 0; dividing by 1
# instead leaves it at zero rather than producing NaN/inf.
std  = x_train.std().replace(0, 1)
x_train = (x_train - mean) / std
x_test  = (x_test  - mean) / std
# Convert to NumPy arrays.
x_train, y_train = np.array(x_train), np.array(y_train)
x_test,  y_test  = np.array(x_test),  np.array(y_test)

# Total sum of squares of the test target (denominator of the test R²).
vary = np.sum((y_test - np.mean(y_test))**2)

from scipy.stats import t, f
from scipy.stats import norm

def reg(X, y, names, verbose=True):
    """Fit an ordinary least-squares regression with an intercept.

    Parameters
    ----------
    X : array-like of shape (n, k)
        Explanatory variables, without the constant column.
    y : array-like of shape (n,) or (n, 1)
        Target values; flattened to one dimension before fitting.
    names : sequence of str
        Labels of the k explanatory variables, used only for printing.
    verbose : bool, default True
        When true, print one line per coefficient followed by the R².

    Returns
    -------
    tuple
        ``(beta_hat, se_beta, t_stats, p_values, r2, p_value)``: coefficients
        (intercept first), their standard errors, t-statistics, two-sided
        p-values, the R², and the p-value of the global F-test.

    Raises
    ------
    ValueError
        If there are not more observations than estimated parameters.
    """
    # === 1. Design matrix with a leading column of ones for the intercept ===
    X_ = np.column_stack([np.ones(len(X)), np.asarray(X, dtype=float)])
    # Flatten the target so an (n, 1) column behaves like an (n,) vector;
    # otherwise beta_hat / se_beta below would broadcast to a (p, p) matrix.
    y = np.asarray(y, dtype=float).ravel()
    n, p = X_.shape  # p = number of parameters (intercept included)
    if n <= p:
        raise ValueError(
            f"need more observations than parameters (n={n}, p={p})"
        )
    # === 2. OLS coefficients: beta = (X'X)^-1 X'y ===
    XtX_inv = np.linalg.inv(X_.T @ X_)
    beta_hat = XtX_inv @ X_.T @ y
    # === 3. Residuals ===
    y_pred = X_ @ beta_hat
    residuals = y - y_pred
    # === 4. Residual variance: sigma² = RSS / (n - p) ===
    RSS = np.sum(residuals**2)
    sigma2 = RSS / (n - p)
    # === 5. Coefficient covariance: Var(beta) = sigma² (X'X)^-1 ===
    var_beta = sigma2 * XtX_inv
    se_beta = np.sqrt(np.diag(var_beta))  # standard errors
    # === 6. t-statistics: t = beta / SE(beta) ===
    t_stats = beta_hat / se_beta
    # === 7. Two-sided p-values ===
    # The survival function keeps precision in the far tail, where
    # 1 - cdf rounds to exactly 0.
    p_values = 2 * t.sf(np.abs(t_stats), df=n - p)
    # === 8. R² and global F-test ===
    r2 = 1 - RSS / np.sum((y - np.mean(y))**2)
    k = p - 1  # number of explanatory variables
    F = (r2 / k) / ((1 - r2) / (n - k - 1))
    p_value = f.sf(F, k, n - k - 1)
    # === Display ===
    if verbose:
        labels = ["cst"] + [names[i] for i in range(p - 1)]
        for label, b, se, tval, pval in zip(labels, beta_hat, se_beta, t_stats, p_values):
            print(f"{label}: {b:.4f}, SE={se:.4f}, t={tval:.4f}, p={pval:.4f}")
        print(f"\nR² = {r2:.4f}")
    return beta_hat, se_beta, t_stats, p_values, r2, p_value

# Labels of the explanatory variables only: the target 'medv' is not a
# regressor, so it is removed to keep the names aligned with the columns
# of x_train regardless of where 'medv' sits in the file.
noms = df.columns.drop('medv')
res = reg(x_train, y_train, noms)

cst: 22.3819, SE=0.2577, t=86.8399, p=0.0000
crim: -0.9784, SE=0.3427, t=-2.8546, p=0.0046
zn: 0.8455, SE=0.3727, t=2.2683, p=0.0239
indus: 0.2121, SE=0.5103, t=0.4156, p=0.6780
chas: 0.8184, SE=0.2661, t=3.0757, p=0.0023
nox: -2.1003, SE=0.5356, t=-3.9212, p=0.0001
rm: 2.8640, SE=0.3526, t=8.1217, p=0.0000
age: 0.5534, SE=0.4417, t=1.2529, p=0.2111
dis: -2.7938, SE=0.4935, t=-5.6616, p=0.0000
rad: 2.7411, SE=0.7272, t=3.7693, p=0.0002
tax: -1.7197, SE=0.8028, t=-2.1421, p=0.0329
ptratio: -2.1600, SE=0.3370, t=-6.4093, p=0.0000
b: 1.0519, SE=0.3075, t=3.4214, p=0.0007
lstat: -4.0640, SE=0.4258, t=-9.5444, p=0.0000

R² = 0.7398

# Apply the fitted linear model to the test set (constant column first).
X_ = np.column_stack((np.ones(len(x_test)), x_test))
y_pred = X_ @ res[0]
# NOTE(review): res[-1] is the last value returned by reg (the F-test
# p-value), not a prediction — the name is misleading; confirm it is unused.
ypred = res[-1]
for idx in (10, 11):
    print("Pred : ", y_pred[idx], "Value : ", y_test[idx])

Pred :  8.196582272310376 Value :  5.0
Pred :  31.389964640513373 Value :  31.1

# Test-set error of the linear model: MAE and R².
residuals = y_test - y_pred
error_moy = np.abs(residuals).sum() / len(y_test)
rss = (residuals**2).sum()
print("MAE :", error_moy)
print("R2 :", 1 - (rss / vary))

MAE : 3.508086014387225
R2 : 0.7258450492857227

# Network: 13 input features, three hidden ReLU layers (32, 64, 32 units),
# and a single-unit output layer.
model = Sequential([
    Input((13,), name="InputLayer"),
    Dense(32, activation='relu', name='Dense_n1'),
    Dense(64, activation='relu', name='Dense_n2'),
    Dense(32, activation='relu', name='Dense_n3'),
    Dense(1, name='Output'),
])

model.compile(loss='mse', optimizer='adam', metrics=['mae', 'mse'])

# The test set is passed as validation data, so it is evaluated after each epoch.
model.fit(
    x_train,
    y_train,
    epochs=200,
    verbose=False,
    validation_data=(x_test, y_test),
)

<keras.src.callbacks.history.History at 0x1947e63f250>

# evaluate returns the loss followed by the compiled metrics.
score = model.evaluate(x=x_test, y=y_test, verbose=0)
print(f'loss      : {score[0]:5.4f}')
print(f'mae       : {score[1]:5.4f}')
print(f'mse       : {score[2]:5.4f}')

loss      : 12.2917
mae       : 2.3448
mse       : 12.2917

# Call the model directly on the test features and show the first two
# predictions next to the observed values.
predictions = model(x_test)
for idx in (0, 1):
    print(f"Prediction : {predictions[idx][0]:.2f}")
    print("Value : ", y_test[idx])

Prediction : 18.30
Value :  22.0
Prediction : 9.17
Value :  13.4

# Test-set error of the network: flatten the predictions to 1-D first so
# they subtract element-wise from y_test.
flat_predictions = np.array(predictions).flatten()
residuals = flat_predictions - y_test
error_moy = np.abs(residuals).sum() / len(y_test)
rss = (residuals**2).sum()
print("MAE :", error_moy)
print("R2 :", 1 - (rss / vary))

MAE : 2.344783627359491
R2 : 0.8438493496996629

from sklearn.neural_network import MLPRegressor

# One hidden layer of 100 neurons.
regr = MLPRegressor(tol=0.1, max_iter=2000, random_state=1)
regr = regr.fit(x_train, y_train)  # fit returns the estimator itself
regr.predict(x_test)
regr.score(x_test, y_test)

0.6221644268051829

# Two hidden layers should, in theory, suit this problem better.
regr = MLPRegressor(
    hidden_layer_sizes=(50, 50),
    tol=0.1,
    max_iter=2000,
    random_state=1,
)
regr = regr.fit(x_train, y_train)  # fit returns the estimator itself
regr.predict(x_test)
regr.score(x_test, y_test)

0.69868613458768

from sklearn.ensemble import RandomForestRegressor

# Random forest limited to depth-2 trees, with a fixed seed.
regr = RandomForestRegressor(random_state=0, max_depth=2)
regr = regr.fit(x_train, y_train)  # fit returns the estimator itself
regr.predict(x_test)
regr.score(x_test, y_test)

0.7812232553842731

from sklearn.ensemble import GradientBoostingRegressor

# Gradient boosting with default settings and a fixed seed.
regr = GradientBoostingRegressor(random_state=0).fit(x_train, y_train)
regr.predict(x_test)
regr.score(x_test, y_test)

0.868236377908265

from sklearn.ensemble import HistGradientBoostingRegressor  # intended for large datasets

# Bind the fitted estimator to `regr`: without the assignment the fitted
# model is discarded and the prediction and score below silently reuse
# whatever estimator `regr` referred to before.
regr = HistGradientBoostingRegressor(random_state=0).fit(x_train, y_train)
regr.predict(x_test)
regr.score(x_test, y_test)

0.868236377908265

from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Support-vector regression preceded by a scaling step, chained in a pipeline.
scaler = StandardScaler()
svr = SVR(C=1.0, epsilon=0.2)
regr = make_pipeline(scaler, svr)
regr.fit(x_train, y_train)
regr.predict(x_test)
regr.score(x_test, y_test)

0.620450119313478

	crim	zn	indus	nox	rm	age	dis	rad	tax	ptratio	b	lstat	medv
0	0.00632	18.0	2.31	0.538	6.575	65.2	4.0900	1	296	15.3	396.90	4.98	24.0
1	0.02731	0.0	7.07	0.469	6.421	78.9	4.9671	2	242	17.8	396.90	9.14	21.6
2	0.02729	0.0	7.07	0.469	7.185	61.1	4.9671	2	242	17.8	392.83	4.03	34.7
3	0.03237	0.0	2.18	0.458	6.998	45.8	6.0622	3	222	18.7	394.63	2.94	33.4
4	0.06905	0.0	2.18	0.458	7.147	54.2	6.0622	3	222	18.7	396.90	5.33	36.2

Introduction au réseau de neurones : de la simple régression à l'économétrie

Régression linéaire simple et réseau de neurones

Régression linéaire multiple et réseau de neurones

Polynôme et réseau de neurones

Exemple pratique : Prédiction des prix de l'immobilier à Boston

L'alternative Scikit-learn