conda install --channel conda-forge pysal

import geopandas as gpd
gdf = gpd.read_file('C:/Users/Serge/Desktop/PARIS/IRIS75C.shp')
gdf.head()

ax = gdf.plot(column='Revenu', scheme='Quantiles', k=5, cmap='GnBu')
ax.set_axis_off()

import libpysal as lps
wq = lps.weights.Queen.from_dataframe(gdf, use_index=False, silence_warnings=True)

wq = lps.weights.Queen.from_dataframe(gdf)

import numpy as np
np.array(list(wq.cardinalities.items()))[0:5,]

array([[0, 8],
       [1, 9],
       [2, 6],
       [3, 7],
       [4, 5]])

wq.islands

[]

wq.min_neighbors

1

wq.max_neighbors

15

wq[0]

{1: 1.0, 2: 1.0, 3: 1.0, 264: 1.0, 274: 1.0, 275: 1.0, 277: 1.0, 279: 1.0}

a = gdf[0:1].plot(color='red')
b = gdf.iloc[list(wq[0]),].plot(ax=a)
b.set_axis_off()

ax = gdf.plot(edgecolor='grey', facecolor='w', linewidth=0.5)
f,ax = wq.plot(gdf, ax=ax,
        edge_kws=dict(color='r', linestyle=':', linewidth=0.5),
        node_kws=dict(marker=''))
ax.set_axis_off()

wq.transform = 'r'

y = gdf['Revenu']
ylag = lps.weights.lag_spatial(wq, y)

import matplotlib.pyplot as plt
plt.plot(y, ylag, 'o', markersize = 2)
plt.ylabel('Spatial Lag Revenu')
plt.xlabel('Revenu')
plt.show()

wnn = lps.weights.KNN.from_dataframe(gdf, k=1, silence_warnings=True) #Plus proche voisin

ax = gdf.plot(edgecolor='grey', facecolor='w', linewidth=0.5)
f,ax = wnn.plot(gdf, ax=ax,
        edge_kws=dict(color='r', linestyle=':', linewidth=0.5),
        node_kws=dict(marker=''))
ax.set_axis_off()

b = gdf.centroid
bl = list(zip(b.x,b.y))
wt = lps.weights.DistanceBand(bl,threshold=500, silence_warnings=True) #Distance max 500m
wt2 = lps.weights.DistanceBand(bl,threshold=1000) #Distance max 1k

ax = gdf.plot(edgecolor='grey', facecolor='w', linewidth=0.5)
f,ax = wt.plot(gdf, ax=ax,
        edge_kws=dict(color='r', linestyle=':', linewidth=0.5),
        node_kws=dict(marker=''))
ax.set_axis_off()

ax = gdf.plot(edgecolor='grey', facecolor='w', linewidth=0.5)
f,ax = wt2.plot(gdf, ax=ax,
        edge_kws=dict(color='r', linestyle=':', linewidth=0.5),
        node_kws=dict(marker=''))
ax.set_axis_off()

wd = lps.weights.Delaunay(np.array(bl)) #triangulation Delaunay

ax = gdf.plot(edgecolor='grey', facecolor='w', linewidth=0.5)
f,ax = wd.plot(gdf, ax=ax,
        edge_kws=dict(color='r', linestyle=':', linewidth=0.5),
        node_kws=dict(marker=''))
ax.set_axis_off()

import esda
y = gdf['Revenu']
mi = esda.moran.Moran(y, wq)

print(mi.I)

0.7749098569647341

print(mi.p_norm)

0.0

ylag = lps.weights.lag_spatial(wq, y)
fig, ax = plt.subplots(1)
plt.plot(y, ylag, 'o', markersize = 1)
plt.vlines(y.mean(), ylag.min(), ylag.max(), linestyle='--') #Droite verticale representant la moyenne des revenus 
plt.hlines(ylag.mean(), y.min(), y.max(), linestyle='--') #Droite horizontale representant la moyenne des revenus voisins
import numpy as np
b, a = np.polyfit(y, ylag, 1)
plt.plot(y, a + b*y, 'r') # Droite de regression representant le I de Moran
plt.title('Diagramme de Moran')
plt.ylabel('Revenu voisin')
plt.xlabel('Revenu')
plt.show()

li = esda.moran.Moran_Local(y, wq)

li.Is[0:5]

array([-0.07378559,  0.43237012,  1.42078162,  2.03806609,  3.0821462 ])

li.q[0:5]

array([4, 3, 3, 3, 3])

(li.p_sim < 0.05).sum()

374

sig = 1 * (li.p_sim < 0.05)
hotspot = 1 * (sig * li.q==1)
coldspot = 3 * (sig * li.q==3)
doughnut = 2 * (sig * li.q==2)
diamond = 4 * (sig * li.q==4)
spots = hotspot + coldspot + doughnut + diamond

spot_labels = [ '0 ns', '1 hot spot', '2 doughnut', '3 cold spot', '4 diamond']
#labels = [spot_labels[i] for i in spots]
labels = []
for i in spots :
    labels = labels + [spot_labels[i]]
from matplotlib import colors
hmap = colors.ListedColormap([ 'lightgrey', 'red', 'pink', 'blue', 'lightblue'])
ax = gdf.assign(cl=labels).plot(column='cl', cmap=hmap, legend=True, edgecolor='white', linewidth=0.5)
ax.set_axis_off()
plt.show()

from spreg import OLS

import numpy as np
y = np.array(gdf['Revenu'])
x1 = np.array(gdf[['Tx_Mono','Tx_Ouvrier']])
ols1 = OLS(y,x1,nonspat_diag=False, name_y='Revenu', name_x=['Tx_Mono','Tx_Ouvrier'], name_ds='IRIS' )

print(ols1.summary)

REGRESSION RESULTS
------------------

SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES
-----------------------------------------
Data set            :        IRIS
Weights matrix      :        None
Dependent Variable  :      Revenu                Number of Observations:         864
Mean dependent var  :  30374.5058                Number of Variables   :           3
S.D. dependent var  :  10322.4742                Degrees of Freedom    :         861
R-squared           :      0.5577
Adjusted R-squared  :      0.5566

------------------------------------------------------------------------------------
            Variable     Coefficient       Std.Error     t-Statistic     Probability
------------------------------------------------------------------------------------
            CONSTANT     43544.51658       484.17904        89.93474         0.00000
             Tx_Mono    -18146.21392      2461.15257        -7.37306         0.00000
          Tx_Ouvrier    -171966.35415      8648.54530       -19.88385         0.00000
------------------------------------------------------------------------------------
================================ END OF REPORT =====================================

ols1.betas

array([[  43544.51658102],
       [ -18146.21392112],
       [-171966.35414997]])

ols1.u[0:5]

array([[ -194.81432627],
       [-1343.72174765],
       [-7718.83187525],
       [ 9223.62966982],
       [  603.41276394]])

ols2 = OLS(y, x1, wq, spat_diag=True, moran=True, name_y='Revenu', name_x=['Tx_Mono','Tx_Ouvrier'], name_w='Queen', name_ds='IRIS' )

print(ols2.summary)

REGRESSION RESULTS
------------------

SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES
-----------------------------------------
Data set            :        IRIS
Weights matrix      :       Queen
Dependent Variable  :      Revenu                Number of Observations:         864
Mean dependent var  :  30374.5058                Number of Variables   :           3
S.D. dependent var  :  10322.4742                Degrees of Freedom    :         861
R-squared           :      0.5577
Adjusted R-squared  :      0.5566
Sum squared residual: 4.06761e+10                F-statistic           :    542.7218
Sigma-square        :47242904.651                Prob(F-statistic)     :  3.164e-153
S.E. of regression  :    6873.347                Log likelihood        :   -8858.252
Sigma-square ML     :47078866.787                Akaike info criterion :   17722.503
S.E of regression ML:   6861.4041                Schwarz criterion     :   17736.788

------------------------------------------------------------------------------------
            Variable     Coefficient       Std.Error     t-Statistic     Probability
------------------------------------------------------------------------------------
            CONSTANT     43544.51658       484.17904        89.93474         0.00000
             Tx_Mono    -18146.21392      2461.15257        -7.37306         0.00000
          Tx_Ouvrier    -171966.35415      8648.54530       -19.88385         0.00000
------------------------------------------------------------------------------------

REGRESSION DIAGNOSTICS
MULTICOLLINEARITY CONDITION NUMBER            5.350

TEST ON NORMALITY OF ERRORS
TEST                             DF        VALUE           PROB
Jarque-Bera                       2         414.413           0.0000

DIAGNOSTICS FOR HETEROSKEDASTICITY
RANDOM COEFFICIENTS
TEST                             DF        VALUE           PROB
Breusch-Pagan test                2           6.194           0.0452
Koenker-Bassett test              2           2.874           0.2377

DIAGNOSTICS FOR SPATIAL DEPENDENCE
TEST                           MI/DF       VALUE           PROB
Moran's I (error)              0.5632        28.292           0.0000
Lagrange Multiplier (lag)         1         787.057           0.0000
Robust LM (lag)                   1         118.642           0.0000
Lagrange Multiplier (error)       1         784.560           0.0000
Robust LM (error)                 1         116.145           0.0000
Lagrange Multiplier (SARMA)       2         903.202           0.0000

================================ END OF REPORT =====================================

spols = OLS(y, x1, wq, spat_diag=True, slx_lags=1 , name_y='Revenu', name_x=['Tx_Mono','Tx_Ouvrier'], name_w='Queen', name_ds='IRIS' )

print(spols.summary)

REGRESSION RESULTS
------------------

SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES WITH SPATIALLY LAGGED X (SLX)
-----------------------------------------------------------------------
Data set            :        IRIS
Weights matrix      :       Queen
Dependent Variable  :      Revenu                Number of Observations:         864
Mean dependent var  :  30374.5058                Number of Variables   :           5
S.D. dependent var  :  10322.4742                Degrees of Freedom    :         859
R-squared           :      0.6205
Adjusted R-squared  :      0.6187
Sum squared residual: 3.48957e+10                F-statistic           :    351.1505
Sigma-square        :40623596.168                Prob(F-statistic)     :  4.907e-179
S.E. of regression  :    6373.664                Log likelihood        :   -8792.035
Sigma-square ML     :40388505.913                Akaike info criterion :   17594.070
S.E of regression ML:   6355.1952                Schwarz criterion     :   17617.878

------------------------------------------------------------------------------------
            Variable     Coefficient       Std.Error     t-Statistic     Probability
------------------------------------------------------------------------------------
            CONSTANT     46567.92189       604.09852        77.08663         0.00000
             Tx_Mono    -16284.51444      2513.42976        -6.47900         0.00000
          Tx_Ouvrier    -81403.97447     11163.14897        -7.29221         0.00000
           W_Tx_Mono     -2845.20198      4543.59318        -0.62620         0.53135
        W_Tx_Ouvrier    -143986.80224     15367.49001        -9.36957         0.00000
------------------------------------------------------------------------------------

REGRESSION DIAGNOSTICS
MULTICOLLINEARITY CONDITION NUMBER           13.066

TEST ON NORMALITY OF ERRORS
TEST                             DF        VALUE           PROB
Jarque-Bera                       2         415.816           0.0000

DIAGNOSTICS FOR HETEROSKEDASTICITY
RANDOM COEFFICIENTS
TEST                             DF        VALUE           PROB
Breusch-Pagan test                4          17.548           0.0015
Koenker-Bassett test              4           8.062           0.0893

DIAGNOSTICS FOR SPATIAL DEPENDENCE
TEST                              DF       VALUE           PROB
Lagrange Multiplier (lag)         1         907.135           0.0000
Robust LM (lag)                   1          36.213           0.0000
Lagrange Multiplier (error)       1         871.097           0.0000
Robust LM (error)                 1           0.175           0.6761
Lagrange Multiplier (SARMA)       2         907.310           0.0000

================================ END OF REPORT =====================================

	OBJECTID	N_SQ_IR	C_CAINSEE	N_QU	C_IR	C_TYPEIR	L_IR	M2_IP	M2_POP	M2_EMP	SHAPE_Leng	SHAPE_Area	Tx_Mono	Tx_Ouvrier	Revenu	geometry
0	20	750004033	75118	69	751186922	Habitat	Grandes Carrières 22	69711.047044	42891.780556	69688.085258	1330.434903	80920.786958	0.1699	0.0517	31376.0	POLYGON ((651431.058 6866348.459, 651280.765 6...
1	21	750004043	75118	69	751186923	Habitat	Grandes Carrières 23	51939.257797	51939.489210	51939.257797	1103.360595	68661.340091	0.3265	0.0875	21229.0	POLYGON ((651067.528 6866226.862, 651010.998 6...
2	22	750004027	75118	69	751186924	Habitat	Grandes Carrières 24	49511.462977	43233.728108	47498.682647	1164.179080	73724.514027	0.3491	0.0701	17436.0	POLYGON ((651095.339 6866434.192, 651040.113 6...
3	23	750004026	75118	69	751186925	Habitat	Grandes Carrières 25	53456.911856	37176.576688	44169.508934	1615.787944	81608.212989	0.5014	0.1773	13180.0	POLYGON ((651779.725 6866558.266, 651508.519 6...
4	24	750004862	75118	69	751186926	Habitat	Grandes Carrières 26	100866.919597	38308.332796	83837.826331	2001.067712	176037.028979	0.5323	0.1397	10465.0	POLYGON ((651917.450 6866637.766, 651812.424 6...

Statistique spatiale dans Python : PySAL¶

LibPySAL : Voisinage et pondération spatiale¶

ESDA : Exploration des données - Autocorrélation spatiale¶

SPREG : Modélisation spatiale - Les régressions spatiales¶