pip install rpy2

import rpy2.robjects as ro
print(ro.r('R.version.string'))

setx PATH "%PATH%;C:\Program Files\R\R-4.3.1\bin"

import os
os.environ["R_HOME"] = "C:/Program Files/R/R-4.3.1"

import rpy2.robjects as ro
print(ro.r('R.version.string'))

[1] "R version 4.3.1 (2023-06-16 ucrt)"

from rpy2.robjects import r, IntVector
ma_liste = [10, 20, 30, 40, 50] # Liste R
r_liste = IntVector(ma_liste) # Possibilité d'utiliser FloatVector()
resume = r['summary'](r_liste) # Appel de la fonction summury sur le vecteur/liste R 
print(resume)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
     10      20      30      30      40      50

import pandas as pd
from rpy2.robjects import pandas2ri, r
# Activer la conversion automatique pandas -> R
pandas2ri.activate()
# DataFrame en Python
data = pd.DataFrame({
    'x': [1, 2, 3, 4, 5],
    'y': [3, 5, 7, 9, 10]
})
# Convertir en DataFrame R
r_dataframe = pandas2ri.py2rpy(data)

# Faire la régression linéaire avec la fonction lm
modele = r['lm'](formula='y ~ x', data=r_dataframe)
# Afficher le résumé du modèle
resume = r['summary'](modele)
print(resume)
# Recupérer les coefficents de la regession avec la fonction coefficients
coefficients = r['coefficients'](modele)
print("Coefficients :")
print(coefficients)
# Recupérer le R2 avec la méthode rx2
r_squared = resume.rx2('r.squared')[0]
print(f"\nR² : {r_squared}")

Call:
(function (formula, data, subset, weights, na.action, method = "qr", 
    model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE, 
    contrasts = NULL, offset, ...) 
{
    ret.x <- x
    ret.y <- y
    cl <- match.call()
    mf <- match.call(expand.dots = FALSE)
    m <- match(c("formula", "data", "subset", "weights", "na.action", 
        "offset"), names(mf), 0L)
    mf <- mf[c(1L, m)]
    mf$drop.unused.levels <- TRUE
    mf[[1L]] <- quote(stats::model.frame)
    mf <- eval(mf, parent.frame())
    if (method == "model.frame") 
        return(mf)
    else if (method != "qr") 
        warning(gettextf("method = '%s' is not supported. Using 'qr'", 
            method), domain = NA)
    mt <- attr(mf, "terms")
    y <- model.response(mf, "numeric")
    w <- as.vector(model.weights(mf))
    if (!is.null(w) && !is.numeric(w)) 
        stop("'weights' must be a numeric vector")
    offset <- model.offset(mf)
    mlm <- is.matrix(y)
    ny <- if (mlm) 
        nrow(y)
    else length(y)
    if (!is.null(offset)) {
        if (!mlm) 
            offset <- as.vector(offset)
        if (NROW(offset) != ny) 
            stop(gettextf("number of offsets is %d, should equal %d (number of observations)", 
                NROW(offset), ny), domain = NA)
    }
    if (is.empty.model(mt)) {
        x <- NULL
        z <- list(coefficients = if (mlm) matrix(NA_real_, 0, 
            ncol(y)) else numeric(), residuals = y, fitted.values = 0 * 
            y, weights = w, rank = 0L, df.residual = if (!is.null(w)) sum(w != 
            0) else ny)
        if (!is.null(offset)) {
            z$fitted.values <- offset
            z$residuals <- y - offset
        }
    }
    else {
        x <- model.matrix(mt, mf, contrasts)
        z <- if (is.null(w)) 
            lm.fit(x, y, offset = offset, singular.ok = singular.ok, 
                ...)
        else lm.wfit(x, y, w, offset = offset, singular.ok = singular.ok, 
            ...)
    }
    class(z) <- c(if (mlm) "mlm", "lm")
    z$na.action <- attr(mf, "na.action")
    z$offset <- offset
    z$contrasts <- attr(x, "contrasts")
    z$xlevels <- .getXlevels(mt, mf)
    z$call <- cl
    z$terms <- mt
    if (model) 
        z$model <- mf
    if (ret.x) 
        z$x <- x
    if (ret.y) 
        z$y <- y
    if (!qr) 
        z$qr <- NULL
    z
})(formula = "y ~ x", data = structure(list(x = 1:5, y = c(3L, 
5L, 7L, 9L, 10L)), class = "data.frame", row.names = c("0", "1", 
"2", "3", "4")))

Residuals:
         0          1          2          3          4 
-2.000e-01 -2.026e-15  2.000e-01  4.000e-01 -4.000e-01 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   1.4000     0.3830   3.656 0.035353 *  
x             1.8000     0.1155  15.588 0.000574 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.3651 on 3 degrees of freedom
Multiple R-squared:  0.9878,	Adjusted R-squared:  0.9837 
F-statistic:   243 on 1 and 3 DF,  p-value: 0.0005737


Coefficients :
[1.4 1.8]

R² : 0.9878048780487805

from rpy2.robjects import r

r('''
    # Définir un vecteur
    x <- c(1, 2, 3, 4, 5)
    
    # Calculer la moyenne
    moy <- mean(x)
    
    # Afficher la moyenne
    print(moy)
''')

[1] 3

moyenne = r('moy')[0]
print(f"La moyenne est : {moyenne}")

La moyenne est : 3.0

data = pd.DataFrame({
    'x': [1, 2, 3, 4, 5],
    'y': [3, 5, 7, 9, 10]
})
r_dataframe = pandas2ri.py2rpy(data)
r.assign('df', r_dataframe)
r('''
    x <- df$x
    y <- df$y
    reg <- lm(y ~ x)
    print(summary(reg))
''')

Call:
lm(formula = y ~ x)

Residuals:
         1          2          3          4          5 
-2.000e-01 -2.026e-15  2.000e-01  4.000e-01 -4.000e-01 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   1.4000     0.3830   3.656 0.035353 *  
x             1.8000     0.1155  15.588 0.000574 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.3651 on 3 degrees of freedom
Multiple R-squared:  0.9878,	Adjusted R-squared:  0.9837 
F-statistic:   243 on 1 and 3 DF,  p-value: 0.0005737

r('''
    coef <- coefficients(reg)
    r2 <- summary(reg)$r.squared
''')
c = r('coef')
print("Coefficients  : ",c)
r2 = r('r2')
print("R2 : ", r2[0])

Coefficients  :  [1.4 1.8]
R2 :  0.9878048780487805

from rpy2.robjects import r
from rpy2.robjects.packages import importr
r('.libPaths("C:/Users/Serge/Documents/R/win-library/4.3")')
importr('sf')
importr('sp')
r('''
    adresse <- 'C:/Users/Serge/Desktop/Donnees/Departements.shp'
    dept <- st_read(adresse)
    print(head(dept))
''')

Reading layer `Departements' from data source 
  `C:\Users\Serge\Desktop\Donnees\Departements.shp' using driver `ESRI Shapefile'
Simple feature collection with 96 features and 17 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 99227.52 ymin: 6049649 xmax: 1242375 ymax: 7110525
Projected CRS: RGF93 v1 / Lambert-93
Simple feature collection with 6 features and 17 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 644569.2 ymin: 6272483 xmax: 1077506 ymax: 6997001
Projected CRS: RGF93 v1 / Lambert-93
  ID_GEOFLA CODE_DEPT                NOM_DEPT         NOM_CHF CODE_REG
1         1        01                     AIN BOURG-EN-BRESSE       82
2         2        02                   AISNE            LAON       22
3         3        03                  ALLIER         MOULINS       83
4         4        04 ALPES-DE-HAUTE-PROVENCE DIGNE-LES-BAINS       93
5         5        05            HAUTES-ALPES             GAP       93
6         6        06         ALPES-MARITIMES            NICE       93
                  NOM_REGION Pop_hommes Pop_femmes Pop_Etrang Pop_totale
1                RHONE-ALPES     287336     294023      45109     581355
2                   PICARDIE     262006     276794      13447     538790
3                   AUVERGNE     164392     178416       9394     342807
4 PROVENCE-ALPES-COTE-D'AZUR      76580      81387       6388     157965
5 PROVENCE-ALPES-COTE-D'AZUR      65494      68714       4192     134205
6 PROVENCE-ALPES-COTE-D'AZUR     511716     572712     100388    1084428
  Naissances  Deces Solde_nat Solde_mig Tx_Etrange Superfice population
1      61000  37114     23886     41991     0.0776  5784.556     581355
2      62669  48087     14582    -11105     0.0250  7419.598     538790
3      30652  38838     -8186      6378     0.0274  7380.145     342807
4      13741  13972      -231     18513     0.0404  6995.771     157965
5      12882  10793      2089     10485     0.0312  5689.465     134205
6     102510 101409      1101     71461     0.0926  4295.464    1084428
                        geometry
1 MULTIPOLYGON (((919195 6541...
2 MULTIPOLYGON (((735604.5 68...
3 MULTIPOLYGON (((753768.9 65...
4 MULTIPOLYGON (((992637.8 63...
5 MULTIPOLYGON (((1012912 640...
6 MULTIPOLYGON (((1018256 627...

r('''
    plot(dept$geometry)
''')

r('''
png("C:/Users/Serge/Desktop/premiere-carte.png", width = 800, height = 600)  # Ouvre un dispositif graphique PNG
plot(dept$geometry)
dev.off()  # Ferme le dispositif graphique
''')
# Chargement de l'image dans Python
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
img = mpimg.imread("C:/Users/Serge/Desktop/premiere-carte.png")
plt.imshow(img)
plt.axis("off")  # Masquer les axes
plt.show()

import pandas as pd
from rpy2.robjects import pandas2ri
shape = r('data.frame(dept)')
pd_from_r_df = pandas2ri.rpy2py(shape)

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[12], line 4
      2 from rpy2.robjects import pandas2ri
      3 shape = r('data.frame(dept)')
----> 4 pd_from_r_df = pandas2ri.rpy2py(shape)

File ~\anaconda3\Lib\functools.py:909, in singledispatch.<locals>.wrapper(*args, **kw)
    905 if not args:
    906     raise TypeError(f'{funcname} requires at least '
    907                     '1 positional argument')
--> 909 return dispatch(args[0].__class__)(*args, **kw)

File ~\anaconda3\Lib\site-packages\rpy2\robjects\pandas2ri.py:332, in rpy2py_dataframe(obj)
    330 rpy2py = conversion.get_conversion().rpy2py
    331 colnames_lst = []
--> 332 od = OrderedDict(
    333     (i, rpy2py(col) if isinstance(col, rinterface.SexpVector) else col)
    334     for i, col in enumerate(_flatten_dataframe(obj, colnames_lst))
    335 )
    337 res = pandas.DataFrame.from_dict(od)
    338 res.columns = tuple('.'.join(_) if isinstance(_, list) else _ for _ in colnames_lst)

File ~\anaconda3\Lib\site-packages\rpy2\robjects\pandas2ri.py:332, in <genexpr>(.0)
    330 rpy2py = conversion.get_conversion().rpy2py
    331 colnames_lst = []
--> 332 od = OrderedDict(
    333     (i, rpy2py(col) if isinstance(col, rinterface.SexpVector) else col)
    334     for i, col in enumerate(_flatten_dataframe(obj, colnames_lst))
    335 )
    337 res = pandas.DataFrame.from_dict(od)
    338 res.columns = tuple('.'.join(_) if isinstance(_, list) else _ for _ in colnames_lst)

File ~\anaconda3\Lib\site-packages\rpy2\robjects\pandas2ri.py:319, in _flatten_dataframe(obj, colnames_lst)
    317 col = obj[i]
    318 if isinstance(col, ListSexpVector):
--> 319     _ = _records_to_columns(col)
    320     colnames_lst.extend((n, subn) for subn in _.keys())
    321     for subcol in _.values():

File ~\anaconda3\Lib\site-packages\rpy2\robjects\pandas2ri.py:274, in _records_to_columns(obj)
    272 for i, record in enumerate(obj_ri):
    273     checknames = set()
--> 274     for name in record.names:
    275         if name in checknames:
    276             raise ValueError(
    277                 f'The record {i} has "{name}" duplicated.'
    278             )

TypeError: 'NULLType' object is not iterable

r('''
dept_df <- data.frame(dept)
dept_df$geometry <- st_as_text(dept_df$geometry)
''')
shape = r('dept_df')
pd_from_r_df = pandas2ri.rpy2py(shape)

print(pd_from_r_df.columns)

Index(['ID_GEOFLA', 'CODE_DEPT', 'NOM_DEPT', 'NOM_CHF', 'CODE_REG',
       'NOM_REGION', 'Pop_hommes', 'Pop_femmes', 'Pop_Etrang', 'Pop_totale',
       'Naissances', 'Deces', 'Solde_nat', 'Solde_mig', 'Tx_Etrange',
       'Superfice', 'population', 'geometry'],
      dtype='object')

from shapely import wkt
pd_from_r_df['geometry'] = pd_from_r_df['geometry'].apply(wkt.loads)

import geopandas as gpd
gdf = gpd.GeoDataFrame(pd_from_r_df, geometry='geometry')
gdf.plot()

<Axes: >

import matplotlib.pyplot as plt
b = gdf.centroid
fig, ax = plt.subplots()
gdf.plot(ax=ax, color='white', edgecolor='black', linewidth=0.2)
b.plot(ax=ax, marker='o', color='red', markersize=3)
plt.show()

gdf = gpd.read_file('C:/Users/Serge/Desktop/Reseau/Villes.shp')
gdf.plot()
df = pd.DataFrame(gdf.drop(columns='geometry')) # Transformation du geoDataFrame en dataFrame sans la colonne geometry
df['geometry'] = gdf['geometry'].apply(str) # Ajout du champ geometry au format texte
pandas2ri.activate() # Activer la conversion automatique entre pandas et R
py_gdf = pandas2ri.py2rpy(df) # DataFrame de Python vers R
r.assign("py_gdf", py_gdf) #Import du DataFrame dans R

r('''
py_gdf$geometry <- st_as_sfc(py_gdf$geometry)  # Ajuster le CRS si nécessaire st_as_sfc(py_gdf$geometry, crs = 27572)
sf_object <- st_as_sf(py_gdf)
buf <- st_buffer(sf_object, dist=50000)
''')

r('''
png("C:/Users/Serge/Desktop/carte.png", width = 800, height = 600)
plot(buf$geometry)
plot(sf_object$geometry, add=TRUE)
dev.off() 
''')
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
img = mpimg.imread("C:/Users/Serge/Desktop/carte.png")
plt.imshow(img)
plt.title("Carte")
plt.axis("off") 
plt.show()

#-------------------- Partie Import des données Python vers R --------------------
import geopandas as gpd
import pandas as pd
from rpy2.robjects import r, pandas2ri

r('.libPaths("C:/Users/Serge/Documents/R/win-library/4.3")')
importr('sf')
importr('sp')

pandas2ri.activate()
gdf = gpd.read_file('C:/Users/Serge/Desktop/Reseau/Villes.shp')
df = pd.DataFrame(gdf.drop(columns='geometry'))
df['geometry'] = gdf['geometry'].apply(str)
py_gdf = pandas2ri.py2rpy(df)
r.assign("py_gdf", py_gdf)

r('''
py_gdf$geometry <- st_as_sfc(py_gdf$geometry, crs = 27572) 
sf_object <- st_as_sf(py_gdf)
sp_object <- as_Spatial(sf_object)
''')

#-------------------- Partie Fonction K de Ripley --------------------
importr('spatstat')

r('''
    x <- sp_object$coords.x1
    y <- sp_object$coords.x2
    minx <- min(x)
    maxx <- max(x)
    miny <- min(y)
    maxy <- max(y)
    ppp <- as.ppp(data.frame(x,y), owin(c(minx,maxx), c(miny,maxy)))
    K <- Kest(ppp, correction='isotropic')
''')

r('''
    png("C:/Users/Serge/Desktop/K.png", width = 800, height = 600) 
    plot(K)
    dev.off() 
''')

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

img = mpimg.imread("C:/Users/Serge/Desktop/K.png")
plt.imshow(img)
plt.title("Ripley K")
plt.axis("off")  # Masquer les axes
plt.show()

#-------------------- Partie Fonction K de Ripley avec enveloppe --------------------
importr('dbmss')
r('''
    wmppp <- as.wmppp(ppp)
    K_env <- KEnvelope(wmppp)
    png("C:/Users/Serge/Desktop/K_env.png", width = 800, height = 600) 
    plot(K_env)
    dev.off() 
''')
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

img = mpimg.imread("C:/Users/Serge/Desktop/K_env.png")
plt.imshow(img)
plt.title("Ripley K")
plt.axis("off")  # Masquer les axes
plt.show()

Generating 100 simulations by evaluating expression  ...
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,  100.

Done.

importr('GWmodel')
importr('sp')

gdf = gpd.read_file('C:/Users/Serge/Desktop/PARIS/IRIS75C.shp')
df = pd.DataFrame(gdf.drop(columns='geometry'))
df['geometry'] = gdf['geometry'].apply(str)
py_gdf = pandas2ri.py2rpy(df)
r.assign("py_gdf", py_gdf)

r('''
py_gdf$geometry <- st_as_sfc(py_gdf$geometry, crs = 27572) 
iris75 <- st_as_sf(py_gdf)
rev <- iris75$Revenu
mono <- iris75$Tx_Mono
lrev <- log(rev) 
reg <- lm(lrev ~ mono)
irispa <- as_Spatial(iris75)
dist <- gw.dist(dp.locat=coordinates(irispa))
bw0 <- bw.gwr(reg, data=irispa, approach="AIC", kernel="bisquare", adaptive=TRUE, dMat=dist)
rgeo <- gwr.robust(reg, data=irispa, bw=bw0, kernel="bisquare", filtered=FALSE, adaptive=TRUE, dMat=dist)
#summary(rgeo)
r2 <- rgeo$SDF$Local_R2
a <- rgeo$SDF$mono
''')

Adaptive bandwidth (number of nearest neighbours): 541 AICc value: -209.4885 
Adaptive bandwidth (number of nearest neighbours): 342 AICc value: -312.853 
Adaptive bandwidth (number of nearest neighbours): 218 AICc value: -415.915 
Adaptive bandwidth (number of nearest neighbours): 142 AICc value: -518.7867 
Adaptive bandwidth (number of nearest neighbours): 94 AICc value: -612.9181 
Adaptive bandwidth (number of nearest neighbours): 65 AICc value: -679.7402 
Adaptive bandwidth (number of nearest neighbours): 46 AICc value: -719.5343 
Adaptive bandwidth (number of nearest neighbours): 35 AICc value: -740.1084 
Adaptive bandwidth (number of nearest neighbours): 28 AICc value: -750.2609 
Adaptive bandwidth (number of nearest neighbours): 23 AICc value: -750.5761 
Adaptive bandwidth (number of nearest neighbours): 21 AICc value: -744.107 
Adaptive bandwidth (number of nearest neighbours): 25 AICc value: -751.1441 
Adaptive bandwidth (number of nearest neighbours): 26 AICc value: -750.6209 
Adaptive bandwidth (number of nearest neighbours): 24 AICc value: -750.7929 
Adaptive bandwidth (number of nearest neighbours): 25 AICc value: -751.1441

a = r('a')
ax = gdf.assign(a=a).plot(column='a', scheme='equal_interval', k=7, cmap='coolwarm', legend=True, edgecolor='white', linewidth=0.2, legend_kwds={"loc": "lower right"})
ax.set_axis_off()
plt.show()
r2 = r('r2')
ax = gdf.assign(r2=r2).plot(column='r2', scheme='equal_interval', k=4, cmap='Reds', legend=True, edgecolor='white', linewidth=0.2, legend_kwds={"loc": "lower right"})
ax.set_axis_off()
plt.show()

import mapclassify as mc
# Définition des seuils personnalisés
bins = [-4, -2, -1, 0, 1, 2, 4] 

# Appliquer la classification sur une colonne spécifique
gdf['a'] = a
gdf['classe'] = mc.UserDefined(gdf['a'], bins).yb

# Afficher la carte
gdf.plot(column="classe", cmap="coolwarm")

<Axes: >

Coupler Python et R : Rpy2¶

Les bases pour interagir avec R¶

Jongler entre les données spatiales Python (GeoDataFrame) et R (SF)¶

Enrichir Python de fonctions de statistiques spatiales avancées¶