url = "https://www.dropbox.com/s/kf9dldnqgo4eidu/agregados_semana_municipio.pkl?dl=1"
r = requests.get(url, allow_redirects=True)
open('/content/semana_municipio.pkl', 'wb').write(r.content)

url = "https://www.dropbox.com/s/46h5pnrhgnw9qca/pca_vulnerability.csv?dl=1"
r = requests.get(url, allow_redirects=True)
open('/content/pca_vulnerability.csv', 'wb').write(r.content)

url = "https://www.dropbox.com/s/md8gl5oy3mpequg/municipios.gpkg?dl=1"
r = requests.get(url, allow_redirects=True)
open('/content/municipios.gpkg', 'wb').write(r.content)


%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from libpysal.weights.contiguity import Queen
import giddy
import numpy as np
import pandas as pd
import geopandas as gpd
import splot
from splot.esda import moran_scatterplot
from esda.moran import Moran, Moran_Local, Moran_Local_BV
from splot.esda import lisa_cluster
import seaborn as sns
import contextily as ctx
import pickle5 as pickle
plt.style.use('ggplot')


with open("data/semana_municipio.pkl", "rb") as fh:
  confirmados_por_semana = pickle.load(fh)

confirmados_por_semana = confirmados_por_semana.groupby(['CLAVE_MUNICIPIO_RES','FECHA_SINTOMAS'])[['Nuevos Casos']].sum()
confirmados_por_semana_wide = (confirmados_por_semana
                   .reset_index()
                   .pivot_table("Nuevos Casos", "CLAVE_MUNICIPIO_RES", "FECHA_SINTOMAS")
                  )
muns_geo = gpd.read_file("data/municipios.gpkg")
confirmados_por_semana_wide = (confirmados_por_semana_wide
                   .merge(muns_geo, left_on='CLAVE_MUNICIPIO_RES', right_on='municipio_cvegeo', how='right')
                   .fillna(0))
confirmados_por_semana_wide.head()


confirmados_por_semana_wide = confirmados_por_semana_wide.drop([1986], axis=0) # quitamos Cozumel
w = Queen.from_dataframe(confirmados_por_semana_wide)
w.T = 'r' # estandarizamos por fila

/home/plablo/miniconda3/envs/geoinformatica/lib/python3.7/site-packages/libpysal/weights/weights.py:172: UserWarning: The weights matrix is not fully connected: 
 There are 2 disconnected components.
  warnings.warn(message)


list(confirmados_por_semana_wide.columns)

[Timestamp('2020-01-05 00:00:00'),
 Timestamp('2020-01-12 00:00:00'),
 Timestamp('2020-01-19 00:00:00'),
 Timestamp('2020-01-26 00:00:00'),
 Timestamp('2020-02-02 00:00:00'),
 Timestamp('2020-02-09 00:00:00'),
 Timestamp('2020-02-16 00:00:00'),
 Timestamp('2020-02-23 00:00:00'),
 Timestamp('2020-03-01 00:00:00'),
 Timestamp('2020-03-08 00:00:00'),
 Timestamp('2020-03-15 00:00:00'),
 Timestamp('2020-03-22 00:00:00'),
 Timestamp('2020-03-29 00:00:00'),
 Timestamp('2020-04-05 00:00:00'),
 Timestamp('2020-04-12 00:00:00'),
 Timestamp('2020-04-19 00:00:00'),
 Timestamp('2020-04-26 00:00:00'),
 Timestamp('2020-05-03 00:00:00'),
 Timestamp('2020-05-10 00:00:00'),
 Timestamp('2020-05-17 00:00:00'),
 Timestamp('2020-05-24 00:00:00'),
 Timestamp('2020-05-31 00:00:00'),
 Timestamp('2020-06-07 00:00:00'),
 Timestamp('2020-06-14 00:00:00'),
 Timestamp('2020-06-21 00:00:00'),
 Timestamp('2020-06-28 00:00:00'),
 Timestamp('2020-07-05 00:00:00'),
 Timestamp('2020-07-12 00:00:00'),
 Timestamp('2020-07-19 00:00:00'),
 Timestamp('2020-07-26 00:00:00'),
 Timestamp('2020-08-02 00:00:00'),
 Timestamp('2020-08-09 00:00:00'),
 Timestamp('2020-08-16 00:00:00'),
 Timestamp('2020-08-23 00:00:00'),
 Timestamp('2020-08-30 00:00:00'),
 Timestamp('2020-09-06 00:00:00'),
 Timestamp('2020-09-13 00:00:00'),
 Timestamp('2020-09-20 00:00:00'),
 Timestamp('2020-09-27 00:00:00'),
 Timestamp('2020-10-04 00:00:00'),
 Timestamp('2020-10-11 00:00:00'),
 Timestamp('2020-10-18 00:00:00'),
 Timestamp('2020-10-25 00:00:00'),
 Timestamp('2020-11-01 00:00:00'),
 Timestamp('2020-11-08 00:00:00'),
 Timestamp('2020-11-15 00:00:00'),
 Timestamp('2020-11-22 00:00:00'),
 Timestamp('2020-11-29 00:00:00'),
 Timestamp('2020-12-06 00:00:00'),
 Timestamp('2020-12-13 00:00:00'),
 Timestamp('2020-12-20 00:00:00'),
 Timestamp('2020-12-27 00:00:00'),
 Timestamp('2021-01-03 00:00:00'),
 Timestamp('2021-01-10 00:00:00'),
 Timestamp('2021-01-17 00:00:00'),
 Timestamp('2021-01-24 00:00:00'),
 Timestamp('2021-01-31 00:00:00'),
 Timestamp('2021-02-07 00:00:00'),
 Timestamp('2021-02-14 00:00:00'),
 Timestamp('2021-02-21 00:00:00'),
 Timestamp('2021-02-28 00:00:00'),
 Timestamp('2021-03-07 00:00:00'),
 Timestamp('2021-03-14 00:00:00'),
 Timestamp('2021-03-21 00:00:00'),
 Timestamp('2021-03-28 00:00:00'),
 Timestamp('2021-04-04 00:00:00'),
 Timestamp('2021-04-11 00:00:00'),
 Timestamp('2021-04-18 00:00:00'),
 Timestamp('2021-04-25 00:00:00'),
 Timestamp('2021-05-02 00:00:00'),
 Timestamp('2021-05-09 00:00:00'),
 Timestamp('2021-05-16 00:00:00'),
 Timestamp('2021-05-23 00:00:00'),
 Timestamp('2021-05-30 00:00:00'),
 Timestamp('2021-06-06 00:00:00'),
 Timestamp('2021-06-13 00:00:00'),
 Timestamp('2021-06-20 00:00:00'),
 Timestamp('2021-06-27 00:00:00'),
 Timestamp('2021-07-04 00:00:00'),
 Timestamp('2021-07-11 00:00:00'),
 Timestamp('2021-07-18 00:00:00'),
 Timestamp('2021-07-25 00:00:00'),
 Timestamp('2021-08-01 00:00:00'),
 Timestamp('2021-08-08 00:00:00'),
 Timestamp('2021-08-15 00:00:00'),
 Timestamp('2021-08-22 00:00:00'),
 Timestamp('2021-08-29 00:00:00'),
 Timestamp('2021-09-05 00:00:00'),
 Timestamp('2021-09-12 00:00:00'),
 Timestamp('2021-09-19 00:00:00'),
 Timestamp('2021-09-26 00:00:00'),
 Timestamp('2021-10-03 00:00:00'),
 Timestamp('2021-10-10 00:00:00'),
 Timestamp('2021-10-17 00:00:00'),
 Timestamp('2021-10-24 00:00:00'),
 Timestamp('2021-10-31 00:00:00'),
 Timestamp('2021-11-07 00:00:00'),
 Timestamp('2021-11-14 00:00:00'),
 Timestamp('2021-11-21 00:00:00'),
 Timestamp('2021-11-28 00:00:00'),
 Timestamp('2021-12-05 00:00:00'),
 Timestamp('2021-12-12 00:00:00'),
 Timestamp('2021-12-19 00:00:00'),
 Timestamp('2021-12-26 00:00:00'),
 Timestamp('2022-01-02 00:00:00'),
 Timestamp('2022-01-09 00:00:00'),
 Timestamp('2022-01-16 00:00:00'),
 Timestamp('2022-01-23 00:00:00'),
 '_uid_',
 'entidad_cvegeo',
 'municipio_cvegeo',
 'municipio_nombre',
 'pob',
 'geometry']


y = confirmados_por_semana_wide.iloc[:,20:107].to_numpy(copy=True) # sacamos el array de los valores: una fila por observación, las variables en columnas
y = (y / confirmados_por_semana_wide['pob'].to_numpy().reshape(2456,1)) * 1000 # tasa por 1000 habitantes
sm = giddy.markov.Spatial_Markov(y, w, fixed=True, k=5, m=5) # instanciamos el modelo
sm.summary()

--------------------------------------------------------------
                     Spatial Markov Test                      
--------------------------------------------------------------
Number of classes: 5
Number of transitions: 211216
Number of regimes: 5
Regime names: LAG0, LAG1, LAG2, LAG3, LAG4
--------------------------------------------------------------
   Test                   LR                Chi-2
  Stat.            16728.631            20038.130
    DOF                   80                   80
p-value                0.000                0.000
--------------------------------------------------------------
P(H0)           C0         C1         C2         C3         C4
     C0      0.725      0.048      0.112      0.074      0.041
     C1      0.220      0.391      0.327      0.055      0.007
     C2      0.185      0.112      0.461      0.206      0.036
     C3      0.118      0.019      0.202      0.491      0.170
     C4      0.067      0.002      0.035      0.163      0.733
--------------------------------------------------------------
P(LAG0)         C0         C1         C2         C3         C4
     C0      0.819      0.040      0.074      0.041      0.026
     C1      0.321      0.420      0.231      0.025      0.004
     C2      0.378      0.119      0.387      0.103      0.013
     C3      0.412      0.021      0.177      0.314      0.076
     C4      0.460      0.004      0.045      0.112      0.379
--------------------------------------------------------------
P(LAG1)         C0         C1         C2         C3         C4
     C0      0.700      0.067      0.131      0.072      0.031
     C1      0.234      0.427      0.299      0.036      0.004
     C2      0.224      0.144      0.463      0.152      0.018
     C3      0.203      0.029      0.246      0.425      0.098
     C4      0.236      0.004      0.068      0.194      0.497
--------------------------------------------------------------
P(LAG2)         C0         C1         C2         C3         C4
     C0      0.634      0.057      0.159      0.102      0.048
     C1      0.175      0.363      0.383      0.072      0.007
     C2      0.147      0.114      0.494      0.216      0.028
     C3      0.116      0.021      0.233      0.498      0.132
     C4      0.114      0.002      0.064      0.248      0.572
--------------------------------------------------------------
P(LAG3)         C0         C1         C2         C3         C4
     C0      0.617      0.036      0.142      0.129      0.076
     C1      0.137      0.325      0.430      0.097      0.012
     C2      0.115      0.091      0.473      0.271      0.050
     C3      0.064      0.018      0.199      0.535      0.185
     C4      0.053      0.003      0.044      0.216      0.685
--------------------------------------------------------------
P(LAG4)         C0         C1         C2         C3         C4
     C0      0.581      0.020      0.117      0.149      0.133
     C1      0.103      0.308      0.402      0.147      0.039
     C2      0.102      0.068      0.432      0.303      0.095
     C3      0.058      0.011      0.153      0.515      0.263
     C4      0.024      0.001      0.021      0.122      0.831
--------------------------------------------------------------


fig, axes = plt.subplots(2,3,figsize = (15,8))
for i in range(2):
    for j in range(3):
        ax = axes[i,j]
        if i==0 and j==0:
            im = sns.heatmap(sm.p, annot=True, linewidths=.5, ax=ax, cbar=True, vmin=0, vmax=1,
                          square=True, cmap="YlGn",fmt='.3f')
            ax.set_title("Global",fontsize=13) 
        else:

            p_temp = sm.P[i*3+j-1]
            im = sns.heatmap(p_temp, annot=True, linewidths=.5, ax=ax, cbar=True, vmin=0, vmax=1,
                          square=True, cmap="YlGn",fmt='.3f')
            ax.set_title("Retraso espacial %d"%(i*3+j),fontsize=13)


y = confirmados_por_semana_wide.iloc[:,20:107].to_numpy(copy=True) # sacamos el array de los valores: una fila por observación, las variables en columnas
y = (y / confirmados_por_semana_wide['pob'].to_numpy().reshape(2456,1)) * 1000 # tasa por 1000 habitantes
sm_r = giddy.markov.Spatial_Markov(y, w, fixed=False, k=5, m=5) # spatial_markov instance o
sm_r.summary()

--------------------------------------------------------------
                     Spatial Markov Test                      
--------------------------------------------------------------
Number of classes: 5
Number of transitions: 211216
Number of regimes: 5
Regime names: LAG0, LAG1, LAG2, LAG3, LAG4
--------------------------------------------------------------
   Test                   LR                Chi-2
  Stat.            16304.593            19187.827
    DOF                   80                   80
p-value                0.000                0.000
--------------------------------------------------------------
P(H0)           C0         C1         C2         C3         C4
     C0      0.724      0.047      0.112      0.073      0.043
     C1      0.200      0.412      0.319      0.060      0.009
     C2      0.184      0.120      0.449      0.212      0.036
     C3      0.117      0.021      0.208      0.477      0.177
     C4      0.073      0.003      0.037      0.174      0.714
--------------------------------------------------------------
P(LAG0)         C0         C1         C2         C3         C4
     C0      0.817      0.039      0.075      0.042      0.027
     C1      0.304      0.454      0.213      0.026      0.004
     C2      0.372      0.130      0.367      0.113      0.018
     C3      0.408      0.032      0.195      0.280      0.084
     C4      0.447      0.003      0.046      0.124      0.379
--------------------------------------------------------------
P(LAG1)         C0         C1         C2         C3         C4
     C0      0.705      0.061      0.129      0.071      0.035
     C1      0.210      0.447      0.301      0.037      0.006
     C2      0.227      0.153      0.449      0.153      0.019
     C3      0.209      0.030      0.246      0.412      0.103
     C4      0.217      0.009      0.076      0.202      0.497
--------------------------------------------------------------
P(LAG2)         C0         C1         C2         C3         C4
     C0      0.643      0.056      0.157      0.098      0.047
     C1      0.154      0.393      0.369      0.076      0.007
     C2      0.156      0.119      0.482      0.213      0.030
     C3      0.115      0.023      0.237      0.483      0.142
     C4      0.111      0.006      0.061      0.253      0.570
--------------------------------------------------------------
P(LAG3)         C0         C1         C2         C3         C4
     C0      0.607      0.043      0.150      0.126      0.074
     C1      0.138      0.335      0.399      0.109      0.019
     C2      0.115      0.100      0.469      0.273      0.044
     C3      0.069      0.020      0.206      0.519      0.185
     C4      0.055      0.003      0.046      0.215      0.681
--------------------------------------------------------------
P(LAG4)         C0         C1         C2         C3         C4
     C0      0.591      0.026      0.112      0.143      0.127
     C1      0.112      0.329      0.408      0.121      0.030
     C2      0.092      0.082      0.424      0.317      0.085
     C3      0.050      0.013      0.163      0.507      0.267
     C4      0.026      0.001      0.021      0.135      0.817
--------------------------------------------------------------


fig, axes = plt.subplots(2,3,figsize = (15,8)) 

for i in range(2):
    for j in range(3):
        ax = axes[i,j]
        if i==0 and j==0:
            im = sns.heatmap(sm_r.p, annot=True, linewidths=.5, ax=ax, cbar=True, vmin=0, vmax=1,
                          square=True, cmap="YlGn",fmt='.3f')
            ax.set_title("Global",fontsize=13) 
        else:
            p_temp = sm_r.P[i*3+j-1]
            im = sns.heatmap(p_temp, annot=True, linewidths=.5, ax=ax, cbar=True, vmin=0, vmax=1,
                          square=True, cmap="YlGn",fmt='.3f')
            ax.set_title("Retraso espacial %d"%(i*3+j),fontsize=13)


ultima_confirmados = confirmados_por_semana_wide[[pd.to_datetime('2021-01-03'), 'municipio_cvegeo', 'geometry', 'pob']].copy()
ultima_confirmados['tasa'] = (ultima_confirmados[pd.to_datetime('2021-01-03')] / ultima_confirmados['pob'])*1000
local_moran = Moran_Local(ultima_confirmados['tasa'].to_numpy(), w, permutations=1000)
fig, ax = plt.subplots(figsize=(12, 12))
fig, ax = moran_scatterplot(local_moran, p=0.05, ax=ax)
ax.set_xlabel('Tasa de Confirmados')
ax.set_ylabel('Retraso Espacial')
plt.show()


y = confirmados_por_semana_wide.iloc[:,20:107].to_numpy(copy=True) # sacamos el array de los valores: una fila por observación, las variables en columnas
y = (y / confirmados_por_semana_wide['pob'].to_numpy().reshape(2456,1)) * 1000 # tasa por 1000 habitantes
lm = giddy.markov.LISA_Markov(y, w, permutations=1000)
"Chi2: %8.3f, p: %5.2f, dof: %d" % lm.chi_2

/home/plablo/miniconda3/envs/geoinformatica/lib/python3.7/site-packages/giddy/markov.py:1399: RuntimeWarning: invalid value encountered in true_divide
  r = y / ybar
/home/plablo/miniconda3/envs/geoinformatica/lib/python3.7/site-packages/giddy/markov.py:1401: RuntimeWarning: divide by zero encountered in true_divide
  rlag = ylag / ybar
/home/plablo/miniconda3/envs/geoinformatica/lib/python3.7/site-packages/giddy/markov.py:1401: RuntimeWarning: invalid value encountered in true_divide
  rlag = ylag / ybar

'Chi2: 144786.662, p:  0.00, dof: 9'


fig, ax = plt.subplots(figsize = (7,6))
im = sns.heatmap(lm.p, annot=True, linewidths=.5, ax=ax, cbar=True, vmin=0, vmax=1,
                 square=True,  cmap="YlGn",fmt='.3f', xticklabels=['HH', 'LH', 'LL', 'HL'],
                 yticklabels=['HH', 'LH', 'LL', 'HL'])


lisa_confirmados = Moran_Local.by_col(ultima_confirmados.drop(pd.to_datetime('2021-01-03'), axis=1).fillna(0),'tasa',  w, permutations=10000, outvals=['q'])
# lisa_ultima = lisa_ultima.loc[:,['municipio_cvegeo', 'geometry', 'tasa,', 'tasa_p_sim', 'tasa_q']]
lisa_confirmados.rename({'tasa_q': 'cluster'}, inplace=True, axis=1)
lisa_confirmados.loc[lisa_confirmados['tasa_p_sim'] >= 0.5, 'cluster'] = -4
lisa_confirmados = gpd.geodataframe.GeoDataFrame(lisa_confirmados)
lisa_confirmados.crs = 'EPSG:4326'
lisa_confirmados['cluster'] = lisa_confirmados['cluster'].replace({1: 'HH', 2: 'LH', 3:'LL', 4:'HL', -4:'NS'})
lisa_confirmados.head()
color_dict = {'HH':'red', 'LL':'blue', 'HL':'orange', 'LH':'cornflowerblue', 'NS':'lightgray'}
fig, ax = plt.subplots(figsize=(14, 16))
for ctype, data in lisa_confirmados.groupby('cluster'):
    color = color_dict[ctype]
    # Plot each group using the color defined above
    data.to_crs(epsg=3857).plot(color=color,
              ax=ax,
              label=ctype,
              alpha=0.8 )

red_patch = mpatches.Patch(color='red', label='HH')
blue_patch = mpatches.Patch(color='blue', label='LL')
orange_patch = mpatches.Patch(color='orange', label='HL')
lblue_patch = mpatches.Patch(color='cornflowerblue', label='LH')
gray_patch = mpatches.Patch(color='lightgray', label='NS')
plt.legend(handles=[red_patch, blue_patch, orange_patch, lblue_patch, gray_patch], loc=(0.75,0.5))
ax.set(title='Conglomerados espaciales para la tasa de confirmados')
ax.set_axis_off()
ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik)
plt.show()

Análisis espacio-temporal¶

Markov espacial¶

LISA Markov¶

	2020-01-05 00:00:00	2020-01-12 00:00:00	2020-01-19 00:00:00	2020-01-26 00:00:00	2020-02-02 00:00:00	2020-02-09 00:00:00	2020-02-16 00:00:00	2020-02-23 00:00:00	2020-03-01 00:00:00	2020-03-08 00:00:00	...	2022-01-02 00:00:00	2022-01-09 00:00:00	2022-01-16 00:00:00	2022-01-23 00:00:00	_uid_	entidad_cvegeo	municipio_cvegeo	municipio_nombre	pob	geometry
0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	9.0	2.0	2.0	0.0	1	16	16046	Juárez	15290.0	MULTIPOLYGON (((-100.45897 19.32952, -100.4588...
1	0.0	1.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	1.0	...	3.0	1.0	3.0	0.0	2	16	16047	Jungapeo	22358.0	MULTIPOLYGON (((-100.55704 19.51833, -100.5561...
2	0.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	...	0.0	1.0	2.0	0.0	3	16	16048	Lagunillas	5862.0	MULTIPOLYGON (((-101.38110 19.60250, -101.3811...
3	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	1.0	0.0	1.0	0.0	4	16	16049	Madero	18769.0	MULTIPOLYGON (((-101.11654 19.53720, -101.1168...
4	3.0	17.0	34.0	59.0	52.0	68.0	62.0	40.0	42.0	28.0	...	1431.0	2768.0	1759.0	13.0	5	05	05035	Torreón	744247.0	MULTIPOLYGON (((-103.42797 25.30109, -103.3861...