Feb-02-2025, 09:05 PM
# Import necessary libraries import numpy as np import pandas as pd import geopandas as gpd from libpysal.weights import Queen, higher_order, KNN, lag_spatial from shapely.geometry import Polygon import statsmodels.formula.api as smf from scipy import stats from scipy.stats import chi2 from esda.moran import Moran from shapely.geometry import Point from pysal.lib import weights from pysal.explore import esda import matplotlib.pyplot as plt import matplotlib.patches as mpatches import mplcursors import matplotlib.colors as mcolors # Define the file path and sheet name file_path = "C:\\Users\\daves\\OneDrive\\Pessoal\\Acadêmico\\Mestrado\\Dissertação - Execução\\Análises\\Regressão\\Base de dados - Regressão.xlsx" sheet_name = "Base de dados" # Read the specific sheet and convert to DataFrame df = pd.read_excel(file_path, sheet_name=sheet_name) # Convert the 'Codigo' column to string and rename to 'CD_MUN' df['Codigo'] = df['Codigo'].astype(str) df.rename(columns={'Codigo': 'CD_MUN'}, inplace=True) ### Load shapefile shapefile_path = r"C:\Users\daves\OneDrive\Pessoal\Acadêmico\Mestrado\Dissertação - Execução\Análises\MT_Municipios_2022\MT_Municipios_2022.shp" municipalities = gpd.read_file(shapefile_path) # Merge the data df2 = municipalities.merge(df, on='CD_MUN', how='inner') df2 # Replace zero values with a small positive value (1e-10) columns_to_replace = ['PIA', 'Total Tillage', 'Annual Investment ', 'Production', 'Total Pastages', 'Capital Stock', 'TFP'] for column in columns_to_replace: df2[column] = df2[column].replace(0, 1e-10) # Calculate the logarithms of the variables and add to the DataFrame df2['log_Production'] = np.log(df2['Production']) df2['log_Total_Tillage'] = np.log(df2['Total Tillage']) df2['log_PIA'] = np.log(df2['PIA']) df2['log_Annual_Investment'] = np.log(df2['Annual Investment ']) df2['log_Total_Pastages'] = np.log(df2['Total Pastages']) df2['log_Capital_Stock'] = np.log(df2['Capital Stock']) df2['log_TFP'] = np.log(df2['TFP']) df2 # Exclude rows where the 'Ano' column is different from 2006 df2 = df2[df2['Ano'] == 2006] df2 # Create the spatial weights matrix # Ensure df2 is a GeoDataFrame with a 'geometry' column if not isinstance(df2, gpd.GeoDataFrame): df2 = gpd.GeoDataFrame(df2, geometry='geometry') # First-order contiguity matrix (W) W = Queen.from_dataframe(df2, use_index=False) # Second-order contiguity matrix (W2) W2 = higher_order(W, k=2) # k-nearest neighbors matrix (M) k = 5 # Number of nearest neighbors (adjust as needed) M = KNN.from_dataframe(df2, k=k) # Function to calculate and display the results of Getis-Ord G def calculate_getis_ord(variable, matrix, matrix_name): # Calculate Getis-Ord G g = esda.G(variable, matrix) # Display the result print(f"\nGetis-Ord G test of Spatial Autocorrelation for {matrix_name}") print("--------------------------------------------") print(f"Getis-Ord G: {g.G}") print(f"Expected G: {g.EG}") print(f"Variance: {g.VG}") print(f"P-value: {g.p_norm}") print("--------------------------------------------") # Apply the function for each spatial weights matrix calculate_getis_ord(df2['log_Total_Tillage'], W, 'W') calculate_getis_ord(df2['log_Total_Tillage'], W2, 'W2') calculate_getis_ord(df2['log_Total_Tillage'], M, 'M') # Function to plot the results of Local Getis-Ord with custom colors def plot_local_getis_ord_clusters(df, g, title, alpha=0.05): """ Function to plot the results of Local Getis-Ord with custom colors. """ # Add the results of Local Getis-Ord to the GeoDataFrame df['Local Getis-Ord G'] = g.Zs df['p-value'] = g.p_sim # Define custom colors for the clusters def get_color(row): if row['p-value'] > alpha: # No statistical significance return 'white' elif row['Local Getis-Ord G'] < 0: # Cold Spot return '#00008B' # Dark blue else: # Hot Spot return '#FF0000' # Red df['color'] = df.apply(get_color, axis=1) # Plot the map fig, ax = plt.subplots(1, 1, figsize=(10, 10)) # Plot the Getis-Ord G values df.plot(column='color', ax=ax, legend=False, edgecolor='black', linewidth=0.5) # Add title and legend plt.title(title) # Add custom legend legend_labels = { 'Hot Spots': '#FF0000', 'No Correlation': 'white', 'Cold Spots': '#00008B' } patches = [mpatches.Patch(color=color, label=label) for label, color in legend_labels.items()] plt.legend(handles=patches, title='Spatial Autocorrelation', loc='center left', bbox_to_anchor=(1.05, 0.5)) # Legend on the right side plt.axis('off') # Remove axis values plt.tight_layout() # Adjust layout to avoid cuts plt.show() # Calculate Local Getis-Ord for each spatial weights matrix gW = esda.G_Local(df2['log_Total_Tillage'], W) gW2 = esda.G_Local(df2['log_Total_Tillage'], W2) gM = esda.G_Local(df2['log_Total_Tillage'], M) # Plot the results for each spatial weights matrix plot_local_getis_ord_clusters(df2, gW, 'Local Getis-Ord Results for W matrix') plot_local_getis_ord_clusters(df2, gW2, 'Local Getis-Ord Results for W2 matrix') plot_local_getis_ord_clusters(df2, gM, 'Local Getis-Ord Results for M matrix')I need help with the output of my code. The plots seems to have a light blue layer beneath the actual data that has to be ploted. Could you help me to see what is wrong with the code?
Attached Files