Hi all,
I am kinda new to Python, and would really appreciate some help with the following code. The purpose is to take individual variable data from the respective column from each of (258) files and convert into a NetCDF4 file. Part of the output file is as follows:
I get the following error:
I am kinda new to Python, and would really appreciate some help with the following code. The purpose is to take individual variable data from the respective column from each of (258) files and convert into a NetCDF4 file. Part of the output file is as follows:
Output:# NRECS: 1096
# DT: 24
# STARTDATE: 1999-01-01 00:00:00
# ALMA_OUTPUT: 0
# NVARS: 10
# YEAR MONTH DAY OUT_SNOW_COVER OUT_SURF_TEMP OUT_RUNOFF OUT_BASEFLOW OUT_SWE OUT_EVAP OUT_PREC
1999 01 01 0.0000 -0.6910 0.0000 1.7175 0.0000 1.2187 1.2250
1999 01 02 0.0000 -8.1983 0.0000 1.7042 0.0000 0.0132 0.0000
1999 01 03 0.0000 -13.7701 0.0000 1.6907 0.0000 0.0076 0.0000
1999 01 04 1.0000 -11.0906 0.0000 1.6772 6.1095 0.4404 7.4750
1999 01 05 1.0000 -7.4365 0.0000 1.6637 9.7234 0.6585 4.3000
1999 01 06 1.0000 -6.4047 0.0000 1.6501 12.1842 0.5672 3.0000
1999 01 07 1.0000 -9.1578 0.0000 1.6364 12.0282 0.5211 0.0000
The code is as follows:1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 |
# import dependencies from __future__ import print_function import sys import os, string # handle dates... import datetime as dt # NetCDF and Numeric from netCDF4 import * from numpy import * def flux2nc(influxes,outpath,var = None ,start_year = None ,end_year = None ): # building file list and sorted lat lon list dirin = os.path.dirname(influxes) try : file_list = os.listdir(dirin) except OSError: raise OSError( 'Input flux directory not valid, please fix path' ) lat_t = [] lon_t = [] lat = [] lon = [] try : for f in file_list: lat_t.append( float ( str .split(f, "_" )[ 1 ])) lon_t.append( float ( str .split(f, "_" )[ 2 ])) except ValueError: raise ValueError( 'Input path contains files that are not flux files' ) for i in lat_t: if i not in lat: lat.append(i) for i in lon_t: if i not in lon: lon.append(i) # putting in order. Lat should be from top to botom # lon from left to right lon.sort() lat.sort() lat.reverse() del (lat_t) del (lon_t) # if variable is not set, get it from user if var = = None : #determining the parameter to use print ( "Choose output parameter" ) print ( "1 - SNOW_COVER" ) print ( "2 - SURFACE_TEMPERATURE" ) print ( "3 - Runoff" ) print ( "4 - Base flow" ) print ( "5 - Snow Water Equivalent" ) print ( "6 - EVAPORATION" ) print ( "7 - PRECIPITATION" ) varini = input ( 'Choose output (1 a 7)>' ) #getting the collumn right if int (varini) < 7 : var = varini + 2 elif varini = = 6 : #more than one soil layer... camada = input ( 'which soil layer?>' ) var = varini + 2 + camada #set name of out_file. Named after parameter choice if var = = 3 : var_txt = "ppt" var_name = "Precipitation" elif var = = 4 : var_txt = "evap" var_name = "Evapotranspiration" elif var = = 5 : var_txt = "runoff" var_name = "Runoff" elif var = = 6 : var_txt = "base" var_name = "Baseflow" elif var = = 7 : var_txt = "swe" var_name = "Snow Water Equivalent" else : var_txt = "soilLyr" + str (camada) var_name = "Soil moisture, layer {0}" . format (camada) # if the date information is not set get it from user if start_year = = None : # for what date? start_year = input ( "Enter start year:" ) if end_year = = None : end_year = input ( "End year:" ) # set date information in datetime object inidate = dt.date(start_year, 1 , 1 ) enddate = dt.date(end_year, 12 , 31 ) # calculate number of days in time series days = enddate.toordinal() - inidate.toordinal() + 1 #print "Gridding {0} data...".format(var_name) # # create array containig all data # This is going to be huge. Create an array with -9999 (NoData) # Then populate the array by reading each flux file # all_data = zeros([days, len (lat), len (lon)], dtype = float32) all_data[:,:,:] = - 9999 c = len (file_list) # for each file in list for f in file_list: # get lat & lon and it's index latitude = float ( str .split(f, sep = "_" )[ 1 ]) longitude = float ( str .split(f, sep = "_" )[ 2 ]) lat_id = lat.index(latitude) lon_id = lon.index(longitude) c = c - 1 infile = open (dirin + '/' + f, "r" ) lixo = infile.readlines() infile.close() dado = [] for l in lixo: if int (string.split(l, sep = "\t" )[ 0 ]) in range (inidate.year, enddate.year + 1 ): dado.append( float ( str .split(l, sep = "\t" )[var])) # putting data inside array. # Since data has lat & lon fixed uses dimension [:,lat_index,lon_index] all_data[:,lat_id,lon_id] = dado del dado # del data to free memory for large datasets try : # open netCDF file for writing ncfile = Dataset(outpath + str (var_txt) + '_' + str (start_year) + ".nc" , "w" ) # set netCDF metadata information ncfile.Conventions = "CF-1.6" ncfile.title = "VIC hydrologic flux outputs" ncfile.source = 'VIC hydrologic model 4.2.d' ncfile.history = "Created using the script created by NASA SERVIR. " + dt.date.today().isoformat() ncfile.date_created = str (dt.datetime.now()) ncfile.references = "N/A" ncfile.comment = "N/A" ncfile.start_date = inidate.isoformat() ncfile.end_date = enddate.isoformat() #create dimensions ncfile.createDimension( "longitude" , len (lon)) ncfile.createDimension( "latitude" , len (lat)) ncfile.createDimension( "time" , days) #create variables latvar = ncfile.createVariable( "latitude" , float , ( "latitude" ,)) latvar.long_name = "Latitude" latvar.units = "degrees_north" latvar[:] = lat lonvar = ncfile.createVariable( "longitude" , float , ( "longitude" ,)) lonvar.long_name = "Longitude" lonvar.units = "degrees_east" lonvar[:] = lon timevar = ncfile.createVariable( "time" , int , ( "time" ,)) timevar.long_name = "Time" timevar.units = "days since " + inidate.isoformat() timevar.calendar = 'gregorian' timevar[:] = range ( 0 , days) # save gridded flux data to file data_var = ncfile.createVariable(var_txt, float , ( "time" , "latitude" , "longitude" )) data_var.long_name = var_name data_var.missing_value = - 9999.0 data_var.units = "mm" data_var[:] = all_data[:,:,:] # close the file ncfile.close() except IOError: raise IOError( 'Output path is not valid, please fix the path string' ) return def main(): # checking user input if len (sys.argv) ! = 3 : print ( "Wrong user input" ) print ( "Convert VIC fluxes files to NetCDF" ) print ( "usage flux2cdf.py <vic flux dir> <out netcdf dir>" ) print ( "DIR INPUTS SHOULD CONTAIN TRAILING /" ) sys.exit() if sys.argv[ 1 ][ - 1 ] ! = "/" : print ( "VIC FLUX DIR SHOULD CONTAIN TRAILING /" ) print ( "fixing it for you..." ) sys.argv[ 1 ] = sys.argv[ 1 ] + "/" print ( "IMPORTANT: " + sys.argv[ 1 ] + " SHOULD CONTAIN ONLY FLUXES FILES!!!" ) flux2nc(sys.argv[ 1 ],sys.argv[ 2 ]) return # Execute the main level program if run as standalone if __name__ = = "__main__" : main() |
Error:python3 flux2nc_a.py /mnt/d/Spring_2020/VIC/VIC_Output/ /mnt/d/Spring_2020/VIC/VIC_Output/NetCDF
IMPORTANT: /mnt/d/Spring_2020/VIC/VIC_Output/ SHOULD CONTAIN ONLY FLUXES FILES!!!
Choose output parameter
1 - SNOW_COVER
2 - SURFACE_TEMPERATURE
3 - Runoff
4 - Base flow
5 - Snow Water Equivalent
6 - EVAPORATION
7 - PRECIPITATION
Choose output (1 a 7)>1
Enter start year:1999
End year:2000
Traceback (most recent call last):
File "flux2nc_a.py", line 241, in <module>
main()
File "flux2nc_a.py", line 235, in main
flux2nc(sys.argv[1],sys.argv[2])
File "flux2nc_a.py", line 156, in flux2nc
if int(str.split(l, sep="\t")[0]) in range(inidate.year, enddate.year+1):
ValueError: invalid literal for int() with base 10: '# NRECS: 1096\n'