1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/ameriflux/05.read_ascii.ncl Mon Jan 26 22:08:20 2009 -0500
1.3 @@ -0,0 +1,413 @@
1.4 +;----------------------------------------------------------------------
1.5 +; set _FillValue
1.6 +; change Month to yyyymm
1.7 +; add Month to output
1.8 +; add lat, lon to output
1.9 +; add year to output
1.10 +;
1.11 +; This example reads an ASCII file that is formatted a specific way, and
1.12 +; writes out the results to a netCDF file.
1.13 +;
1.14 +; The first line in the ASCII file must be a header, with each field
1.15 +; separated by a single character delimiter (like a ","). The rest of
1.16 +; the file must be such that each row contains all fields, each
1.17 +; separated by the designated delimiter.
1.18 +;
1.19 +; The fields can be integer, float, double, character, or string.
1.20 +; String fields cannot be written to a netCDF file. They have to
1.21 +; be read in as character arrays and written out that way.
1.22 +;----------------------------------------------------------------------
1.23 +
1.24 +;----------------------------------------------------------------------
1.25 +; This function returns the index locations of the given delimiter
1.26 +; in a row or several rows of strings.
1.27 +;----------------------------------------------------------------------
1.28 +function delim_indices(strings,nfields,delimiter)
1.29 +local cstrings, cdelim
1.30 +begin
1.31 + nrows = dimsizes(strings)
1.32 +;
1.33 +; Handle special case if we only have one string. Make sure it
1.34 +; is put into a 2D array.
1.35 +;
1.36 + if(nrows.eq.1) then
1.37 + cstrings = new((/1,strlen(strings)+1/),character)
1.38 + end if
1.39 +
1.40 + cstrings = stringtochar(strings) ; Convert to characters.
1.41 + cdelim = stringtochar(delimiter) ; Convert delimiter to character.
1.42 +;
1.43 +; Som error checking here. Make sure delimiter is one character.
1.44 +;
1.45 + nc = dimsizes(cdelim)
1.46 + rank = dimsizes(nc)
1.47 + if(rank.ne.1.or.(rank.eq.1.and.nc.ne.2)) then
1.48 + print("delim_indices: fatal: the delimiter you've selected")
1.49 + print("must be a single character. Can't continue.")
1.50 + exit
1.51 + end if
1.52 +
1.53 +;
1.54 +; Create array to hold indices of delimiter locations, and then loop
1.55 +; through each row and find all the delimiters. Make sure each row has
1.56 +; the correct number of delimiters.
1.57 +;
1.58 + ndelims = nfields-1
1.59 + cindices = new((/nrows,ndelims/),integer)
1.60 + do i = 0, nrows-1
1.61 + ii = ind(cstrings(i,:).eq.cdelim(0))
1.62 +;
1.63 +; Make sure there were delimiters on this row. If not, we just quit.
1.64 +; This could probably be modified to do this more gracefully.
1.65 +;
1.66 + if(any(ismissing(ii))) then
1.67 + print("delim_indices: fatal: I didn't find any delimiters")
1.68 + print("('" + delimiter + "') on row " + i + ". Can't continue.")
1.69 + exit
1.70 + end if
1.71 + if(dimsizes(ii).ne.ndelims) then
1.72 + print("delim_indices: fatal: I expected to find " + ndelims)
1.73 + print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".")
1.74 + print("Can't continue.")
1.75 + exit
1.76 + end if
1.77 +
1.78 + cindices(i,:) = ii
1.79 +
1.80 + delete(ii) ; For next time through loop
1.81 + end do
1.82 +
1.83 + return(cindices)
1.84 +end
1.85 +
1.86 +;----------------------------------------------------------------------
1.87 +; This function reads in a particular field from a string array,
1.88 +; given the field number to read (fields start at #1 and go to #nfield),
1.89 +; and the indices of the delimiters.
1.90 +;
1.91 +; It returns either an integer, float, double, character, or a string,
1.92 +; depending on the input flag "return_type".
1.93 +;----------------------------------------------------------------------
1.94 +function read_field(strings,ifield,indices,return_type)
1.95 +local nstring, cstrings, nf, tmp_str
1.96 +begin
1.97 + nrows = dimsizes(strings)
1.98 +;
1.99 +; Handle special case if we only have one string. Make sure it
1.100 +; is put into a 2D array.
1.101 +;
1.102 + if(nrows.eq.1) then
1.103 + cstrings = new((/1,strlen(strings)+1/),character)
1.104 + end if
1.105 +
1.106 + cstrings = stringtochar(strings)
1.107 + nf = dimsizes(indices(0,:))+1 ; indices is nrows x (nfields-1)
1.108 +
1.109 +;
1.110 +; Error checking. Make sure user has entered a valid field.
1.111 +;
1.112 + if(ifield.le.0.or.ifield.gt.nf) then
1.113 + print("read_field: fatal: you've selected a field that is")
1.114 + print("out-of-range of the number of fields that you have (" + nf + ").")
1.115 + exit
1.116 + end if
1.117 +
1.118 +;
1.119 +; Set up array to return. For string, int, float, or double arrays,
1.120 +; we don't have to do anything special. For character arrays,
1.121 +; however, we do.
1.122 +;
1.123 + if(return_type.ne."character") then
1.124 + return_array = new(nrows,return_type)
1.125 + else
1.126 +;
1.127 +; We don't know what the biggest character array is at this point, so
1.128 +; make it bigger than necessary, and then resize later as necessary.
1.129 +;
1.130 + tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
1.131 +
1.132 + max_len = 0 ; Use to keep track of max lengths of strings.
1.133 + end if
1.134 +
1.135 + do i = 0,nrows-1
1.136 +;
1.137 +; Special case of first field in row.
1.138 +;
1.139 + if(ifield.eq.1) then
1.140 + ibeg = 0
1.141 + iend = indices(i,ifield-1)-1
1.142 + else
1.143 +;
1.144 +; Special case of first field in row.
1.145 +;
1.146 + if(ifield.eq.nf) then
1.147 + ibeg = indices(i,ifield-2)+1
1.148 + iend = dimsizes(cstrings(i,:))-1
1.149 +;
1.150 +; Any field between first and last field.
1.151 +;
1.152 + else
1.153 + ibeg = indices(i,ifield-2)+1
1.154 + iend = indices(i,ifield-1)-1
1.155 + end if
1.156 + end if
1.157 +;
1.158 +; Here's the code that pulls off the correct string, and converts it
1.159 +; to float if desired.
1.160 +;
1.161 + if(return_type.eq."integer") then
1.162 + return_array(i) = stringtointeger(chartostring(cstrings(i,ibeg:iend)))
1.163 + end if
1.164 + if(return_type.eq."float") then
1.165 + return_array(i) = stringtofloat(chartostring(cstrings(i,ibeg:iend)))
1.166 + end if
1.167 + if(return_type.eq."double") then
1.168 + return_array(i) = stringtodouble(chartostring(cstrings(i,ibeg:iend)))
1.169 + end if
1.170 + if(return_type.eq."string") then
1.171 + return_array(i) = chartostring(cstrings(i,ibeg:iend))
1.172 + end if
1.173 + if(return_type.eq."character") then
1.174 + if( (iend-ibeg+1) .gt. max_len) then
1.175 + max_len = iend-ibeg+1
1.176 + end if
1.177 + tmp_return_array(i,0:iend-ibeg) = cstrings(i,ibeg:iend)
1.178 + end if
1.179 + end do
1.180 +
1.181 + if(return_type.eq."character") then
1.182 + return_array = new((/nrows,max_len/),"character")
1.183 + return_array = tmp_return_array(:,0:max_len-1)
1.184 + end if
1.185 +
1.186 + return(return_array)
1.187 +end
1.188 +
1.189 +
1.190 +;----------------------------------------------------------------------
1.191 +; This function reads in string fields only to get the maximum string
1.192 +; length.
1.193 +;----------------------------------------------------------------------
1.194 +function get_maxlen(strings,ifield,indices)
1.195 +local nstring, cstrings, nf, tmp_str
1.196 +begin
1.197 + nrows = dimsizes(strings)
1.198 +;
1.199 +; Handle special case if we only have one string. Make sure it
1.200 +; is put into a 2D array.
1.201 +;
1.202 + if(nrows.eq.1) then
1.203 + cstrings = new((/1,strlen(strings)+1/),character)
1.204 + end if
1.205 +
1.206 + cstrings = stringtochar(strings)
1.207 + nf = dimsizes(indices(0,:))+1 ; indices is nrows x (nfields-1)
1.208 +
1.209 +;
1.210 +; Error checking. Make sure user has entered a valid field.
1.211 +;
1.212 + if(ifield.le.0.or.ifield.gt.nf) then
1.213 + print("read_field: fatal: you've selected a field that is")
1.214 + print("out-of-range of the number of fields that you have (" + nf + ").")
1.215 + exit
1.216 + end if
1.217 +;
1.218 +; We don't know what the biggest character array is at this point, so
1.219 +; make it bigger than necessary, and then resize later as necessary.
1.220 +;
1.221 + tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
1.222 +
1.223 + max_len = 0 ; Use to keep track of max lengths of strings.
1.224 +
1.225 + do i = 0,nrows-1
1.226 +;
1.227 +; Special case of first field in row.
1.228 +;
1.229 + if(ifield.eq.1) then
1.230 + ibeg = 0
1.231 + iend = indices(i,ifield-1)-1
1.232 + else
1.233 +;
1.234 +; Special case of first field in row.
1.235 +;
1.236 + if(ifield.eq.nf) then
1.237 + ibeg = indices(i,ifield-2)+1
1.238 + iend = dimsizes(cstrings(i,:))-1
1.239 +;
1.240 +; Any field between first and last field.
1.241 +;
1.242 + else
1.243 + ibeg = indices(i,ifield-2)+1
1.244 + iend = indices(i,ifield-1)-1
1.245 + end if
1.246 + end if
1.247 + if( (iend-ibeg+1) .gt. max_len) then
1.248 + max_len = iend-ibeg+1
1.249 + end if
1.250 + end do
1.251 +
1.252 + return(max_len)
1.253 +end
1.254 +
1.255 +;----------------------------------------------------------------------
1.256 +; Main code.
1.257 +;----------------------------------------------------------------------
1.258 +begin
1.259 +
1.260 +;###############################################################
1.261 +; Set up defaults. We are hard-coding here..
1.262 +
1.263 + year = 1991
1.264 + station = "USHa1"
1.265 + lat = 42.5378
1.266 + lon = -72.1715 + 360.
1.267 +
1.268 + nfields = 30 ; # of fields
1.269 + delimiter = "," ; field delimiter
1.270 +
1.271 + filename = station+year+"_L4_m.txt" ; ASCII" file to read.
1.272 + cdf_file = station+year+"_L4_m.nc" ; netCDF file to write.
1.273 +
1.274 + if(isfilepresent(cdf_file))
1.275 + print("Warning: '" + cdf_file + "' exists. Will remove it.")
1.276 + system("/bin/rm " + cdf_file)
1.277 + end if
1.278 +
1.279 +; In this case, fields #1-#2 are integers,
1.280 +; and the rest of the fields are floats.
1.281 +
1.282 + var_types = new(nfields,string)
1.283 + var_types = "float" ; Most are floats.
1.284 + var_types(0:1) = "integer"
1.285 +
1.286 +;#####################################################################
1.287 +
1.288 +; Read in data as strings. This will create a string array that has the
1.289 +; same number of strings as there are rows in the file. We will then need
1.290 +; to parse each string later.
1.291 +
1.292 + read_data = asciiread(filename,-1,"string")
1.293 +
1.294 + header = read_data(0) ; Header. Use for variable names.
1.295 + data = read_data(1:) ; Get rid of first line which is a header.
1.296 + nmonth = dimsizes(data) ; Number of rows == number of month.
1.297 +
1.298 +; Read in locations of delimiters in each string row.
1.299 +
1.300 + hindices = delim_indices(header,nfields,delimiter) ; header row
1.301 + dindices = delim_indices(data,nfields,delimiter) ; rest of file
1.302 +
1.303 +; Read in the field names which will become variable names on
1.304 +; the netCDF file.
1.305 +
1.306 + var_names = new(nfields,string)
1.307 +
1.308 + do i=0,nfields-1
1.309 + var_names(i) = read_field(header,i+1,hindices,"string")
1.310 + end do
1.311 +
1.312 +;-------------------------------------------------------------------
1.313 +; Write out this netCDF file efficiently so it will be faster.
1.314 +; Try to predefine everything before you write to it.
1.315 +
1.316 + f = addfile(cdf_file,"c")
1.317 + setfileoption(f,"DefineMode",True) ; Enter predefine phase.
1.318 +
1.319 +; Write global attributes to file. It's okay to do this before
1.320 +; predefining the file's variables. We are still in "define" mode.
1.321 +
1.322 + fAtt = True
1.323 + fAtt@description = "Data read in from " + filename + " ASCII file."
1.324 + fAtt@creation_date = systemfunc ("date")
1.325 + fileattdef( f, fAtt )
1.326 +
1.327 +; Write dimension names to file.
1.328 +
1.329 + dim_names = (/ "year", "month" /)
1.330 + dim_sizes = (/ -1 , nmonth /)
1.331 + dimUnlim = (/ True , False /)
1.332 + filedimdef( f, dim_names, dim_sizes, dimUnlim )
1.333 +
1.334 + filedimdef( f, "lat", 1, False )
1.335 + filedimdef( f, "lon", 1, False )
1.336 +
1.337 +; Define each variable on the file.
1.338 +
1.339 + filevardef( f, "year", "integer", "year" )
1.340 + filevardef( f, "lat" , "float" , "lat" )
1.341 + filevardef( f, "lon" , "float" , "lon" )
1.342 +
1.343 + do i=0,nfields-1
1.344 +
1.345 +; define variable
1.346 +
1.347 + filevardef(f, var_names(i), var_types(i), dim_names)
1.348 +
1.349 +; define variable attributes
1.350 +
1.351 + if (i.le.1) then
1.352 + varAtt = 0
1.353 +; varAtt@_FillValue = -999
1.354 + else
1.355 + varAtt = 0.
1.356 +; varAtt@_FillValue = 1.e36
1.357 + end if
1.358 +
1.359 + varAtt@long_name = var_names(i)
1.360 +
1.361 + filevarattdef( f, var_names(i) , varAtt )
1.362 +
1.363 + delete (varAtt)
1.364 + end do
1.365 +;-----------------------------------------------------------------
1.366 +
1.367 +; Loop through each field, read the values for that field, print
1.368 +; information about the variable, and then write it to the netCDF file.
1.369 +
1.370 + do i=0,nfields-1
1.371 + ifield = i+1 ; Fields start at #1, not #0.
1.372 +
1.373 + tmp_data = new((/1,nmonth/),var_types(i))
1.374 +
1.375 + if (i.le.1) then
1.376 + tmp_data@_FillValue = -999
1.377 + else
1.378 + tmp_data@_FillValue = 1.e36
1.379 + end if
1.380 +
1.381 + tmp_data(0,:) = read_field(data,ifield,dindices,var_types(i))
1.382 +
1.383 + tmp_data = where(tmp_data .le. -9000.,tmp_data@_FillValue,tmp_data)
1.384 +
1.385 +; change Month to yyyymm
1.386 +
1.387 + if (i.eq.0) then
1.388 + tmp_data(0,:) = tmp_data(0,:) + year*100
1.389 + end if
1.390 +
1.391 +; Print some info about the variable.
1.392 +
1.393 +; print("")
1.394 +; print("Writing variable '" + var_names(i) + "' (field #" + ifield + ").")
1.395 +; print("Type is " + var_types(i) + ".")
1.396 +; print("min/max = " + min(tmp_data) + "/" + max(tmp_data))
1.397 +
1.398 +; if(any(ismissing(tmp_data))) then
1.399 +; print("This variable does contain missing values.")
1.400 +; else
1.401 +; print("This variable doesn't contain missing values.")
1.402 +; end if
1.403 +
1.404 +; write variable to file
1.405 +
1.406 + f->$var_names(i)$ = tmp_data ; Write to netCDF file.
1.407 +
1.408 + delete(tmp_data) ; Delete for next round.
1.409 + end do
1.410 +
1.411 +; write variable to file
1.412 +
1.413 + f->year = year
1.414 + f->lat = lat
1.415 + f->lon = lon
1.416 +end