1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/npp/01.read_81.ncl.y Mon Jan 26 22:08:20 2009 -0500
1.3 @@ -0,0 +1,406 @@
1.4 +;----------------------------------------------------------------------
1.5 +; This example reads an ASCII file that is formatted a specific way, and
1.6 +; writes out the results to a netCDF file.
1.7 +;
1.8 +; The first line in the ASCII file must be a header, with each field
1.9 +; separated by a single character delimiter (like a ","). The rest of
1.10 +; the file must be such that each row contains all fields, each
1.11 +; separated by the designated delimiter.
1.12 +;
1.13 +; The fields can be integer, float, double, character, or string.
1.14 +; String fields cannot be written to a netCDF file. They have to
1.15 +; be read in as character arrays and written out that way.
1.16 +;----------------------------------------------------------------------
1.17 +
1.18 +;----------------------------------------------------------------------
1.19 +; This function returns the index locations of the given delimiter
1.20 +; in a row or several rows of strings.
1.21 +;----------------------------------------------------------------------
1.22 +function delim_indices(strings,nfields,delimiter)
1.23 +local cstrings, cdelim
1.24 +begin
1.25 + nrows = dimsizes(strings)
1.26 +;
1.27 +; Handle special case if we only have one string. Make sure it
1.28 +; is put into a 2D array.
1.29 +;
1.30 + if(nrows.eq.1) then
1.31 + cstrings = new((/1,strlen(strings)+1/),character)
1.32 + end if
1.33 +
1.34 + cstrings = stringtochar(strings) ; Convert to characters.
1.35 + cdelim = stringtochar(delimiter) ; Convert delimiter to character.
1.36 +;
1.37 +; Som error checking here. Make sure delimiter is one character.
1.38 +;
1.39 + nc = dimsizes(cdelim)
1.40 + rank = dimsizes(nc)
1.41 + if(rank.ne.1.or.(rank.eq.1.and.nc.ne.2)) then
1.42 + print("delim_indices: fatal: the delimiter you've selected")
1.43 + print("must be a single character. Can't continue.")
1.44 + exit
1.45 + end if
1.46 +
1.47 +;
1.48 +; Create array to hold indices of delimiter locations, and then loop
1.49 +; through each row and find all the delimiters. Make sure each row has
1.50 +; the correct number of delimiters.
1.51 +;
1.52 + ndelims = nfields-1
1.53 + cindices = new((/nrows,ndelims/),integer)
1.54 + do i = 0, nrows-1
1.55 + ii = ind(cstrings(i,:).eq.cdelim(0))
1.56 +;
1.57 +; Make sure there were delimiters on this row. If not, we just quit.
1.58 +; This could probably be modified to do this more gracefully.
1.59 +;
1.60 + if(any(ismissing(ii))) then
1.61 + print("delim_indices: fatal: I didn't find any delimiters")
1.62 + print("('" + delimiter + "') on row " + i + ". Can't continue.")
1.63 + exit
1.64 + end if
1.65 + if(dimsizes(ii).ne.ndelims) then
1.66 + print("delim_indices: fatal: I expected to find " + ndelims)
1.67 + print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".")
1.68 + print("Can't continue.")
1.69 + exit
1.70 + end if
1.71 +
1.72 + cindices(i,:) = ii
1.73 +
1.74 + delete(ii) ; For next time through loop
1.75 + end do
1.76 +
1.77 + return(cindices)
1.78 +end
1.79 +
1.80 +;----------------------------------------------------------------------
1.81 +; This function reads in a particular field from a string array,
1.82 +; given the field number to read (fields start at #1 and go to #nfield),
1.83 +; and the indices of the delimiters.
1.84 +;
1.85 +; It returns either an integer, float, double, character, or a string,
1.86 +; depending on the input flag "return_type".
1.87 +;----------------------------------------------------------------------
1.88 +function read_field(strings,ifield,indices,return_type)
1.89 +local nstring, cstrings, nf, tmp_str
1.90 +begin
1.91 + nrows = dimsizes(strings)
1.92 +;
1.93 +; Handle special case if we only have one string. Make sure it
1.94 +; is put into a 2D array.
1.95 +;
1.96 + if(nrows.eq.1) then
1.97 + cstrings = new((/1,strlen(strings)+1/),character)
1.98 + end if
1.99 +
1.100 + cstrings = stringtochar(strings)
1.101 + nf = dimsizes(indices(0,:))+1 ; indices is nrows x (nfields-1)
1.102 +
1.103 +;
1.104 +; Error checking. Make sure user has entered a valid field.
1.105 +;
1.106 + if(ifield.le.0.or.ifield.gt.nf) then
1.107 + print("read_field: fatal: you've selected a field that is")
1.108 + print("out-of-range of the number of fields that you have (" + nf + ").")
1.109 + exit
1.110 + end if
1.111 +
1.112 +;
1.113 +; Set up array to return. For string, int, float, or double arrays,
1.114 +; we don't have to do anything special. For character arrays,
1.115 +; however, we do.
1.116 +;
1.117 + if(return_type.ne."character") then
1.118 + return_array = new(nrows,return_type)
1.119 + else
1.120 +;
1.121 +; We don't know what the biggest character array is at this point, so
1.122 +; make it bigger than necessary, and then resize later as necessary.
1.123 +;
1.124 + tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
1.125 +
1.126 + max_len = 0 ; Use to keep track of max lengths of strings.
1.127 + end if
1.128 +
1.129 + do i = 0,nrows-1
1.130 +;
1.131 +; Special case of first field in row.
1.132 +;
1.133 + if(ifield.eq.1) then
1.134 + ibeg = 0
1.135 + iend = indices(i,ifield-1)-1
1.136 + else
1.137 +;
1.138 +; Special case of first field in row.
1.139 +;
1.140 + if(ifield.eq.nf) then
1.141 + ibeg = indices(i,ifield-2)+1
1.142 + iend = dimsizes(cstrings(i,:))-1
1.143 +;
1.144 +; Any field between first and last field.
1.145 +;
1.146 + else
1.147 + ibeg = indices(i,ifield-2)+1
1.148 + iend = indices(i,ifield-1)-1
1.149 + end if
1.150 + end if
1.151 +;
1.152 +; Here's the code that pulls off the correct string, and converts it
1.153 +; to float if desired.
1.154 +;
1.155 + if(return_type.eq."integer") then
1.156 + return_array(i) = stringtointeger(chartostring(cstrings(i,ibeg:iend)))
1.157 + end if
1.158 + if(return_type.eq."float") then
1.159 + return_array(i) = stringtofloat(chartostring(cstrings(i,ibeg:iend)))
1.160 + end if
1.161 + if(return_type.eq."double") then
1.162 + return_array(i) = stringtodouble(chartostring(cstrings(i,ibeg:iend)))
1.163 + end if
1.164 + if(return_type.eq."string") then
1.165 + return_array(i) = chartostring(cstrings(i,ibeg:iend))
1.166 + end if
1.167 + if(return_type.eq."character") then
1.168 + if( (iend-ibeg) .gt. max_len) then
1.169 + max_len = iend-ibeg
1.170 + end if
1.171 + tmp_return_array(i,0:iend-ibeg) = cstrings(i,ibeg:iend)
1.172 + end if
1.173 + end do
1.174 +
1.175 + if(return_type.eq."character") then
1.176 + return_array = new((/nrows,max_len/),"character")
1.177 + return_array = tmp_return_array(:,0:max_len-1)
1.178 + end if
1.179 +
1.180 + return(return_array)
1.181 +end
1.182 +
1.183 +
1.184 +;----------------------------------------------------------------------
1.185 +; This function reads in string fields only to get the maximum string
1.186 +; length.
1.187 +;----------------------------------------------------------------------
1.188 +function get_maxlen(strings,ifield,indices)
1.189 +local nstring, cstrings, nf, tmp_str
1.190 +begin
1.191 + nrows = dimsizes(strings)
1.192 +;
1.193 +; Handle special case if we only have one string. Make sure it
1.194 +; is put into a 2D array.
1.195 +;
1.196 + if(nrows.eq.1) then
1.197 + cstrings = new((/1,strlen(strings)+1/),character)
1.198 + end if
1.199 +
1.200 + cstrings = stringtochar(strings)
1.201 + nf = dimsizes(indices(0,:))+1 ; indices is nrows x (nfields-1)
1.202 +
1.203 +;
1.204 +; Error checking. Make sure user has entered a valid field.
1.205 +;
1.206 + if(ifield.le.0.or.ifield.gt.nf) then
1.207 + print("read_field: fatal: you've selected a field that is")
1.208 + print("out-of-range of the number of fields that you have (" + nf + ").")
1.209 + exit
1.210 + end if
1.211 +;
1.212 +; We don't know what the biggest character array is at this point, so
1.213 +; make it bigger than necessary, and then resize later as necessary.
1.214 +;
1.215 + tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
1.216 +
1.217 + max_len = 0 ; Use to keep track of max lengths of strings.
1.218 +
1.219 + do i = 0,nrows-1
1.220 +;
1.221 +; Special case of first field in row.
1.222 +;
1.223 + if(ifield.eq.1) then
1.224 + ibeg = 0
1.225 + iend = indices(i,ifield-1)-1
1.226 + else
1.227 +;
1.228 +; Special case of first field in row.
1.229 +;
1.230 + if(ifield.eq.nf) then
1.231 + ibeg = indices(i,ifield-2)+1
1.232 + iend = dimsizes(cstrings(i,:))-1
1.233 +;
1.234 +; Any field between first and last field.
1.235 +;
1.236 + else
1.237 + ibeg = indices(i,ifield-2)+1
1.238 + iend = indices(i,ifield-1)-1
1.239 + end if
1.240 + end if
1.241 + if( (iend-ibeg) .gt. max_len) then
1.242 + max_len = iend-ibeg
1.243 + end if
1.244 + end do
1.245 +
1.246 + return(max_len)
1.247 +end
1.248 +
1.249 +;----------------------------------------------------------------------
1.250 +; Main code.
1.251 +;----------------------------------------------------------------------
1.252 +begin
1.253 +;
1.254 +; Set up defaults here. We are hard-coding the field types here.
1.255 +; You can set up this script to try to determine the field types
1.256 +; automatically, but this is a bit tedious. Maybe later.
1.257 +;
1.258 + filename = "data.81" ; ASCII" file to read.
1.259 + cdf_file = filename + ".nc" ; netCDF file to write.
1.260 + nfields = 22 ; # of fields
1.261 + delimiter = "," ; field delimiter
1.262 +;
1.263 +; In this case, fields #6-#8 are strings, fields #2, #3, and #11
1.264 +; are float, and the rest of the fields are integers.
1.265 +;
1.266 + var_types = new(nfields,string)
1.267 + var_strlens = new(nfields,integer) ; var to hold strlens, just in case.
1.268 +
1.269 + var_types = "integer" ; Most are ints.
1.270 + var_types(5:7) = "character" ; Corresponds to fields 6-8.
1.271 + var_types(1:2) = "float"
1.272 + var_types(10) = "float"
1.273 +
1.274 + if(isfilepresent(cdf_file))
1.275 + print("Warning: '" + cdf_file + "' exists. Will remove it.")
1.276 + system("/bin/rm " + cdf_file)
1.277 + end if
1.278 +
1.279 +;
1.280 +; Read in data as strings. This will create a string array that has the
1.281 +; same number of strings as there are rows in the file. We will then need
1.282 +; to parse each string later.
1.283 +;
1.284 + read_data = asciiread(filename,-1,"string")
1.285 + header = read_data(0) ; Header. Use for variable names.
1.286 + data = read_data(1:) ; Get rid of first line which is a header.
1.287 + nrows = dimsizes(data) ; Number of rows.
1.288 +
1.289 +;
1.290 +; Read in locations of delimiters in each string row.
1.291 +;
1.292 + hindices = delim_indices(header,nfields,delimiter) ; header row
1.293 + dindices = delim_indices(data,nfields,delimiter) ; rest of file
1.294 +
1.295 +;
1.296 +; Read in the field names which will become variable names on
1.297 +; the netCDF file.
1.298 +;
1.299 + var_names = new(nfields,string)
1.300 +
1.301 + do i=0,nfields-1
1.302 + var_names(i) = read_field(header,i+1,hindices,"string")
1.303 + end do
1.304 +
1.305 +;
1.306 +; Write out this netCDF file efficiently so it will be faster.
1.307 +; Try to predefine everything before you write to it.
1.308 +;
1.309 + f = addfile(cdf_file,"c")
1.310 + setfileoption(f,"DefineMode",True) ; Enter predefine phase.
1.311 +
1.312 +;
1.313 +; Write global attributes to file. It's okay to do this before
1.314 +; predefining the file's variables. We are still in "define" mode.
1.315 +;
1.316 + fAtt = True
1.317 + fAtt@description = "Data read in from " + filename + " ASCII file."
1.318 + fAtt@creation_date = systemfunc ("date")
1.319 + fileattdef( f, fAtt )
1.320 +
1.321 +;
1.322 +; Write dimension names to file. If there are no character variables,
1.323 +; then there's only one dimension name ("nvalues").
1.324 +;
1.325 +; Otherwise, we need to write a dimension name for every character
1.326 +; variable, which will indicate the maximum string length for that
1.327 +; variable.
1.328 +;
1.329 + indc = ind(var_types.eq."character")
1.330 + if(.not.any(ismissing(indc))) then
1.331 +;
1.332 +; We have to treat the character arrays special here. We need to
1.333 +; know their sizes so we can write the maximum size of each char
1.334 +; array to the netCDF file as a dimension name. This means we
1.335 +; need to read in the character variables once to get the string
1.336 +; lengths, then we'll read them again later to get the actual values.
1.337 +;
1.338 + do i=0,dimsizes(indc)-1
1.339 + var_strlens(indc(i)) = get_maxlen(data,indc(i)+1,dindices)
1.340 + end do
1.341 +
1.342 + ndims = dimsizes(indc) + 1
1.343 + dimNames = new(ndims,string)
1.344 + dimSizes = new(ndims,integer)
1.345 + dimUnlim = new(ndims,logical)
1.346 +
1.347 + dimUnlim = False
1.348 + dimUnlim(0) = True
1.349 + dimNames(0) = "nvalues"
1.350 + dimNames(1:ndims-1) = var_names(indc) + "_StrLen"
1.351 + dimSizes(0) = -1
1.352 + dimSizes(1:ndims-1) = var_strlens(indc)
1.353 + filedimdef(f,dimNames,dimSizes,dimUnlim)
1.354 + else
1.355 +;
1.356 +; No character variables, so just write the one dimension name.
1.357 +;
1.358 + filedimdef(f,"nvalues",-1,True)
1.359 + end if
1.360 +
1.361 +;
1.362 +; Define each variable on the file.
1.363 +;
1.364 +; Don't deal with variables that are of type string.
1.365 +;
1.366 + do i=0,nfields-1
1.367 + if(var_types(i).ne."string") then
1.368 + if(var_types(i).ne."character") then
1.369 + filevardef(f, var_names(i), var_types(i), "nvalues")
1.370 + else
1.371 + filevardef(f, var_names(i), var_types(i), \
1.372 + (/"nvalues",var_names(i)+"_StrLen"/))
1.373 + end if
1.374 + end if
1.375 + end do
1.376 +
1.377 +;
1.378 +; Loop through each field, read the values for that field, print
1.379 +; information about the variable, and then write it to the netCDF
1.380 +; file.
1.381 +;
1.382 + do i=0,nfields-1
1.383 + ifield = i+1 ; Fields start at #1, not #0.
1.384 +;
1.385 +; Note: you can't write strings to a netCDF file, so these have
1.386 +; to be written out as character arrays.
1.387 +;
1.388 + tmp_data = read_field(data,ifield,dindices,var_types(i))
1.389 +;
1.390 +; Print some info about the variable.
1.391 +;
1.392 + print("")
1.393 + print("Writing variable '" + var_names(i) + "' (field #" + ifield + ").")
1.394 + print("Type is " + var_types(i) + ".")
1.395 + if(var_types(i).ne."string".and.var_types(i).ne."character") then
1.396 + print("min/max = " + min(tmp_data) + "/" + max(tmp_data))
1.397 + end if
1.398 +
1.399 + if(any(ismissing(tmp_data))) then
1.400 + print("This variable does contain missing values.")
1.401 + else
1.402 + print("This variable doesn't contain missing values.")
1.403 + end if
1.404 +
1.405 + f->$var_names(i)$ = tmp_data ; Write to netCDF file.
1.406 +
1.407 + delete(tmp_data) ; Delete for next round.
1.408 + end do
1.409 +end