Modifications to scoring and graphics production for the final version of code for the C-LAMP paper in GCB.
1 ;----------------------------------------------------------------------
2 ; This example reads an ASCII file that is formatted a specific way, and
3 ; writes out the results to a netCDF file.
5 ; The first line in the ASCII file must be a header, with each field
6 ; separated by a single character delimiter (like a ","). The rest of
7 ; the file must be such that each row contains all fields, each
8 ; separated by the designated delimiter.
10 ; The fields can be integer, float, double, character, or string.
11 ; String fields cannot be written to a netCDF file. They have to
12 ; be read in as character arrays and written out that way.
13 ;----------------------------------------------------------------------
15 ;----------------------------------------------------------------------
16 ; This function returns the index locations of the given delimiter
17 ; in a row or several rows of strings.
;
; Arguments:
;   strings   - a single string or an array of strings, one per row.
;   nfields   - number of fields expected in each row. The delimiter
;               count "ndelims" is assigned on a line that is not shown
;               in this listing; presumably nfields-1 (TODO confirm).
;   delimiter - the field separator; must be a single character.
;
; Result: an integer array "cindices" of size nrows x ndelims is created
; to hold the delimiter positions of every row. The statement that stores
; into it and the return statement are not visible in this listing.
;
; On a bad delimiter, a row without delimiters, or a row with the wrong
; number of delimiters, a "fatal" message is printed. The messages say
; the function cannot continue; the statement that stops execution is
; not visible here.
18 ;----------------------------------------------------------------------
19 function delim_indices(strings,nfields,delimiter)
20 local cstrings, cdelim
22 nrows = dimsizes(strings)
24 ; Handle special case if we only have one string. Make sure it
25 ; is put into a 2D array.
; (The enclosing test for the single-string case is not shown in this
; listing. The strlen+1 width presumably leaves room for a terminating
; null character - TODO confirm.)
28 cstrings = new((/1,strlen(strings)+1/),character)
31 cstrings = stringtochar(strings) ; Convert to characters.
32 cdelim = stringtochar(delimiter) ; Convert delimiter to character.
34 ; Some error checking here. Make sure delimiter is one character.
; "nc" and "rank" are assigned on lines not shown in this listing.
; NOTE(review): comparing nc against 2 looks like it expects stringtochar
; to return the delimiter plus a trailing null; confirm this holds for
; the NCL version in use.
38 if(rank.ne.1.or.(rank.eq.1.and.nc.ne.2)) then
39 print("delim_indices: fatal: the delimiter you've selected")
40 print("must be a single character. Can't continue.")
45 ; Create array to hold indices of delimiter locations, and then loop
46 ; through each row and find all the delimiters. Make sure each row has
47 ; the correct number of delimiters.
50 cindices = new((/nrows,ndelims/),integer)
; "ii" holds the positions in row i whose character equals the delimiter.
; The loop header over i is not shown in this listing.
52 ii = ind(cstrings(i,:).eq.cdelim(0))
54 ; Make sure there were delimiters on this row. If not, we just quit.
55 ; This could probably be modified to do this more gracefully.
57 if(any(ismissing(ii))) then
58 print("delim_indices: fatal: I didn't find any delimiters")
59 print("('" + delimiter + "') on row " + i + ". Can't continue.")
; Every row must contain exactly ndelims delimiters.
62 if(dimsizes(ii).ne.ndelims) then
63 print("delim_indices: fatal: I expected to find " + ndelims)
64 print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".")
65 print("Can't continue.")
71 delete(ii) ; For next time through loop
77 ;----------------------------------------------------------------------
78 ; This function reads in a particular field from a string array,
79 ; given the field number to read (fields start at #1 and go to #nfield),
80 ; and the indices of the delimiters.
82 ; It returns either an integer, float, double, character, or a string,
83 ; depending on the input flag "return_type".
;
; Arguments:
;   strings     - a single string or an array of strings, one per row.
;   ifield      - 1-based number of the field to extract.
;   indices     - delimiter positions, nrows x (nfields-1).
;   return_type - one of "integer", "float", "double", "string" or
;                 "character".
;
; For "character" the result is a 2D array of size nrows x max_len; for
; the other types it is a 1D array with one value per row. The return
; statement itself is not visible in this listing.
84 ;----------------------------------------------------------------------
85 function read_field(strings,ifield,indices,return_type)
86 local nstring, cstrings, nf, tmp_str
88 nrows = dimsizes(strings)
90 ; Handle special case if we only have one string. Make sure it
91 ; is put into a 2D array.
; (The enclosing test for the single-string case is not shown in this
; listing.)
94 cstrings = new((/1,strlen(strings)+1/),character)
97 cstrings = stringtochar(strings)
98 nf = dimsizes(indices(0,:))+1 ; indices is nrows x (nfields-1)
101 ; Error checking. Make sure user has entered a valid field.
103 if(ifield.le.0.or.ifield.gt.nf) then
104 print("read_field: fatal: you've selected a field that is")
105 print("out-of-range of the number of fields that you have (" + nf + ").")
110 ; Set up array to return. For string, int, float, or double arrays,
111 ; we don't have to do anything special. For character arrays, a
; temporary 2D array is filled first and trimmed at the end.
114 if(return_type.ne."character") then
115 return_array = new(nrows,return_type)
118 ; We don't know what the biggest character array is at this point, so
119 ; make it bigger than necessary, and then resize later as necessary.
121 tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
123 max_len = 0 ; Use to keep track of max lengths of strings.
; Work out the character range ibeg:iend of the requested field in row i.
; The loop header over i and several branch lines are not shown in this
; listing.
128 ; Special case of first field in row.
; (The matching ibeg assignment is not shown; the field ends just before
; the first delimiter.)
132 iend = indices(i,ifield-1)-1
135 ; Special case of last field in row: it starts after the final
; delimiter and runs to the last column of the character row.
137 if(ifield.eq.nf) then
138 ibeg = indices(i,ifield-2)+1
139 iend = dimsizes(cstrings(i,:))-1
141 ; Any field between first and last field.
144 ibeg = indices(i,ifield-2)+1
145 iend = indices(i,ifield-1)-1
149 ; Here's the code that pulls off the correct string, and converts it
150 ; to the requested return type.
152 if(return_type.eq."integer") then
153 return_array(i) = stringtointeger(chartostring(cstrings(i,ibeg:iend)))
155 if(return_type.eq."float") then
156 return_array(i) = stringtofloat(chartostring(cstrings(i,ibeg:iend)))
158 if(return_type.eq."double") then
159 return_array(i) = stringtodouble(chartostring(cstrings(i,ibeg:iend)))
161 if(return_type.eq."string") then
162 return_array(i) = chartostring(cstrings(i,ibeg:iend))
164 if(return_type.eq."character") then
; NOTE(review): the statement that updates max_len is not shown in this
; listing. The copy below writes iend-ibeg+1 characters, while the final
; trim keeps columns 0:max_len-1; if max_len is set to iend-ibeg the last
; character of the longest field would be dropped - TODO confirm.
165 if( (iend-ibeg) .gt. max_len) then
168 tmp_return_array(i,0:iend-ibeg) = cstrings(i,ibeg:iend)
; For character output, trim the oversized temporary array down to the
; widest field that was found.
172 if(return_type.eq."character") then
173 return_array = new((/nrows,max_len/),"character")
174 return_array = tmp_return_array(:,0:max_len-1)
181 ;----------------------------------------------------------------------
182 ; This function reads in string fields only to get the maximum string
; length of the requested field.
;
; Arguments:
;   strings - a single string or an array of strings, one per row.
;   ifield  - 1-based number of the field to measure.
;   indices - delimiter positions, nrows x (nfields-1).
;
; "max_len" tracks the largest iend-ibeg seen over all rows. The lines
; that update it and return it are not visible in this listing.
184 ;----------------------------------------------------------------------
185 function get_maxlen(strings,ifield,indices)
186 local nstring, cstrings, nf, tmp_str
188 nrows = dimsizes(strings)
190 ; Handle special case if we only have one string. Make sure it
191 ; is put into a 2D array.
; (The enclosing test for the single-string case is not shown in this
; listing.)
194 cstrings = new((/1,strlen(strings)+1/),character)
197 cstrings = stringtochar(strings)
198 nf = dimsizes(indices(0,:))+1 ; indices is nrows x (nfields-1)
201 ; Error checking. Make sure user has entered a valid field.
; The message is prefixed with this function's own name so a failure is
; reported against get_maxlen rather than read_field.
203 if(ifield.le.0.or.ifield.gt.nf) then
204 print("get_maxlen: fatal: you've selected a field that is")
205 print("out-of-range of the number of fields that you have (" + nf + ").")
209 ; We don't know what the biggest character array is at this point, so
210 ; make it bigger than necessary, and then resize later as necessary.
; NOTE(review): tmp_return_array is not used by any statement visible in
; this listing; it looks like a leftover from read_field - confirm before
; removing.
212 tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
214 max_len = 0 ; Use to keep track of max lengths of strings.
; Work out the character range ibeg:iend of the requested field in row i.
; The loop header over i and several branch lines are not shown in this
; listing.
218 ; Special case of first field in row.
222 iend = indices(i,ifield-1)-1
225 ; Special case of last field in row: it starts after the final
; delimiter and runs to the last column of the character row.
227 if(ifield.eq.nf) then
228 ibeg = indices(i,ifield-2)+1
229 iend = dimsizes(cstrings(i,:))-1
231 ; Any field between first and last field.
234 ibeg = indices(i,ifield-2)+1
235 iend = indices(i,ifield-1)-1
; Remember the widest field seen so far (the update statement is not
; shown in this listing).
238 if( (iend-ibeg) .gt. max_len) then
246 ;----------------------------------------------------------------------
248 ;----------------------------------------------------------------------
251 ; Set up defaults here. We are hard-coding the field types here.
252 ; You can set up this script to try to determine the field types
253 ; automatically, but this is a bit tedious. Maybe later.
255 filename = "data.81" ; ASCII file to read.
256 cdf_file = filename + ".nc" ; netCDF file to write.
257 nfields = 22 ; # of fields
258 delimiter = "," ; field delimiter
260 ; In this case, fields #6-#8 are strings (typed "character" below so
; they can be written as character arrays), fields #2, #3, and #11
261 ; are float, and the rest of the fields are integers.
263 var_types = new(nfields,string)
264 var_strlens = new(nfields,integer) ; var to hold strlens, just in case.
266 var_types = "integer" ; Most are ints.
267 var_types(5:7) = "character" ; Corresponds to fields 6-8.
268 var_types(1:2) = "float" ; Corresponds to fields 2-3.
269 var_types(10) = "float" ; Corresponds to field 11.
; Remove any existing output file so it can be created from scratch.
271 if(isfilepresent(cdf_file))
272 print("Warning: '" + cdf_file + "' exists. Will remove it.")
273 system("/bin/rm " + cdf_file)
277 ; Read in data as strings. This will create a string array that has the
278 ; same number of strings as there are rows in the file. We will then need
279 ; to parse each string later.
281 read_data = asciiread(filename,-1,"string")
282 header = read_data(0) ; Header. Use for variable names.
283 data = read_data(1:) ; Get rid of first line which is a header.
284 nrows = dimsizes(data) ; Number of rows.
287 ; Read in locations of delimiters in each string row.
289 hindices = delim_indices(header,nfields,delimiter) ; header row
290 dindices = delim_indices(data,nfields,delimiter) ; rest of file
293 ; Read in the field names which will become variable names on
; the netCDF file. Field numbers passed to read_field are 1-based, hence
; i+1. (The loop header over i is not shown in this listing.)
296 var_names = new(nfields,string)
299 var_names(i) = read_field(header,i+1,hindices,"string")
303 ; Write out this netCDF file efficiently so it will be faster.
304 ; Try to predefine everything before you write to it.
306 f = addfile(cdf_file,"c")
307 setfileoption(f,"DefineMode",True) ; Enter predefine phase.
310 ; Write global attributes to file. It's okay to do this before
311 ; predefining the file's variables. We are still in "define" mode.
; (The statement that creates fAtt is not shown in this listing.)
314 fAtt@description = "Data read in from " + filename + " ASCII file."
315 fAtt@creation_date = systemfunc ("date")
316 fileattdef( f, fAtt )
319 ; Write dimension names to file. If there are no character variables,
320 ; then there's only one dimension name ("nvalues").
322 ; Otherwise, we need to write a dimension name for every character
323 ; variable, which will indicate the maximum string length for that
; variable.
326 indc = ind(var_types.eq."character")
327 if(.not.any(ismissing(indc))) then
329 ; We have to treat the character arrays special here. We need to
330 ; know their sizes so we can write the maximum size of each char
331 ; array to the netCDF file as a dimension name. This means we
332 ; need to read in the character variables once to get the string
333 ; lengths, then we'll read them again later to get the actual values.
335 do i=0,dimsizes(indc)-1
336 var_strlens(indc(i)) = get_maxlen(data,indc(i)+1,dindices)
; One dimension for the row count plus one "<name>_StrLen" dimension per
; character variable.
339 ndims = dimsizes(indc) + 1
340 dimNames = new(ndims,string)
341 dimSizes = new(ndims,integer)
342 dimUnlim = new(ndims,logical)
; (The assignments to dimUnlim and dimSizes(0) are not shown in this
; listing.)
346 dimNames(0) = "nvalues"
347 dimNames(1:ndims-1) = var_names(indc) + "_StrLen"
349 dimSizes(1:ndims-1) = var_strlens(indc)
350 filedimdef(f,dimNames,dimSizes,dimUnlim)
353 ; No character variables, so just write the one dimension name.
; It is defined with size -1 and the unlimited flag set to True.
355 filedimdef(f,"nvalues",-1,True)
359 ; Define each variable on the file.
361 ; Don't deal with variables that are of type string.
; NOTE(review): a field typed "string" is left undefined here, yet the
; write loop further down appears to assign every field to the file;
; confirm string fields are never configured, or skip them there too.
; (The loop header over i is not shown in this listing.)
364 if(var_types(i).ne."string") then
; Numeric variables use only the "nvalues" dimension; character
; variables also get their own "<name>_StrLen" dimension.
365 if(var_types(i).ne."character") then
366 filevardef(f, var_names(i), var_types(i), "nvalues")
368 filevardef(f, var_names(i), var_types(i), \
369 (/"nvalues",var_names(i)+"_StrLen"/))
375 ; Loop through each field, read the values for that field, print
376 ; information about the variable, and then write it to the netCDF
; file. (The loop header over i is not shown in this listing.)
380 ifield = i+1 ; Fields start at #1, not #0.
382 ; Note: you can't write strings to a netCDF file, so these have
383 ; to be written out as character arrays.
385 tmp_data = read_field(data,ifield,dindices,var_types(i))
387 ; Print some info about the variable.
390 print("Writing variable '" + var_names(i) + "' (field #" + ifield + ").")
391 print("Type is " + var_types(i) + ".")
; min/max is only printed for numeric fields.
392 if(var_types(i).ne."string".and.var_types(i).ne."character") then
393 print("min/max = " + min(tmp_data) + "/" + max(tmp_data))
; Report whether any value in this field is missing.
396 if(any(ismissing(tmp_data))) then
397 print("This variable does contain missing values.")
399 print("This variable doesn't contain missing values.")
; The variable name on the file comes from the header field name.
402 f->$var_names(i)$ = tmp_data ; Write to netCDF file.
404 delete(tmp_data) ; Delete for next round.