1 ;----------------------------------------------------------------------
4 ; This example reads an ASCII file that is formatted a specific way, and
5 ; writes out the results to a netCDF file.
7 ; The first line in the ASCII file must be a header, with each field
8 ; separated by a single character delimiter (like a ","). The rest of
9 ; the file must be such that each row contains all fields, each
10 ; separated by the designated delimiter.
12 ; The fields can be integer, float, double, character, or string.
13 ; String fields cannot be written to a netCDF file. They have to
14 ; be read in as character arrays and written out that way.
15 ;----------------------------------------------------------------------
17 ;----------------------------------------------------------------------
18 ; This function returns the index locations of the given delimiter
19 ; in a row or several rows of strings.
;
; Arguments:
;   strings   - one string, or an array of strings (one per row).
;   nfields   - number of fields expected on each row.
;   delimiter - the field delimiter; must be a single character.
;
; The index array built here (cindices) is dimensioned nrows x ndelims.
; NOTE(review): the statements that set "rank", "nc" and "ndelims", the
; loop over rows, and the return are not visible in this copy of the
; script; presumably ndelims = nfields-1 -- confirm against the full file.
20 ;----------------------------------------------------------------------
21 function delim_indices(strings,nfields,delimiter)
22 local cstrings, cdelim
24 nrows = dimsizes(strings) ; Number of rows (strings) passed in.
26 ; Handle special case if we only have one string. Make sure it
27 ; is put into a 2D array.
30 cstrings = new((/1,strlen(strings)+1/),character) ; 1 x (length+1) characters.
33 cstrings = stringtochar(strings) ; Convert to characters.
34 cdelim = stringtochar(delimiter) ; Convert delimiter to character.
36 ; Some error checking here. Make sure delimiter is one character.
; NOTE(review): presumably "nc" is the size of cdelim, which would be 2 for
; a one-character delimiter plus a terminating null (compare the
; strlen(...)+1 sizing above) -- confirm.
40 if(rank.ne.1.or.(rank.eq.1.and.nc.ne.2)) then
41 print("delim_indices: fatal: the delimiter you've selected")
42 print("must be a single character. Can't continue.")
47 ; Create array to hold indices of delimiter locations, and then loop
48 ; through each row and find all the delimiters. Make sure each row has
49 ; the correct number of delimiters.
52 cindices = new((/nrows,ndelims/),integer)
54 ii = ind(cstrings(i,:).eq.cdelim(0)) ; Positions in row i that match the delimiter.
56 ; Make sure there were delimiters on this row. If not, we just quit.
57 ; This could probably be modified to do this more gracefully.
59 if(any(ismissing(ii))) then
60 print("delim_indices: fatal: I didn't find any delimiters")
61 print("('" + delimiter + "') on row " + i + ". Can't continue.")
; Every row must contain exactly ndelims delimiters.
64 if(dimsizes(ii).ne.ndelims) then
65 print("delim_indices: fatal: I expected to find " + ndelims)
66 print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".")
67 print("Can't continue.")
73 delete(ii) ; For next time through loop
79 ;----------------------------------------------------------------------
80 ; This function reads in a particular field from a string array,
81 ; given the field number to read (fields start at #1 and go to #nfield),
82 ; and the indices of the delimiters.
;
; Arguments:
;   strings     - one string, or an array of strings (one per row).
;   ifield      - number of the field to read (1-based).
;   indices     - delimiter locations, nrows x (nfields-1).
;   return_type - "integer", "float", "double", "string" or "character".
;
84 ; It returns either an integer, float, double, character, or a string,
85 ; depending on the input flag "return_type".
; Non-character results are dimensioned nrows; character results are
; dimensioned nrows x max_len, where max_len is the longest field found.
86 ;----------------------------------------------------------------------
87 function read_field(strings,ifield,indices,return_type)
88 local nstring, cstrings, nf, tmp_str
90 nrows = dimsizes(strings) ; Number of rows (strings) passed in.
92 ; Handle special case if we only have one string. Make sure it
93 ; is put into a 2D array.
96 cstrings = new((/1,strlen(strings)+1/),character) ; 1 x (length+1) characters.
99 cstrings = stringtochar(strings) ; Convert to characters.
100 nf = dimsizes(indices(0,:))+1 ; indices is nrows x (nfields-1)
103 ; Error checking. Make sure user has entered a valid field.
105 if(ifield.le.0.or.ifield.gt.nf) then
106 print("read_field: fatal: you've selected a field that is")
107 print("out-of-range of the number of fields that you have (" + nf + ").")
112 ; Set up array to return. For string, int, float, or double arrays,
113 ; we don't have to do anything special. For character arrays,
; an oversized temporary array is filled first and trimmed at the end.
116 if(return_type.ne."character") then
117 return_array = new(nrows,return_type)
120 ; We don't know what the biggest character array is at this point, so
121 ; make it bigger than necessary, and then resize later as necessary.
; Each row is as wide as a full input row, so any single field fits.
123 tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
125 max_len = 0 ; Use to keep track of max lengths of strings.
130 ; Special case of first field in row.
134 iend = indices(i,ifield-1)-1 ; Field ends just before the first delimiter.
137 ; Special case of last field in row.
139 if(ifield.eq.nf) then
140 ibeg = indices(i,ifield-2)+1 ; Field starts just after the last delimiter.
141 iend = dimsizes(cstrings(i,:))-1 ; Last index of the character row.
143 ; Any field between first and last field.
146 ibeg = indices(i,ifield-2)+1 ; Just after the preceding delimiter.
147 iend = indices(i,ifield-1)-1 ; Just before the following delimiter.
151 ; Here's the code that pulls off the correct string, and converts it
152 ; to the requested return type.
154 if(return_type.eq."integer") then
155 return_array(i) = stringtointeger(chartostring(cstrings(i,ibeg:iend)))
157 if(return_type.eq."float") then
158 return_array(i) = stringtofloat(chartostring(cstrings(i,ibeg:iend)))
160 if(return_type.eq."double") then
161 return_array(i) = stringtodouble(chartostring(cstrings(i,ibeg:iend)))
163 if(return_type.eq."string") then
164 return_array(i) = chartostring(cstrings(i,ibeg:iend))
; Characters are copied as-is; track the longest field seen so far.
166 if(return_type.eq."character") then
167 if( (iend-ibeg+1) .gt. max_len) then
168 max_len = iend-ibeg+1
170 tmp_return_array(i,0:iend-ibeg) = cstrings(i,ibeg:iend)
; Trim the oversized temporary character array down to max_len columns.
174 if(return_type.eq."character") then
175 return_array = new((/nrows,max_len/),"character")
176 return_array = tmp_return_array(:,0:max_len-1)
183 ;----------------------------------------------------------------------
184 ; This function reads in string fields only to get the maximum string
; length of the requested field over all rows (tracked in max_len).
;
; Arguments:
;   strings - one string, or an array of strings (one per row).
;   ifield  - number of the field to examine (1-based).
;   indices - delimiter locations, nrows x (nfields-1).
;
; NOTE(review): the return statement is not visible in this copy of the
; script; presumably max_len is returned -- confirm against the full file.
186 ;----------------------------------------------------------------------
187 function get_maxlen(strings,ifield,indices)
188 local nstring, cstrings, nf, tmp_str
190 nrows = dimsizes(strings) ; Number of rows (strings) passed in.
192 ; Handle special case if we only have one string. Make sure it
193 ; is put into a 2D array.
196 cstrings = new((/1,strlen(strings)+1/),character) ; 1 x (length+1) characters.
199 cstrings = stringtochar(strings) ; Convert to characters.
200 nf = dimsizes(indices(0,:))+1 ; indices is nrows x (nfields-1)
203 ; Error checking. Make sure user has entered a valid field.
; NOTE(review): the message below names "read_field" although this is
; get_maxlen -- looks like a copy/paste leftover.
205 if(ifield.le.0.or.ifield.gt.nf) then
206 print("read_field: fatal: you've selected a field that is")
207 print("out-of-range of the number of fields that you have (" + nf + ").")
211 ; We don't know what the biggest character array is at this point, so
212 ; make it bigger than necessary, and then resize later as necessary.
; NOTE(review): tmp_return_array is not referenced again in the visible
; part of this function.
214 tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
216 max_len = 0 ; Use to keep track of max lengths of strings.
220 ; Special case of first field in row.
224 iend = indices(i,ifield-1)-1 ; Field ends just before the first delimiter.
227 ; Special case of last field in row.
229 if(ifield.eq.nf) then
230 ibeg = indices(i,ifield-2)+1 ; Field starts just after the last delimiter.
231 iend = dimsizes(cstrings(i,:))-1 ; Last index of the character row.
233 ; Any field between first and last field.
236 ibeg = indices(i,ifield-2)+1 ; Just after the preceding delimiter.
237 iend = indices(i,ifield-1)-1 ; Just before the following delimiter.
; Keep the longest field length seen so far.
240 if( (iend-ibeg+1) .gt. max_len) then
241 max_len = iend-ibeg+1
248 ;----------------------------------------------------------------------
; Main script: read the delimited ASCII file, parse each field with
; delim_indices/read_field, and write the variables to a netCDF file.
; NOTE(review): the assignments of "station", "year", "nyear" and "nmonth"
; and the loops over "i" are not visible in this copy of the script.
250 ;----------------------------------------------------------------------
253 ; Set up defaults here. We are hard-coding the field types here.
254 ; You can set up this script to try to determine the field types
255 ; automatically, but this is a bit tedious. Maybe later.
259 nfields = 30 ; # of fields
260 delimiter = "," ; field delimiter
262 filename = station+year+"_L4_m.txt" ; ASCII file to read.
263 cdf_file = station+year+"_L4_m.nc" ; netCDF file to write.
265 ; In this case, fields #1-#2 (indices 0 and 1) are integers,
266 ; and the rest of the fields are floats.
268 var_types = new(nfields,string)
269 var_strlens = new(nfields,integer) ; var to hold strlens, just in case.
271 var_types = "float" ; Most are floats.
272 var_types(0:1) = "integer"
; Remove any existing output file before a new one is created below.
274 if(isfilepresent(cdf_file))
275 print("Warning: '" + cdf_file + "' exists. Will remove it.")
276 system("/bin/rm " + cdf_file)
279 ; Read in data as strings. This will create a string array that has the
280 ; same number of strings as there are rows in the file. We will then need
281 ; to parse each string later.
283 read_data = asciiread(filename,-1,"string")
284 header = read_data(0) ; Header. Use for variable names.
285 data = read_data(1:) ; Get rid of first line which is a header.
286 nrows = dimsizes(data) ; Number of rows.
288 ; Read in locations of delimiters in each string row.
290 hindices = delim_indices(header,nfields,delimiter) ; header row
291 dindices = delim_indices(data,nfields,delimiter) ; rest of file
296 ; Read in the field names which will become variable names on
; the netCDF file.
299 var_names = new(nfields,string)
302 var_names(i) = read_field(header,i+1,hindices,"string") ; Field numbers are 1-based.
305 ; Write out this netCDF file efficiently so it will be faster.
306 ; Try to predefine everything before you write to it.
308 f = addfile(cdf_file,"c")
309 setfileoption(f,"DefineMode",True) ; Enter predefine phase.
311 ; Write global attributes to file. It's okay to do this before
312 ; predefining the file's variables. We are still in "define" mode.
315 fAtt@description = "Data read in from " + filename + " ASCII file."
316 fAtt@creation_date = systemfunc ("date")
317 fileattdef( f, fAtt )
319 ; Write dimension names to file. Two dimensions are defined here:
320 ; "year" (flagged as unlimited) and "month".
325 dim_names = (/ "year", "month" /)
326 dim_sizes = (/ nyear , nmonth /)
327 dimUnlim = (/ True , False /)
329 filedimdef( f, dim_names, dim_sizes, dimUnlim )
331 ; Define each variable on the file.
333 filevardef( f, "year", "integer", "year" )
335 ; Don't deal with variable Month (i=0).
; Each field is defined with its header name and type on (year, month).
338 filevardef(f, var_names(i), var_types(i), dim_names)
341 ; Loop through each field, read the values for that field, print
342 ; information about the variable, and then write it to the netCDF file.
346 ifield = i+1 ; Fields start at #1, not #0.
348 ; Note: you can't write strings to a netCDF file, so these have
349 ; to be written out as character arrays.
351 tmp_data = new((/1,nmonth/),var_types(i)) ; 1 x nmonth array of this field's type.
353 out_data = read_field(data,ifield,dindices,var_types(i))
355 tmp_data(0,:) = out_data(:) ; Copy the parsed values into the single row.
357 ; Print some info about the variable.
360 print("Writing variable '" + var_names(i) + "' (field #" + ifield + ").")
361 print("Type is " + var_types(i) + ".")
362 print("min/max = " + min(tmp_data) + "/" + max(tmp_data))
364 if(any(ismissing(tmp_data))) then
365 print("This variable does contain missing values.")
367 print("This variable doesn't contain missing values.")
370 f->$var_names(i)$ = tmp_data ; Write to netCDF file.
372 delete(tmp_data) ; Delete for next round.