|
1 ;---------------------------------------------------------------------- |
|
2 ; This example reads an ASCII file that is formatted a specific way, and |
|
3 ; writes out the results to a netCDF file. |
|
4 ; |
|
5 ; The first line in the ASCII file must be a header, with each field |
|
6 ; separated by a single character delimiter (like a ","). The rest of |
|
7 ; the file must be such that each row contains all fields, each |
|
8 ; separated by the designated delimiter. |
|
9 ; |
|
10 ; The fields can be integer, float, double, character, or string. |
|
11 ; String fields cannot be written to a netCDF file. They have to |
|
12 ; be read in as character arrays and written out that way. |
|
13 ;---------------------------------------------------------------------- |
|
14 |
|
15 ;---------------------------------------------------------------------- |
|
16 ; This function returns the index locations of the given delimiter |
|
17 ; in a row or several rows of strings. |
|
18 ;---------------------------------------------------------------------- |
|
19 function delim_indices(strings,nfields,delimiter) |
|
20 local cstrings, cdelim |
|
21 begin |
|
22 nrows = dimsizes(strings) |
|
23 ; |
|
24 ; Handle special case if we only have one string. Make sure it |
|
25 ; is put into a 2D array. |
|
26 ; |
|
27 if(nrows.eq.1) then |
|
28 cstrings = new((/1,strlen(strings)+1/),character) |
|
29 end if |
|
30 |
|
31 cstrings = stringtochar(strings) ; Convert to characters. |
|
32 cdelim = stringtochar(delimiter) ; Convert delimiter to character. |
|
33 ; |
|
34 ; Som error checking here. Make sure delimiter is one character. |
|
35 ; |
|
36 nc = dimsizes(cdelim) |
|
37 rank = dimsizes(nc) |
|
38 if(rank.ne.1.or.(rank.eq.1.and.nc.ne.2)) then |
|
39 print("delim_indices: fatal: the delimiter you've selected") |
|
40 print("must be a single character. Can't continue.") |
|
41 exit |
|
42 end if |
|
43 |
|
44 ; |
|
45 ; Create array to hold indices of delimiter locations, and then loop |
|
46 ; through each row and find all the delimiters. Make sure each row has |
|
47 ; the correct number of delimiters. |
|
48 ; |
|
49 ndelims = nfields-1 |
|
50 cindices = new((/nrows,ndelims/),integer) |
|
51 do i = 0, nrows-1 |
|
52 ii = ind(cstrings(i,:).eq.cdelim(0)) |
|
53 ; |
|
54 ; Make sure there were delimiters on this row. If not, we just quit. |
|
55 ; This could probably be modified to do this more gracefully. |
|
56 ; |
|
57 if(any(ismissing(ii))) then |
|
58 print("delim_indices: fatal: I didn't find any delimiters") |
|
59 print("('" + delimiter + "') on row " + i + ". Can't continue.") |
|
60 exit |
|
61 end if |
|
62 if(dimsizes(ii).ne.ndelims) then |
|
63 print("delim_indices: fatal: I expected to find " + ndelims) |
|
64 print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".") |
|
65 print("Can't continue.") |
|
66 exit |
|
67 end if |
|
68 |
|
69 cindices(i,:) = ii |
|
70 |
|
71 delete(ii) ; For next time through loop |
|
72 end do |
|
73 |
|
74 return(cindices) |
|
75 end |
|
76 |
|
77 ;---------------------------------------------------------------------- |
|
78 ; This function reads in a particular field from a string array, |
|
79 ; given the field number to read (fields start at #1 and go to #nfield), |
|
80 ; and the indices of the delimiters. |
|
81 ; |
|
82 ; It returns either an integer, float, double, character, or a string, |
|
83 ; depending on the input flag "return_type". |
|
84 ;---------------------------------------------------------------------- |
|
85 function read_field(strings,ifield,indices,return_type) |
|
86 local nstring, cstrings, nf, tmp_str |
|
87 begin |
|
88 nrows = dimsizes(strings) |
|
89 ; |
|
90 ; Handle special case if we only have one string. Make sure it |
|
91 ; is put into a 2D array. |
|
92 ; |
|
93 if(nrows.eq.1) then |
|
94 cstrings = new((/1,strlen(strings)+1/),character) |
|
95 end if |
|
96 |
|
97 cstrings = stringtochar(strings) |
|
98 nf = dimsizes(indices(0,:))+1 ; indices is nrows x (nfields-1) |
|
99 |
|
100 ; |
|
101 ; Error checking. Make sure user has entered a valid field. |
|
102 ; |
|
103 if(ifield.le.0.or.ifield.gt.nf) then |
|
104 print("read_field: fatal: you've selected a field that is") |
|
105 print("out-of-range of the number of fields that you have (" + nf + ").") |
|
106 exit |
|
107 end if |
|
108 |
|
109 ; |
|
110 ; Set up array to return. For string, int, float, or double arrays, |
|
111 ; we don't have to do anything special. For character arrays, |
|
112 ; however, we do. |
|
113 ; |
|
114 if(return_type.ne."character") then |
|
115 return_array = new(nrows,return_type) |
|
116 else |
|
117 ; |
|
118 ; We don't know what the biggest character array is at this point, so |
|
119 ; make it bigger than necessary, and then resize later as necessary. |
|
120 ; |
|
121 tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character") |
|
122 |
|
123 max_len = 0 ; Use to keep track of max lengths of strings. |
|
124 end if |
|
125 |
|
126 do i = 0,nrows-1 |
|
127 ; |
|
128 ; Special case of first field in row. |
|
129 ; |
|
130 if(ifield.eq.1) then |
|
131 ibeg = 0 |
|
132 iend = indices(i,ifield-1)-1 |
|
133 else |
|
134 ; |
|
135 ; Special case of first field in row. |
|
136 ; |
|
137 if(ifield.eq.nf) then |
|
138 ibeg = indices(i,ifield-2)+1 |
|
139 iend = dimsizes(cstrings(i,:))-1 |
|
140 ; |
|
141 ; Any field between first and last field. |
|
142 ; |
|
143 else |
|
144 ibeg = indices(i,ifield-2)+1 |
|
145 iend = indices(i,ifield-1)-1 |
|
146 end if |
|
147 end if |
|
148 ; |
|
149 ; Here's the code that pulls off the correct string, and converts it |
|
150 ; to float if desired. |
|
151 ; |
|
152 if(return_type.eq."integer") then |
|
153 return_array(i) = stringtointeger(chartostring(cstrings(i,ibeg:iend))) |
|
154 end if |
|
155 if(return_type.eq."float") then |
|
156 return_array(i) = stringtofloat(chartostring(cstrings(i,ibeg:iend))) |
|
157 end if |
|
158 if(return_type.eq."double") then |
|
159 return_array(i) = stringtodouble(chartostring(cstrings(i,ibeg:iend))) |
|
160 end if |
|
161 if(return_type.eq."string") then |
|
162 return_array(i) = chartostring(cstrings(i,ibeg:iend)) |
|
163 end if |
|
164 if(return_type.eq."character") then |
|
165 if( (iend-ibeg+1) .gt. max_len) then |
|
166 max_len = iend-ibeg+1 |
|
167 end if |
|
168 tmp_return_array(i,0:iend-ibeg) = cstrings(i,ibeg:iend) |
|
169 end if |
|
170 end do |
|
171 |
|
172 if(return_type.eq."character") then |
|
173 return_array = new((/nrows,max_len/),"character") |
|
174 return_array = tmp_return_array(:,0:max_len-1) |
|
175 end if |
|
176 |
|
177 return(return_array) |
|
178 end |
|
179 |
|
180 |
|
181 ;---------------------------------------------------------------------- |
|
182 ; This function reads in string fields only to get the maximum string |
|
183 ; length. |
|
184 ;---------------------------------------------------------------------- |
|
185 function get_maxlen(strings,ifield,indices) |
|
186 local nstring, cstrings, nf, tmp_str |
|
187 begin |
|
188 nrows = dimsizes(strings) |
|
189 ; |
|
190 ; Handle special case if we only have one string. Make sure it |
|
191 ; is put into a 2D array. |
|
192 ; |
|
193 if(nrows.eq.1) then |
|
194 cstrings = new((/1,strlen(strings)+1/),character) |
|
195 end if |
|
196 |
|
197 cstrings = stringtochar(strings) |
|
198 nf = dimsizes(indices(0,:))+1 ; indices is nrows x (nfields-1) |
|
199 |
|
200 ; |
|
201 ; Error checking. Make sure user has entered a valid field. |
|
202 ; |
|
203 if(ifield.le.0.or.ifield.gt.nf) then |
|
204 print("read_field: fatal: you've selected a field that is") |
|
205 print("out-of-range of the number of fields that you have (" + nf + ").") |
|
206 exit |
|
207 end if |
|
208 ; |
|
209 ; We don't know what the biggest character array is at this point, so |
|
210 ; make it bigger than necessary, and then resize later as necessary. |
|
211 ; |
|
212 tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character") |
|
213 |
|
214 max_len = 0 ; Use to keep track of max lengths of strings. |
|
215 |
|
216 do i = 0,nrows-1 |
|
217 ; |
|
218 ; Special case of first field in row. |
|
219 ; |
|
220 if(ifield.eq.1) then |
|
221 ibeg = 0 |
|
222 iend = indices(i,ifield-1)-1 |
|
223 else |
|
224 ; |
|
225 ; Special case of first field in row. |
|
226 ; |
|
227 if(ifield.eq.nf) then |
|
228 ibeg = indices(i,ifield-2)+1 |
|
229 iend = dimsizes(cstrings(i,:))-1 |
|
230 ; |
|
231 ; Any field between first and last field. |
|
232 ; |
|
233 else |
|
234 ibeg = indices(i,ifield-2)+1 |
|
235 iend = indices(i,ifield-1)-1 |
|
236 end if |
|
237 end if |
|
238 if( (iend-ibeg+1) .gt. max_len) then |
|
239 max_len = iend-ibeg+1 |
|
240 end if |
|
241 end do |
|
242 |
|
243 return(max_len) |
|
244 end |
|
245 |
|
246 ;---------------------------------------------------------------------- |
|
247 ; Main code. |
|
248 ;---------------------------------------------------------------------- |
|
249 begin |
|
250 ; |
|
251 ; Set up defaults here. We are hard-coding the field types here. |
|
252 ; You can set up this script to try to determine the field types |
|
253 ; automatically, but this is a bit tedious. Maybe later. |
|
254 ; |
|
255 filename = "data.933" ; ASCII" file to read. |
|
256 cdf_file = filename + ".nc" ; netCDF file to write. |
|
257 nfields = 45 ; # of fields |
|
258 delimiter = "," ; field delimiter |
|
259 ; |
|
260 ; In this case, fields #6-#8 are strings, fields #2, #3, and #11 |
|
261 ; are float, and the rest of the fields are integers. |
|
262 ; |
|
263 var_types = new(nfields,string) |
|
264 var_strlens = new(nfields,integer) ; var to hold strlens, just in case. |
|
265 |
|
266 var_types = "integer" ; Most are ints. |
|
267 var_types(5:7) = "character" ; Corresponds to fields 6-8. |
|
268 var_types(1:2) = "float" |
|
269 var_types(10) = "float" |
|
270 |
|
271 if(isfilepresent(cdf_file)) |
|
272 print("Warning: '" + cdf_file + "' exists. Will remove it.") |
|
273 system("/bin/rm " + cdf_file) |
|
274 end if |
|
275 |
|
276 ; |
|
277 ; Read in data as strings. This will create a string array that has the |
|
278 ; same number of strings as there are rows in the file. We will then need |
|
279 ; to parse each string later. |
|
280 ; |
|
281 read_data = asciiread(filename,-1,"string") |
|
282 header = read_data(0) ; Header. Use for variable names. |
|
283 data = read_data(1:) ; Get rid of first line which is a header. |
|
284 nrows = dimsizes(data) ; Number of rows. |
|
285 |
|
286 ; |
|
287 ; Read in locations of delimiters in each string row. |
|
288 ; |
|
289 hindices = delim_indices(header,nfields,delimiter) ; header row |
|
290 dindices = delim_indices(data,nfields,delimiter) ; rest of file |
|
291 |
|
292 ; |
|
293 ; Read in the field names which will become variable names on |
|
294 ; the netCDF file. |
|
295 ; |
|
296 var_names = new(nfields,string) |
|
297 |
|
298 do i=0,nfields-1 |
|
299 var_names(i) = read_field(header,i+1,hindices,"string") |
|
300 end do |
|
301 |
|
302 ; |
|
303 ; Write out this netCDF file efficiently so it will be faster. |
|
304 ; Try to predefine everything before you write to it. |
|
305 ; |
|
306 f = addfile(cdf_file,"c") |
|
307 setfileoption(f,"DefineMode",True) ; Enter predefine phase. |
|
308 |
|
309 ; |
|
310 ; Write global attributes to file. It's okay to do this before |
|
311 ; predefining the file's variables. We are still in "define" mode. |
|
312 ; |
|
313 fAtt = True |
|
314 fAtt@description = "Data read in from " + filename + " ASCII file." |
|
315 fAtt@creation_date = systemfunc ("date") |
|
316 fileattdef( f, fAtt ) |
|
317 |
|
318 ; |
|
319 ; Write dimension names to file. If there are no character variables, |
|
320 ; then there's only one dimension name ("nvalues"). |
|
321 ; |
|
322 ; Otherwise, we need to write a dimension name for every character |
|
323 ; variable, which will indicate the maximum string length for that |
|
324 ; variable. |
|
325 ; |
|
326 indc = ind(var_types.eq."character") |
|
327 if(.not.any(ismissing(indc))) then |
|
328 ; |
|
329 ; We have to treat the character arrays special here. We need to |
|
330 ; know their sizes so we can write the maximum size of each char |
|
331 ; array to the netCDF file as a dimension name. This means we |
|
332 ; need to read in the character variables once to get the string |
|
333 ; lengths, then we'll read them again later to get the actual values. |
|
334 ; |
|
335 do i=0,dimsizes(indc)-1 |
|
336 var_strlens(indc(i)) = get_maxlen(data,indc(i)+1,dindices) |
|
337 end do |
|
338 |
|
339 ndims = dimsizes(indc) + 1 |
|
340 dimNames = new(ndims,string) |
|
341 dimSizes = new(ndims,integer) |
|
342 dimUnlim = new(ndims,logical) |
|
343 |
|
344 dimUnlim = False |
|
345 dimUnlim(0) = True |
|
346 dimNames(0) = "nvalues" |
|
347 dimNames(1:ndims-1) = var_names(indc) + "_StrLen" |
|
348 dimSizes(0) = -1 |
|
349 dimSizes(1:ndims-1) = var_strlens(indc) |
|
350 filedimdef(f,dimNames,dimSizes,dimUnlim) |
|
351 else |
|
352 ; |
|
353 ; No character variables, so just write the one dimension name. |
|
354 ; |
|
355 filedimdef(f,"nvalues",-1,True) |
|
356 end if |
|
357 |
|
358 ; |
|
359 ; Define each variable on the file. |
|
360 ; |
|
361 ; Don't deal with variables that are of type string. |
|
362 ; |
|
363 do i=0,nfields-1 |
|
364 if(var_types(i).ne."string") then |
|
365 if(var_types(i).ne."character") then |
|
366 filevardef(f, var_names(i), var_types(i), "nvalues") |
|
367 else |
|
368 filevardef(f, var_names(i), var_types(i), \ |
|
369 (/"nvalues",var_names(i)+"_StrLen"/)) |
|
370 end if |
|
371 end if |
|
372 end do |
|
373 |
|
374 ; |
|
375 ; Loop through each field, read the values for that field, print |
|
376 ; information about the variable, and then write it to the netCDF |
|
377 ; file. |
|
378 ; |
|
379 do i=0,nfields-1 |
|
380 ifield = i+1 ; Fields start at #1, not #0. |
|
381 ; |
|
382 ; Note: you can't write strings to a netCDF file, so these have |
|
383 ; to be written out as character arrays. |
|
384 ; |
|
385 tmp_data = read_field(data,ifield,dindices,var_types(i)) |
|
386 ; |
|
387 ; Print some info about the variable. |
|
388 ; |
|
389 print("") |
|
390 print("Writing variable '" + var_names(i) + "' (field #" + ifield + ").") |
|
391 print("Type is " + var_types(i) + ".") |
|
392 if(var_types(i).ne."string".and.var_types(i).ne."character") then |
|
393 print("min/max = " + min(tmp_data) + "/" + max(tmp_data)) |
|
394 end if |
|
395 |
|
396 if(any(ismissing(tmp_data))) then |
|
397 print("This variable does contain missing values.") |
|
398 else |
|
399 print("This variable doesn't contain missing values.") |
|
400 end if |
|
401 |
|
402 f->$var_names(i)$ = tmp_data ; Write to netCDF file. |
|
403 |
|
404 delete(tmp_data) ; Delete for next round. |
|
405 end do |
|
406 end |