ameriflux/02.read_ascii.ncl
author Forrest Hoffman <forrest@climatemodeling.org>
Mon, 26 Jan 2009 22:08:20 -0500
changeset 0 0c6405ab2ff4
permissions -rw-r--r--
Initial commit of C-LAMP Diagnostics from Jeff Lee
     1 ;----------------------------------------------------------------------
     2 ; add year to output
     3 ;
     4 ; This example reads an ASCII file that is formatted a specific way, and
     5 ; writes out the results to a netCDF file.
     6 ;
     7 ; The first line in the ASCII file must be a header, with each field
     8 ; separated by a single character delimiter (like a ","). The rest of
     9 ; the file must be such that each row contains all fields, each
    10 ; separated by the designated delimiter.
    11 ;
    12 ; The fields can be integer, float, double, character, or string.
    13 ; String fields cannot be written to a netCDF file. They have to
    14 ; be read in as character arrays and written out that way.
    15 ;----------------------------------------------------------------------
    16 
    17 ;----------------------------------------------------------------------
    18 ; This function returns the index locations of the given delimiter
    19 ; in a row or several rows of strings.
    20 ;----------------------------------------------------------------------
    21 function delim_indices(strings,nfields,delimiter)
    22 local cstrings, cdelim
    23 begin
    24   nrows = dimsizes(strings)
    25 ;
    26 ; Handle special case if we only have one string. Make sure it
    27 ; is put into a 2D array.
    28 ;
    29   if(nrows.eq.1) then
    30     cstrings = new((/1,strlen(strings)+1/),character)
    31   end if
    32 
    33   cstrings = stringtochar(strings)        ; Convert to characters.
    34   cdelim   = stringtochar(delimiter)      ; Convert delimiter to character.
    35 ;
    36 ; Som error checking here. Make sure delimiter is one character.
    37 ;
    38   nc   = dimsizes(cdelim)
    39   rank = dimsizes(nc)
    40   if(rank.ne.1.or.(rank.eq.1.and.nc.ne.2)) then
    41     print("delim_indices: fatal: the delimiter you've selected")
    42     print("must be a single character. Can't continue.")
    43     exit
    44   end if
    45 
    46 ;
    47 ; Create array to hold indices of delimiter locations, and then loop
    48 ; through each row and find all the delimiters. Make sure each row has
    49 ; the correct number of delimiters.
    50 ;
    51   ndelims  = nfields-1
    52   cindices = new((/nrows,ndelims/),integer)
    53   do i = 0, nrows-1
    54     ii = ind(cstrings(i,:).eq.cdelim(0))
    55 ;
    56 ; Make sure there were delimiters on this row. If not, we just quit.
    57 ; This could probably be modified to do this more gracefully.
    58 ;
    59     if(any(ismissing(ii))) then
    60       print("delim_indices: fatal: I didn't find any delimiters")
    61       print("('" + delimiter + "') on row " + i + ". Can't continue.")
    62       exit
    63     end if
    64     if(dimsizes(ii).ne.ndelims) then
    65       print("delim_indices: fatal: I expected to find " + ndelims)
    66       print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".")
    67       print("Can't continue.")
    68       exit
    69     end if
    70 
    71     cindices(i,:) = ii
    72 
    73     delete(ii)            ; For next time through loop
    74   end do
    75 
    76   return(cindices)
    77 end
    78 
    79 ;----------------------------------------------------------------------
    80 ; This function reads in a particular field from a string array,
    81 ; given the field number to read (fields start at #1 and go to #nfield),
    82 ; and the indices of the delimiters.
    83 ;
    84 ; It returns either an integer, float, double, character, or a string,
    85 ; depending on the input flag "return_type".
    86 ;----------------------------------------------------------------------
    87 function read_field(strings,ifield,indices,return_type)
    88 local nstring, cstrings, nf, tmp_str
    89 begin
    90   nrows = dimsizes(strings)
    91 ;
    92 ; Handle special case if we only have one string. Make sure it
    93 ; is put into a 2D array.
    94 ;
    95   if(nrows.eq.1) then
    96     cstrings = new((/1,strlen(strings)+1/),character)
    97   end if
    98 
    99   cstrings = stringtochar(strings)
   100   nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
   101 
   102 ;
   103 ; Error checking. Make sure user has entered a valid field.
   104 ;
   105   if(ifield.le.0.or.ifield.gt.nf) then
   106     print("read_field: fatal: you've selected a field that is")
   107     print("out-of-range of the number of fields that you have (" + nf + ").")
   108     exit
   109   end if
   110 
   111 ;
   112 ; Set up array to return. For string, int, float, or double arrays,
   113 ; we don't have to do anything special. For character arrays,
   114 ; however, we do.
   115 ;
   116   if(return_type.ne."character") then
   117     return_array = new(nrows,return_type)
   118   else
   119 ;
   120 ; We don't know what the biggest character array is at this point, so
   121 ; make it bigger than necessary, and then resize later as necessary.
   122 ;
   123     tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
   124 
   125     max_len = 0     ; Use to keep track of max lengths of strings.
   126   end if
   127 
   128   do i = 0,nrows-1
   129 ;
   130 ; Special case of first field in row.
   131 ;
   132     if(ifield.eq.1) then
   133       ibeg = 0
   134       iend = indices(i,ifield-1)-1
   135     else
   136 ;
   137 ; Special case of first field in row.
   138 ;
   139       if(ifield.eq.nf) then
   140         ibeg = indices(i,ifield-2)+1
   141         iend = dimsizes(cstrings(i,:))-1
   142 ;
   143 ; Any field between first and last field.
   144 ;
   145       else
   146         ibeg = indices(i,ifield-2)+1
   147         iend = indices(i,ifield-1)-1
   148       end if  
   149     end if
   150 ;
   151 ; Here's the code that pulls off the correct string, and converts it
   152 ; to float if desired.
   153 ;
   154     if(return_type.eq."integer") then
   155       return_array(i) = stringtointeger(chartostring(cstrings(i,ibeg:iend)))
   156     end if
   157     if(return_type.eq."float") then
   158       return_array(i) = stringtofloat(chartostring(cstrings(i,ibeg:iend)))
   159     end if
   160     if(return_type.eq."double") then
   161       return_array(i) = stringtodouble(chartostring(cstrings(i,ibeg:iend)))
   162     end if
   163     if(return_type.eq."string") then
   164       return_array(i) = chartostring(cstrings(i,ibeg:iend))
   165     end if
   166     if(return_type.eq."character") then
   167       if( (iend-ibeg+1) .gt. max_len) then
   168         max_len = iend-ibeg+1
   169       end if
   170       tmp_return_array(i,0:iend-ibeg) = cstrings(i,ibeg:iend)
   171     end if
   172   end do
   173 
   174   if(return_type.eq."character") then
   175     return_array = new((/nrows,max_len/),"character")
   176     return_array = tmp_return_array(:,0:max_len-1)
   177   end if
   178 
   179   return(return_array)
   180 end
   181 
   182 
   183 ;----------------------------------------------------------------------
   184 ; This function reads in string fields only to get the maximum string
   185 ; length.
   186 ;----------------------------------------------------------------------
   187 function get_maxlen(strings,ifield,indices)
   188 local nstring, cstrings, nf, tmp_str
   189 begin
   190   nrows = dimsizes(strings)
   191 ;
   192 ; Handle special case if we only have one string. Make sure it
   193 ; is put into a 2D array.
   194 ;
   195   if(nrows.eq.1) then
   196     cstrings = new((/1,strlen(strings)+1/),character)
   197   end if
   198 
   199   cstrings = stringtochar(strings)
   200   nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
   201 
   202 ;
   203 ; Error checking. Make sure user has entered a valid field.
   204 ;
   205   if(ifield.le.0.or.ifield.gt.nf) then
   206     print("read_field: fatal: you've selected a field that is")
   207     print("out-of-range of the number of fields that you have (" + nf + ").")
   208     exit
   209   end if
   210 ;
   211 ; We don't know what the biggest character array is at this point, so
   212 ; make it bigger than necessary, and then resize later as necessary.
   213 ;
   214   tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
   215 
   216   max_len = 0     ; Use to keep track of max lengths of strings.
   217 
   218   do i = 0,nrows-1
   219 ;
   220 ; Special case of first field in row.
   221 ;
   222     if(ifield.eq.1) then
   223       ibeg = 0
   224       iend = indices(i,ifield-1)-1
   225     else
   226 ;
   227 ; Special case of first field in row.
   228 ;
   229       if(ifield.eq.nf) then
   230         ibeg = indices(i,ifield-2)+1
   231         iend = dimsizes(cstrings(i,:))-1
   232 ;
   233 ; Any field between first and last field.
   234 ;
   235       else
   236         ibeg = indices(i,ifield-2)+1
   237         iend = indices(i,ifield-1)-1
   238       end if  
   239     end if
   240     if( (iend-ibeg+1) .gt. max_len) then
   241       max_len = iend-ibeg+1
   242     end if
   243   end do
   244 
   245   return(max_len)
   246 end
   247 
   248 ;----------------------------------------------------------------------
   249 ; Main code.
   250 ;----------------------------------------------------------------------
   251 begin
   252 
   253 ; Set up defaults here.  We are hard-coding the field types here.
   254 ; You can set up this script to try to determine the field types 
   255 ; automatically, but this is a bit tedious. Maybe later.
   256 
   257   station = "USHa1"
   258   year    = 1991
   259   nfields   = 30                        ; # of fields
   260   delimiter = ","                       ; field delimiter
   261  
   262   filename  = station+year+"_L4_m.txt"  ; ASCII" file to read.
   263   cdf_file  = station+year+"_L4_m.nc"   ; netCDF file to write. 
   264 
   265 ; In this case, fields #2-#2 are integers,
   266 ; and the rest of the fields are floats.
   267 
   268   var_types      = new(nfields,string)
   269   var_strlens    = new(nfields,integer)   ; var to hold strlens, just in case.
   270 
   271   var_types      = "float"       ; Most are floats.
   272   var_types(0:1) = "integer"
   273 
   274   if(isfilepresent(cdf_file))
   275     print("Warning: '" + cdf_file + "' exists. Will remove it.")
   276     system("/bin/rm " + cdf_file)
   277   end if
   278 
   279 ; Read in data as strings. This will create a string array that has the
   280 ; same number of strings as there are rows in the file. We will then need
   281 ; to parse each string later.
   282 
   283   read_data = asciiread(filename,-1,"string")
   284   header    = read_data(0)        ; Header. Use for variable names.
   285   data      = read_data(1:)       ; Get rid of first line which is a header.
   286   nrows     = dimsizes(data)      ; Number of rows.
   287 
   288 ; Read in locations of delimiters in each string row.
   289 
   290   hindices = delim_indices(header,nfields,delimiter)   ; header row
   291   dindices = delim_indices(data,nfields,delimiter)     ; rest of file
   292 
   293   print (hindices)
   294   print (dindices)
   295 
   296 ; Read in the field names which will become variable names on
   297 ; the netCDF file.
   298 
   299   var_names = new(nfields,string)
   300 
   301   do i=0,nfields-1
   302     var_names(i) = read_field(header,i+1,hindices,"string")
   303   end do
   304 
   305 ; Write out this netCDF file efficiently so it will be faster.
   306 ; Try to predefine everything before you write to it.
   307 
   308   f = addfile(cdf_file,"c")
   309   setfileoption(f,"DefineMode",True)       ; Enter predefine phase.
   310 
   311 ; Write global attributes to file. It's okay to do this before 
   312 ; predefining the file's variables. We are still in "define" mode.
   313 
   314   fAtt               = True
   315   fAtt@description   = "Data read in from " + filename + " ASCII file."
   316   fAtt@creation_date = systemfunc ("date")        
   317   fileattdef( f, fAtt )        
   318 
   319 ; Write dimension names to file. If there are no character variables,
   320 ; then there's only one dimension name ("nvalues").
   321 
   322   nyear  = -1
   323   nmonth = 12
   324  
   325   dim_names = (/ "year",  "month" /)
   326   dim_sizes = (/ nyear ,  nmonth  /)
   327   dimUnlim  = (/ True  ,  False   /)
   328 
   329   filedimdef( f, dim_names, dim_sizes, dimUnlim )
   330 
   331 ; Define each variable on the file.
   332 
   333   filevardef( f, "year", "integer", "year" )
   334 
   335 ; Don't deal with variable Month (i=0).
   336 
   337   do i=1,nfields-1
   338      filevardef(f, var_names(i), var_types(i), dim_names)
   339   end do
   340 
   341 ; Loop through each field, read the values for that field, print 
   342 ; information about the variable, and then write it to the netCDF
   343 ; file.
   344 
   345   do i=1,nfields-1
   346     ifield = i+1           ; Fields start at #1, not #0.
   347 
   348 ; Note: you can't write strings to a netCDF file, so these have
   349 ; to be written out as character arrays.
   350 
   351     tmp_data = new((/1,nmonth/),var_types(i))
   352 
   353     out_data = read_field(data,ifield,dindices,var_types(i))
   354 
   355     tmp_data(0,:) = out_data(:)
   356 
   357 ; Print some info about the variable.
   358 
   359     print("")
   360     print("Writing variable '" + var_names(i) + "' (field #" + ifield + ").")
   361     print("Type is " + var_types(i) + ".")
   362     print("min/max = " + min(tmp_data) + "/" + max(tmp_data))
   363 
   364     if(any(ismissing(tmp_data))) then
   365       print("This variable does contain missing values.")
   366     else
   367       print("This variable doesn't contain missing values.")
   368     end if
   369 
   370     f->$var_names(i)$ = tmp_data       ; Write to netCDF file.
   371 
   372     delete(tmp_data)                   ; Delete for next round.
   373     delete(out_data) 
   374   end do
   375 
   376   f->year = year 
   377 end