ameriflux/03.read_ascii.ncl
author Forrest Hoffman <forrest@climatemodeling.org>
Thu, 26 Mar 2009 14:02:21 -0400
changeset 1 4be95183fbcd
permissions -rw-r--r--
Modifications to scoring and graphics production for the final version of code for the C-LAMP paper in GCB.
     1 ;----------------------------------------------------------------------
     2 ; add Month to output
     3 ; add lat, lon to output
     4 ; add year to output
     5 ;
     6 ; This example reads an ASCII file that is formatted a specific way, and
     7 ; writes out the results to a netCDF file.
     8 ;
     9 ; The first line in the ASCII file must be a header, with each field
    10 ; separated by a single character delimiter (like a ","). The rest of
    11 ; the file must be such that each row contains all fields, each
    12 ; separated by the designated delimiter.
    13 ;
    14 ; The fields can be integer, float, double, character, or string.
    15 ; String fields cannot be written to a netCDF file. They have to
    16 ; be read in as character arrays and written out that way.
    17 ;----------------------------------------------------------------------
    18 
    19 ;----------------------------------------------------------------------
    20 ; This function returns the index locations of the given delimiter
    21 ; in a row or several rows of strings.
    22 ;----------------------------------------------------------------------
    23 function delim_indices(strings,nfields,delimiter)
    24 local cstrings, cdelim
    25 begin
    26   nrows = dimsizes(strings)
    27 ;
    28 ; Handle special case if we only have one string. Make sure it
    29 ; is put into a 2D array.
    30 ;
    31   if(nrows.eq.1) then
    32     cstrings = new((/1,strlen(strings)+1/),character)
    33   end if
    34 
    35   cstrings = stringtochar(strings)        ; Convert to characters.
    36   cdelim   = stringtochar(delimiter)      ; Convert delimiter to character.
    37 ;
    38 ; Som error checking here. Make sure delimiter is one character.
    39 ;
    40   nc   = dimsizes(cdelim)
    41   rank = dimsizes(nc)
    42   if(rank.ne.1.or.(rank.eq.1.and.nc.ne.2)) then
    43     print("delim_indices: fatal: the delimiter you've selected")
    44     print("must be a single character. Can't continue.")
    45     exit
    46   end if
    47 
    48 ;
    49 ; Create array to hold indices of delimiter locations, and then loop
    50 ; through each row and find all the delimiters. Make sure each row has
    51 ; the correct number of delimiters.
    52 ;
    53   ndelims  = nfields-1
    54   cindices = new((/nrows,ndelims/),integer)
    55   do i = 0, nrows-1
    56     ii = ind(cstrings(i,:).eq.cdelim(0))
    57 ;
    58 ; Make sure there were delimiters on this row. If not, we just quit.
    59 ; This could probably be modified to do this more gracefully.
    60 ;
    61     if(any(ismissing(ii))) then
    62       print("delim_indices: fatal: I didn't find any delimiters")
    63       print("('" + delimiter + "') on row " + i + ". Can't continue.")
    64       exit
    65     end if
    66     if(dimsizes(ii).ne.ndelims) then
    67       print("delim_indices: fatal: I expected to find " + ndelims)
    68       print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".")
    69       print("Can't continue.")
    70       exit
    71     end if
    72 
    73     cindices(i,:) = ii
    74 
    75     delete(ii)            ; For next time through loop
    76   end do
    77 
    78   return(cindices)
    79 end
    80 
    81 ;----------------------------------------------------------------------
    82 ; This function reads in a particular field from a string array,
    83 ; given the field number to read (fields start at #1 and go to #nfield),
    84 ; and the indices of the delimiters.
    85 ;
    86 ; It returns either an integer, float, double, character, or a string,
    87 ; depending on the input flag "return_type".
    88 ;----------------------------------------------------------------------
    89 function read_field(strings,ifield,indices,return_type)
    90 local nstring, cstrings, nf, tmp_str
    91 begin
    92   nrows = dimsizes(strings)
    93 ;
    94 ; Handle special case if we only have one string. Make sure it
    95 ; is put into a 2D array.
    96 ;
    97   if(nrows.eq.1) then
    98     cstrings = new((/1,strlen(strings)+1/),character)
    99   end if
   100 
   101   cstrings = stringtochar(strings)
   102   nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
   103 
   104 ;
   105 ; Error checking. Make sure user has entered a valid field.
   106 ;
   107   if(ifield.le.0.or.ifield.gt.nf) then
   108     print("read_field: fatal: you've selected a field that is")
   109     print("out-of-range of the number of fields that you have (" + nf + ").")
   110     exit
   111   end if
   112 
   113 ;
   114 ; Set up array to return. For string, int, float, or double arrays,
   115 ; we don't have to do anything special. For character arrays,
   116 ; however, we do.
   117 ;
   118   if(return_type.ne."character") then
   119     return_array = new(nrows,return_type)
   120   else
   121 ;
   122 ; We don't know what the biggest character array is at this point, so
   123 ; make it bigger than necessary, and then resize later as necessary.
   124 ;
   125     tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
   126 
   127     max_len = 0     ; Use to keep track of max lengths of strings.
   128   end if
   129 
   130   do i = 0,nrows-1
   131 ;
   132 ; Special case of first field in row.
   133 ;
   134     if(ifield.eq.1) then
   135       ibeg = 0
   136       iend = indices(i,ifield-1)-1
   137     else
   138 ;
   139 ; Special case of first field in row.
   140 ;
   141       if(ifield.eq.nf) then
   142         ibeg = indices(i,ifield-2)+1
   143         iend = dimsizes(cstrings(i,:))-1
   144 ;
   145 ; Any field between first and last field.
   146 ;
   147       else
   148         ibeg = indices(i,ifield-2)+1
   149         iend = indices(i,ifield-1)-1
   150       end if  
   151     end if
   152 ;
   153 ; Here's the code that pulls off the correct string, and converts it
   154 ; to float if desired.
   155 ;
   156     if(return_type.eq."integer") then
   157       return_array(i) = stringtointeger(chartostring(cstrings(i,ibeg:iend)))
   158     end if
   159     if(return_type.eq."float") then
   160       return_array(i) = stringtofloat(chartostring(cstrings(i,ibeg:iend)))
   161     end if
   162     if(return_type.eq."double") then
   163       return_array(i) = stringtodouble(chartostring(cstrings(i,ibeg:iend)))
   164     end if
   165     if(return_type.eq."string") then
   166       return_array(i) = chartostring(cstrings(i,ibeg:iend))
   167     end if
   168     if(return_type.eq."character") then
   169       if( (iend-ibeg+1) .gt. max_len) then
   170         max_len = iend-ibeg+1
   171       end if
   172       tmp_return_array(i,0:iend-ibeg) = cstrings(i,ibeg:iend)
   173     end if
   174   end do
   175 
   176   if(return_type.eq."character") then
   177     return_array = new((/nrows,max_len/),"character")
   178     return_array = tmp_return_array(:,0:max_len-1)
   179   end if
   180 
   181   return(return_array)
   182 end
   183 
   184 
   185 ;----------------------------------------------------------------------
   186 ; This function reads in string fields only to get the maximum string
   187 ; length.
   188 ;----------------------------------------------------------------------
   189 function get_maxlen(strings,ifield,indices)
   190 local nstring, cstrings, nf, tmp_str
   191 begin
   192   nrows = dimsizes(strings)
   193 ;
   194 ; Handle special case if we only have one string. Make sure it
   195 ; is put into a 2D array.
   196 ;
   197   if(nrows.eq.1) then
   198     cstrings = new((/1,strlen(strings)+1/),character)
   199   end if
   200 
   201   cstrings = stringtochar(strings)
   202   nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
   203 
   204 ;
   205 ; Error checking. Make sure user has entered a valid field.
   206 ;
   207   if(ifield.le.0.or.ifield.gt.nf) then
   208     print("read_field: fatal: you've selected a field that is")
   209     print("out-of-range of the number of fields that you have (" + nf + ").")
   210     exit
   211   end if
   212 ;
   213 ; We don't know what the biggest character array is at this point, so
   214 ; make it bigger than necessary, and then resize later as necessary.
   215 ;
   216   tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
   217 
   218   max_len = 0     ; Use to keep track of max lengths of strings.
   219 
   220   do i = 0,nrows-1
   221 ;
   222 ; Special case of first field in row.
   223 ;
   224     if(ifield.eq.1) then
   225       ibeg = 0
   226       iend = indices(i,ifield-1)-1
   227     else
   228 ;
   229 ; Special case of first field in row.
   230 ;
   231       if(ifield.eq.nf) then
   232         ibeg = indices(i,ifield-2)+1
   233         iend = dimsizes(cstrings(i,:))-1
   234 ;
   235 ; Any field between first and last field.
   236 ;
   237       else
   238         ibeg = indices(i,ifield-2)+1
   239         iend = indices(i,ifield-1)-1
   240       end if  
   241     end if
   242     if( (iend-ibeg+1) .gt. max_len) then
   243       max_len = iend-ibeg+1
   244     end if
   245   end do
   246 
   247   return(max_len)
   248 end
   249 
   250 ;----------------------------------------------------------------------
   251 ; Main code.
   252 ;----------------------------------------------------------------------
   253 begin
   254 
   255 ; Set up defaults here.  We are hard-coding the field types here.
   256 ; You can set up this script to try to determine the field types 
   257 ; automatically, but this is a bit tedious. Maybe later.
   258 
   259   station = "USHa1"
   260   lat     = 42.5378
   261   lon     = -72.1715 + 360.
   262   year    = 1991
   263   nfields   = 30                        ; # of fields
   264   delimiter = ","                       ; field delimiter
   265  
   266   filename  = station+year+"_L4_m.txt"  ; ASCII" file to read.
   267   cdf_file  = station+year+"_L4_m.nc"   ; netCDF file to write. 
   268 
   269 ; In this case, fields #2-#2 are integers,
   270 ; and the rest of the fields are floats.
   271 
   272   var_types      = new(nfields,string)
   273   var_strlens    = new(nfields,integer)   ; var to hold strlens, just in case.
   274 
   275   var_types      = "float"       ; Most are floats.
   276   var_types(0:1) = "integer"
   277 
   278   if(isfilepresent(cdf_file))
   279     print("Warning: '" + cdf_file + "' exists. Will remove it.")
   280     system("/bin/rm " + cdf_file)
   281   end if
   282 
   283 ; Read in data as strings. This will create a string array that has the
   284 ; same number of strings as there are rows in the file. We will then need
   285 ; to parse each string later.
   286 
   287   read_data = asciiread(filename,-1,"string")
   288   header    = read_data(0)        ; Header. Use for variable names.
   289   data      = read_data(1:)       ; Get rid of first line which is a header.
   290   nrows     = dimsizes(data)      ; Number of rows.
   291 
   292 ; Read in locations of delimiters in each string row.
   293 
   294   hindices = delim_indices(header,nfields,delimiter)   ; header row
   295   dindices = delim_indices(data,nfields,delimiter)     ; rest of file
   296 
   297 ; print (hindices)
   298 ; print (dindices)
   299 
   300 ; Read in the field names which will become variable names on
   301 ; the netCDF file.
   302 
   303   var_names = new(nfields,string)
   304 
   305   do i=0,nfields-1
   306     var_names(i) = read_field(header,i+1,hindices,"string")
   307   end do
   308 
   309 ; Write out this netCDF file efficiently so it will be faster.
   310 ; Try to predefine everything before you write to it.
   311 
   312   f = addfile(cdf_file,"c")
   313   setfileoption(f,"DefineMode",True)       ; Enter predefine phase.
   314 
   315 ; Write global attributes to file. It's okay to do this before 
   316 ; predefining the file's variables. We are still in "define" mode.
   317 
   318   fAtt               = True
   319   fAtt@description   = "Data read in from " + filename + " ASCII file."
   320   fAtt@creation_date = systemfunc ("date")        
   321   fileattdef( f, fAtt )        
   322 
   323 ; Write dimension names to file. If there are no character variables,
   324 ; then there's only one dimension name ("nvalues").
   325 
   326   nyear  = -1
   327   nmonth = 12
   328  
   329   dim_names = (/ "year",  "month" /)
   330   dim_sizes = (/ nyear ,  nmonth  /)
   331   dimUnlim  = (/ True  ,  False   /)
   332 
   333   filedimdef( f, dim_names, dim_sizes, dimUnlim )
   334   filedimdef( f, "lat", 1, False )
   335   filedimdef( f, "lon", 1, False )
   336 
   337 ; Define each variable on the file.
   338 
   339   filevardef( f, "year", "integer", "year" )
   340   filevardef( f, "lat" , "float"  , "lat" )
   341   filevardef( f, "lon" , "float"  , "lon" )
   342 
   343 ; Don't deal with variable Month (i=0).
   344 
   345   do i=0,nfields-1
   346      filevardef(f, var_names(i), var_types(i), dim_names)
   347   end do
   348 
   349 ; Loop through each field, read the values for that field, print 
   350 ; information about the variable, and then write it to the netCDF
   351 ; file.
   352 
   353   do i=0,nfields-1
   354     ifield = i+1           ; Fields start at #1, not #0.
   355 
   356 ; Note: you can't write strings to a netCDF file, so these have
   357 ; to be written out as character arrays.
   358 
   359     tmp_data = new((/1,nmonth/),var_types(i))
   360 
   361     tmp_data(0,:) = read_field(data,ifield,dindices,var_types(i))
   362 
   363 ;   tmp_data(0,:) = out_data(:)
   364 
   365 ; Print some info about the variable.
   366 
   367     print("")
   368     print("Writing variable '" + var_names(i) + "' (field #" + ifield + ").")
   369     print("Type is " + var_types(i) + ".")
   370     print("min/max = " + min(tmp_data) + "/" + max(tmp_data))
   371 
   372     if(any(ismissing(tmp_data))) then
   373       print("This variable does contain missing values.")
   374     else
   375       print("This variable doesn't contain missing values.")
   376     end if
   377 
   378     f->$var_names(i)$ = tmp_data       ; Write to netCDF file.
   379 
   380     delete(tmp_data)                   ; Delete for next round.
   381 ;   delete(out_data) 
   382   end do
   383 
   384   f->year = year
   385   f->lat  = lat
   386   f->lon  = lon 
   387 end