ameriflux/06.read_ascii.ncl
author Forrest Hoffman <forrest@climatemodeling.org>
Thu, 26 Mar 2009 14:02:21 -0400
changeset 1 4be95183fbcd
permissions -rw-r--r--
Modifications to scoring and graphics production for the final version of code for the C-LAMP paper in GCB.
     1 ;----------------------------------------------------------------------
     2 ; set _FillValue
     3 ; change Month to yyyymm
     4 ; add Month to output
     5 ; add lat, lon to output
     6 ; add year to output
     7 ;
     8 ; This example reads an ASCII file that is formatted a specific way, and
     9 ; writes out the results to a netCDF file.
    10 ;
    11 ; The first line in the ASCII file must be a header, with each field
    12 ; separated by a single character delimiter (like a ","). The rest of
    13 ; the file must be such that each row contains all fields, each
    14 ; separated by the designated delimiter.
    15 ;
    16 ; The fields can be integer, float, double, character, or string.
    17 ; String fields cannot be written to a netCDF file. They have to
    18 ; be read in as character arrays and written out that way.
    19 ;----------------------------------------------------------------------
    20 
    21 ;----------------------------------------------------------------------
    22 ; This function returns the index locations of the given delimiter
    23 ; in a row or several rows of strings.
    24 ;----------------------------------------------------------------------
    25 function delim_indices(strings,nfields,delimiter)
    26 local cstrings, cdelim
    27 begin
    28   nrows = dimsizes(strings)
    29 ;
    30 ; Handle special case if we only have one string. Make sure it
    31 ; is put into a 2D array.
    32 ;
    33   if(nrows.eq.1) then
    34     cstrings = new((/1,strlen(strings)+1/),character)
    35   end if
    36 
    37   cstrings = stringtochar(strings)        ; Convert to characters.
    38   cdelim   = stringtochar(delimiter)      ; Convert delimiter to character.
    39 ;
    40 ; Som error checking here. Make sure delimiter is one character.
    41 ;
    42   nc   = dimsizes(cdelim)
    43   rank = dimsizes(nc)
    44   if(rank.ne.1.or.(rank.eq.1.and.nc.ne.2)) then
    45     print("delim_indices: fatal: the delimiter you've selected")
    46     print("must be a single character. Can't continue.")
    47     exit
    48   end if
    49 
    50 ;
    51 ; Create array to hold indices of delimiter locations, and then loop
    52 ; through each row and find all the delimiters. Make sure each row has
    53 ; the correct number of delimiters.
    54 ;
    55   ndelims  = nfields-1
    56   cindices = new((/nrows,ndelims/),integer)
    57   do i = 0, nrows-1
    58     ii = ind(cstrings(i,:).eq.cdelim(0))
    59 ;
    60 ; Make sure there were delimiters on this row. If not, we just quit.
    61 ; This could probably be modified to do this more gracefully.
    62 ;
    63     if(any(ismissing(ii))) then
    64       print("delim_indices: fatal: I didn't find any delimiters")
    65       print("('" + delimiter + "') on row " + i + ". Can't continue.")
    66       exit
    67     end if
    68     if(dimsizes(ii).ne.ndelims) then
    69       print("delim_indices: fatal: I expected to find " + ndelims)
    70       print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".")
    71       print("Can't continue.")
    72       exit
    73     end if
    74 
    75     cindices(i,:) = ii
    76 
    77     delete(ii)            ; For next time through loop
    78   end do
    79 
    80   return(cindices)
    81 end
    82 
    83 ;----------------------------------------------------------------------
    84 ; This function reads in a particular field from a string array,
    85 ; given the field number to read (fields start at #1 and go to #nfield),
    86 ; and the indices of the delimiters.
    87 ;
    88 ; It returns either an integer, float, double, character, or a string,
    89 ; depending on the input flag "return_type".
    90 ;----------------------------------------------------------------------
    91 function read_field(strings,ifield,indices,return_type)
    92 local nstring, cstrings, nf, tmp_str
    93 begin
    94   nrows = dimsizes(strings)
    95 ;
    96 ; Handle special case if we only have one string. Make sure it
    97 ; is put into a 2D array.
    98 ;
    99   if(nrows.eq.1) then
   100     cstrings = new((/1,strlen(strings)+1/),character)
   101   end if
   102 
   103   cstrings = stringtochar(strings)
   104   nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
   105 
   106 ;
   107 ; Error checking. Make sure user has entered a valid field.
   108 ;
   109   if(ifield.le.0.or.ifield.gt.nf) then
   110     print("read_field: fatal: you've selected a field that is")
   111     print("out-of-range of the number of fields that you have (" + nf + ").")
   112     exit
   113   end if
   114 
   115 ;
   116 ; Set up array to return. For string, int, float, or double arrays,
   117 ; we don't have to do anything special. For character arrays,
   118 ; however, we do.
   119 ;
   120   if(return_type.ne."character") then
   121     return_array = new(nrows,return_type)
   122   else
   123 ;
   124 ; We don't know what the biggest character array is at this point, so
   125 ; make it bigger than necessary, and then resize later as necessary.
   126 ;
   127     tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
   128 
   129     max_len = 0     ; Use to keep track of max lengths of strings.
   130   end if
   131 
   132   do i = 0,nrows-1
   133 ;
   134 ; Special case of first field in row.
   135 ;
   136     if(ifield.eq.1) then
   137       ibeg = 0
   138       iend = indices(i,ifield-1)-1
   139     else
   140 ;
   141 ; Special case of first field in row.
   142 ;
   143       if(ifield.eq.nf) then
   144         ibeg = indices(i,ifield-2)+1
   145         iend = dimsizes(cstrings(i,:))-1
   146 ;
   147 ; Any field between first and last field.
   148 ;
   149       else
   150         ibeg = indices(i,ifield-2)+1
   151         iend = indices(i,ifield-1)-1
   152       end if  
   153     end if
   154 ;
   155 ; Here's the code that pulls off the correct string, and converts it
   156 ; to float if desired.
   157 ;
   158     if(return_type.eq."integer") then
   159       return_array(i) = stringtointeger(chartostring(cstrings(i,ibeg:iend)))
   160     end if
   161     if(return_type.eq."float") then
   162       return_array(i) = stringtofloat(chartostring(cstrings(i,ibeg:iend)))
   163     end if
   164     if(return_type.eq."double") then
   165       return_array(i) = stringtodouble(chartostring(cstrings(i,ibeg:iend)))
   166     end if
   167     if(return_type.eq."string") then
   168       return_array(i) = chartostring(cstrings(i,ibeg:iend))
   169     end if
   170     if(return_type.eq."character") then
   171       if( (iend-ibeg+1) .gt. max_len) then
   172         max_len = iend-ibeg+1
   173       end if
   174       tmp_return_array(i,0:iend-ibeg) = cstrings(i,ibeg:iend)
   175     end if
   176   end do
   177 
   178   if(return_type.eq."character") then
   179     return_array = new((/nrows,max_len/),"character")
   180     return_array = tmp_return_array(:,0:max_len-1)
   181   end if
   182 
   183   return(return_array)
   184 end
   185 
   186 
   187 ;----------------------------------------------------------------------
   188 ; This function reads in string fields only to get the maximum string
   189 ; length.
   190 ;----------------------------------------------------------------------
   191 function get_maxlen(strings,ifield,indices)
   192 local nstring, cstrings, nf, tmp_str
   193 begin
   194   nrows = dimsizes(strings)
   195 ;
   196 ; Handle special case if we only have one string. Make sure it
   197 ; is put into a 2D array.
   198 ;
   199   if(nrows.eq.1) then
   200     cstrings = new((/1,strlen(strings)+1/),character)
   201   end if
   202 
   203   cstrings = stringtochar(strings)
   204   nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
   205 
   206 ;
   207 ; Error checking. Make sure user has entered a valid field.
   208 ;
   209   if(ifield.le.0.or.ifield.gt.nf) then
   210     print("read_field: fatal: you've selected a field that is")
   211     print("out-of-range of the number of fields that you have (" + nf + ").")
   212     exit
   213   end if
   214 ;
   215 ; We don't know what the biggest character array is at this point, so
   216 ; make it bigger than necessary, and then resize later as necessary.
   217 ;
   218   tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
   219 
   220   max_len = 0     ; Use to keep track of max lengths of strings.
   221 
   222   do i = 0,nrows-1
   223 ;
   224 ; Special case of first field in row.
   225 ;
   226     if(ifield.eq.1) then
   227       ibeg = 0
   228       iend = indices(i,ifield-1)-1
   229     else
   230 ;
   231 ; Special case of first field in row.
   232 ;
   233       if(ifield.eq.nf) then
   234         ibeg = indices(i,ifield-2)+1
   235         iend = dimsizes(cstrings(i,:))-1
   236 ;
   237 ; Any field between first and last field.
   238 ;
   239       else
   240         ibeg = indices(i,ifield-2)+1
   241         iend = indices(i,ifield-1)-1
   242       end if  
   243     end if
   244     if( (iend-ibeg+1) .gt. max_len) then
   245       max_len = iend-ibeg+1
   246     end if
   247   end do
   248 
   249   return(max_len)
   250 end
   251 
   252 ;----------------------------------------------------------------------
   253 ; Main code.
   254 ;----------------------------------------------------------------------
   255 begin
   256 
   257 ;###############################################################
   258 ; Set up defaults.  We are hard-coding here..
   259 
   260   year    = 2005
   261   lat     = 35.9735823
   262   lon     = -79.1004304 + 360.
   263 
   264   nfields   = 30                        ; # of fields
   265   delimiter = ","                       ; field delimiter
   266  
   267   filename  = year+".txt"       ; ASCII" file to read.
   268   cdf_file  = year+"_L4_m.nc"   ; netCDF file to write. 
   269 
   270   if(isfilepresent(cdf_file))
   271     print("Warning: '" + cdf_file + "' exists. Will remove it.")
   272     system("/bin/rm " + cdf_file)
   273   end if
   274 
   275 ; In this case, fields #1-#2 are integers,
   276 ; and the rest of the fields are floats.
   277 
   278   var_types      = new(nfields,string)
   279   var_types      = "float"       ; Most are floats.
   280   var_types(0:1) = "integer"
   281 
   282 ;#####################################################################
   283 
   284 ; Read in data as strings. This will create a string array that has the
   285 ; same number of strings as there are rows in the file. We will then need
   286 ; to parse each string later.
   287 
   288   read_data = asciiread(filename,-1,"string")
   289 
   290   header    = read_data(0)        ; Header. Use for variable names.
   291   data      = read_data(1:)       ; Get rid of first line which is a header.
   292   nmonth    = dimsizes(data)      ; Number of rows == number of month.
   293 
   294 ; Read in locations of delimiters in each string row.
   295 
   296   hindices = delim_indices(header,nfields,delimiter)   ; header row
   297   dindices = delim_indices(data,nfields,delimiter)     ; rest of file
   298 
   299 ; Read in the field names which will become variable names on
   300 ; the netCDF file.
   301 
   302   var_names = new(nfields,string)
   303 
   304   do i=0,nfields-1
   305     var_names(i) = read_field(header,i+1,hindices,"string")
   306   end do
   307 
   308 ;-------------------------------------------------------------------
   309 ; Write out this netCDF file efficiently so it will be faster.
   310 ; Try to predefine everything before you write to it.
   311 
   312   f = addfile(cdf_file,"c")
   313   setfileoption(f,"DefineMode",True)       ; Enter predefine phase.
   314 
   315 ; Write global attributes to file. It's okay to do this before 
   316 ; predefining the file's variables. We are still in "define" mode.
   317 
   318   fAtt               = True
   319   fAtt@description   = "Data read in from " + filename + " ASCII file."
   320   fAtt@creation_date = systemfunc ("date")        
   321   fileattdef( f, fAtt )        
   322 
   323 ; Write dimension names to file.
   324  
   325   dim_names = (/ "year",  "month" /)
   326   dim_sizes = (/ -1    ,  nmonth  /)
   327   dimUnlim  = (/ True  ,  False   /)
   328   filedimdef( f, dim_names, dim_sizes, dimUnlim )
   329 
   330   filedimdef( f, "lat", 1, False )
   331   filedimdef( f, "lon", 1, False )
   332 
   333 ; Define each variable on the file.
   334 
   335   filevardef( f, "year", "integer", "year" )
   336   filevardef( f, "lat" , "float"  , "lat" )
   337   filevardef( f, "lon" , "float"  , "lon" )
   338 
   339   do i=0,nfields-1
   340 
   341 ;    define variable
   342 
   343      filevardef(f, var_names(i), var_types(i), dim_names)
   344 
   345 ;    define variable attributes
   346 
   347      if (i.le.1) then
   348         varAtt = 0
   349 ;       varAtt@_FillValue = -999
   350      else
   351         varAtt = 0.
   352 ;       varAtt@_FillValue = 1.e36
   353      end if
   354 
   355      varAtt@long_name  = var_names(i)
   356 
   357      filevarattdef( f, var_names(i) , varAtt )
   358 
   359      delete (varAtt)
   360   end do
   361 ;-----------------------------------------------------------------
   362 
   363 ; Loop through each field, read the values for that field, print 
   364 ; information about the variable, and then write it to the netCDF file.
   365 
   366   do i=0,nfields-1
   367     ifield = i+1                         ; Fields start at #1, not #0.
   368 
   369     tmp_data = new((/1,nmonth/),var_types(i))
   370 
   371     if (i.le.1) then
   372        tmp_data@_FillValue = -999
   373     else
   374        tmp_data@_FillValue = 1.e36
   375     end if     
   376 
   377     tmp_data(0,:) = read_field(data,ifield,dindices,var_types(i))
   378   
   379     tmp_data = where(tmp_data .le. -9000.,tmp_data@_FillValue,tmp_data)
   380 
   381 ;   change Month to yyyymm
   382 
   383     if (i.eq.0) then
   384        tmp_data(0,:) = tmp_data(0,:) + year*100
   385     end if 
   386 
   387 ; Print some info about the variable.
   388 
   389 ;   print("")
   390 ;   print("Writing variable '" + var_names(i) + "' (field #" + ifield + ").")
   391 ;   print("Type is " + var_types(i) + ".")
   392 ;   print("min/max = " + min(tmp_data) + "/" + max(tmp_data))
   393 
   394 ;   if(any(ismissing(tmp_data))) then
   395 ;     print("This variable does contain missing values.")
   396 ;   else
   397 ;     print("This variable doesn't contain missing values.")
   398 ;   end if
   399 
   400 ; write variable to file
   401 
   402     f->$var_names(i)$ = tmp_data       ; Write to netCDF file.
   403 
   404     delete(tmp_data)                   ; Delete for next round.
   405   end do
   406 
   407 ; write variable to file
   408 
   409   f->year = year
   410   f->lat  = lat
   411   f->lon  = lon 
   412 end