ameriflux/04.read_ascii.ncl
author Forrest Hoffman <forrest@climatemodeling.org>
Mon, 26 Jan 2009 22:08:20 -0500
changeset 0 0c6405ab2ff4
permissions -rw-r--r--
Initial commit of C-LAMP Diagnostics from Jeff Lee
     1 ;----------------------------------------------------------------------
     2 ; change Month to yyyymm
     3 ; add Month to output
     4 ; add lat, lon to output
     5 ; add year to output
     6 ;
     7 ; This example reads an ASCII file that is formatted a specific way, and
     8 ; writes out the results to a netCDF file.
     9 ;
    10 ; The first line in the ASCII file must be a header, with each field
    11 ; separated by a single character delimiter (like a ","). The rest of
    12 ; the file must be such that each row contains all fields, each
    13 ; separated by the designated delimiter.
    14 ;
    15 ; The fields can be integer, float, double, character, or string.
    16 ; String fields cannot be written to a netCDF file. They have to
    17 ; be read in as character arrays and written out that way.
    18 ;----------------------------------------------------------------------
    19 
    20 ;----------------------------------------------------------------------
    21 ; This function returns the index locations of the given delimiter
    22 ; in a row or several rows of strings.
    23 ;----------------------------------------------------------------------
    24 function delim_indices(strings,nfields,delimiter)
    25 local cstrings, cdelim
    26 begin
    27   nrows = dimsizes(strings)
    28 ;
    29 ; Handle special case if we only have one string. Make sure it
    30 ; is put into a 2D array.
    31 ;
    32   if(nrows.eq.1) then
    33     cstrings = new((/1,strlen(strings)+1/),character)
    34   end if
    35 
    36   cstrings = stringtochar(strings)        ; Convert to characters.
    37   cdelim   = stringtochar(delimiter)      ; Convert delimiter to character.
    38 ;
    39 ; Som error checking here. Make sure delimiter is one character.
    40 ;
    41   nc   = dimsizes(cdelim)
    42   rank = dimsizes(nc)
    43   if(rank.ne.1.or.(rank.eq.1.and.nc.ne.2)) then
    44     print("delim_indices: fatal: the delimiter you've selected")
    45     print("must be a single character. Can't continue.")
    46     exit
    47   end if
    48 
    49 ;
    50 ; Create array to hold indices of delimiter locations, and then loop
    51 ; through each row and find all the delimiters. Make sure each row has
    52 ; the correct number of delimiters.
    53 ;
    54   ndelims  = nfields-1
    55   cindices = new((/nrows,ndelims/),integer)
    56   do i = 0, nrows-1
    57     ii = ind(cstrings(i,:).eq.cdelim(0))
    58 ;
    59 ; Make sure there were delimiters on this row. If not, we just quit.
    60 ; This could probably be modified to do this more gracefully.
    61 ;
    62     if(any(ismissing(ii))) then
    63       print("delim_indices: fatal: I didn't find any delimiters")
    64       print("('" + delimiter + "') on row " + i + ". Can't continue.")
    65       exit
    66     end if
    67     if(dimsizes(ii).ne.ndelims) then
    68       print("delim_indices: fatal: I expected to find " + ndelims)
    69       print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".")
    70       print("Can't continue.")
    71       exit
    72     end if
    73 
    74     cindices(i,:) = ii
    75 
    76     delete(ii)            ; For next time through loop
    77   end do
    78 
    79   return(cindices)
    80 end
    81 
    82 ;----------------------------------------------------------------------
    83 ; This function reads in a particular field from a string array,
    84 ; given the field number to read (fields start at #1 and go to #nfield),
    85 ; and the indices of the delimiters.
    86 ;
    87 ; It returns either an integer, float, double, character, or a string,
    88 ; depending on the input flag "return_type".
    89 ;----------------------------------------------------------------------
    90 function read_field(strings,ifield,indices,return_type)
    91 local nstring, cstrings, nf, tmp_str
    92 begin
    93   nrows = dimsizes(strings)
    94 ;
    95 ; Handle special case if we only have one string. Make sure it
    96 ; is put into a 2D array.
    97 ;
    98   if(nrows.eq.1) then
    99     cstrings = new((/1,strlen(strings)+1/),character)
   100   end if
   101 
   102   cstrings = stringtochar(strings)
   103   nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
   104 
   105 ;
   106 ; Error checking. Make sure user has entered a valid field.
   107 ;
   108   if(ifield.le.0.or.ifield.gt.nf) then
   109     print("read_field: fatal: you've selected a field that is")
   110     print("out-of-range of the number of fields that you have (" + nf + ").")
   111     exit
   112   end if
   113 
   114 ;
   115 ; Set up array to return. For string, int, float, or double arrays,
   116 ; we don't have to do anything special. For character arrays,
   117 ; however, we do.
   118 ;
   119   if(return_type.ne."character") then
   120     return_array = new(nrows,return_type)
   121   else
   122 ;
   123 ; We don't know what the biggest character array is at this point, so
   124 ; make it bigger than necessary, and then resize later as necessary.
   125 ;
   126     tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
   127 
   128     max_len = 0     ; Use to keep track of max lengths of strings.
   129   end if
   130 
   131   do i = 0,nrows-1
   132 ;
   133 ; Special case of first field in row.
   134 ;
   135     if(ifield.eq.1) then
   136       ibeg = 0
   137       iend = indices(i,ifield-1)-1
   138     else
   139 ;
   140 ; Special case of first field in row.
   141 ;
   142       if(ifield.eq.nf) then
   143         ibeg = indices(i,ifield-2)+1
   144         iend = dimsizes(cstrings(i,:))-1
   145 ;
   146 ; Any field between first and last field.
   147 ;
   148       else
   149         ibeg = indices(i,ifield-2)+1
   150         iend = indices(i,ifield-1)-1
   151       end if  
   152     end if
   153 ;
   154 ; Here's the code that pulls off the correct string, and converts it
   155 ; to float if desired.
   156 ;
   157     if(return_type.eq."integer") then
   158       return_array(i) = stringtointeger(chartostring(cstrings(i,ibeg:iend)))
   159     end if
   160     if(return_type.eq."float") then
   161       return_array(i) = stringtofloat(chartostring(cstrings(i,ibeg:iend)))
   162     end if
   163     if(return_type.eq."double") then
   164       return_array(i) = stringtodouble(chartostring(cstrings(i,ibeg:iend)))
   165     end if
   166     if(return_type.eq."string") then
   167       return_array(i) = chartostring(cstrings(i,ibeg:iend))
   168     end if
   169     if(return_type.eq."character") then
   170       if( (iend-ibeg+1) .gt. max_len) then
   171         max_len = iend-ibeg+1
   172       end if
   173       tmp_return_array(i,0:iend-ibeg) = cstrings(i,ibeg:iend)
   174     end if
   175   end do
   176 
   177   if(return_type.eq."character") then
   178     return_array = new((/nrows,max_len/),"character")
   179     return_array = tmp_return_array(:,0:max_len-1)
   180   end if
   181 
   182   return(return_array)
   183 end
   184 
   185 
   186 ;----------------------------------------------------------------------
   187 ; This function reads in string fields only to get the maximum string
   188 ; length.
   189 ;----------------------------------------------------------------------
   190 function get_maxlen(strings,ifield,indices)
   191 local nstring, cstrings, nf, tmp_str
   192 begin
   193   nrows = dimsizes(strings)
   194 ;
   195 ; Handle special case if we only have one string. Make sure it
   196 ; is put into a 2D array.
   197 ;
   198   if(nrows.eq.1) then
   199     cstrings = new((/1,strlen(strings)+1/),character)
   200   end if
   201 
   202   cstrings = stringtochar(strings)
   203   nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
   204 
   205 ;
   206 ; Error checking. Make sure user has entered a valid field.
   207 ;
   208   if(ifield.le.0.or.ifield.gt.nf) then
   209     print("read_field: fatal: you've selected a field that is")
   210     print("out-of-range of the number of fields that you have (" + nf + ").")
   211     exit
   212   end if
   213 ;
   214 ; We don't know what the biggest character array is at this point, so
   215 ; make it bigger than necessary, and then resize later as necessary.
   216 ;
   217   tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
   218 
   219   max_len = 0     ; Use to keep track of max lengths of strings.
   220 
   221   do i = 0,nrows-1
   222 ;
   223 ; Special case of first field in row.
   224 ;
   225     if(ifield.eq.1) then
   226       ibeg = 0
   227       iend = indices(i,ifield-1)-1
   228     else
   229 ;
   230 ; Special case of first field in row.
   231 ;
   232       if(ifield.eq.nf) then
   233         ibeg = indices(i,ifield-2)+1
   234         iend = dimsizes(cstrings(i,:))-1
   235 ;
   236 ; Any field between first and last field.
   237 ;
   238       else
   239         ibeg = indices(i,ifield-2)+1
   240         iend = indices(i,ifield-1)-1
   241       end if  
   242     end if
   243     if( (iend-ibeg+1) .gt. max_len) then
   244       max_len = iend-ibeg+1
   245     end if
   246   end do
   247 
   248   return(max_len)
   249 end
   250 
   251 ;----------------------------------------------------------------------
   252 ; Main code.
   253 ;----------------------------------------------------------------------
   254 begin
   255 
   256 ;###############################################################
   257 ; Set up defaults.  We are hard-coding here..
   258 
   259   year    = 1991
   260   station = "USHa1"
   261   lat     = 42.5378
   262   lon     = -72.1715 + 360.
   263 
   264   nfields   = 30                        ; # of fields
   265   delimiter = ","                       ; field delimiter
   266  
   267   filename  = station+year+"_L4_m.txt"  ; ASCII" file to read.
   268   cdf_file  = station+year+"_L4_m.nc"   ; netCDF file to write. 
   269 
   270   if(isfilepresent(cdf_file))
   271     print("Warning: '" + cdf_file + "' exists. Will remove it.")
   272     system("/bin/rm " + cdf_file)
   273   end if
   274 
   275 ; In this case, fields #1-#2 are integers,
   276 ; and the rest of the fields are floats.
   277 
   278   var_types      = new(nfields,string)
   279   var_types      = "float"       ; Most are floats.
   280   var_types(0:1) = "integer"
   281 
   282 ;#####################################################################
   283 
   284 ; Read in data as strings. This will create a string array that has the
   285 ; same number of strings as there are rows in the file. We will then need
   286 ; to parse each string later.
   287 
   288   read_data = asciiread(filename,-1,"string")
   289 
   290   header    = read_data(0)        ; Header. Use for variable names.
   291   data      = read_data(1:)       ; Get rid of first line which is a header.
   292   nmonth    = dimsizes(data)      ; Number of rows == number of month.
   293 
   294 ; Read in locations of delimiters in each string row.
   295 
   296   hindices = delim_indices(header,nfields,delimiter)   ; header row
   297   dindices = delim_indices(data,nfields,delimiter)     ; rest of file
   298 
   299 ; Read in the field names which will become variable names on
   300 ; the netCDF file.
   301 
   302   var_names = new(nfields,string)
   303 
   304   do i=0,nfields-1
   305     var_names(i) = read_field(header,i+1,hindices,"string")
   306   end do
   307 
   308 ;-------------------------------------------------------------------
   309 ; Write out this netCDF file efficiently so it will be faster.
   310 ; Try to predefine everything before you write to it.
   311 
   312   f = addfile(cdf_file,"c")
   313   setfileoption(f,"DefineMode",True)       ; Enter predefine phase.
   314 
   315 ; Write global attributes to file. It's okay to do this before 
   316 ; predefining the file's variables. We are still in "define" mode.
   317 
   318   fAtt               = True
   319   fAtt@description   = "Data read in from " + filename + " ASCII file."
   320   fAtt@creation_date = systemfunc ("date")        
   321   fileattdef( f, fAtt )        
   322 
   323 ; Write dimension names to file.
   324  
   325   dim_names = (/ "year",  "month" /)
   326   dim_sizes = (/ -1    ,  nmonth  /)
   327   dimUnlim  = (/ True  ,  False   /)
   328   filedimdef( f, dim_names, dim_sizes, dimUnlim )
   329 
   330   filedimdef( f, "lat", 1, False )
   331   filedimdef( f, "lon", 1, False )
   332 
   333 ; Define each variable on the file.
   334 
   335   filevardef( f, "year", "integer", "year" )
   336   filevardef( f, "lat" , "float"  , "lat" )
   337   filevardef( f, "lon" , "float"  , "lon" )
   338 
   339   do i=0,nfields-1
   340      filevardef(f, var_names(i), var_types(i), dim_names)
   341   end do
   342 ;-----------------------------------------------------------------
   343 
   344 ; Loop through each field, read the values for that field, print 
   345 ; information about the variable, and then write it to the netCDF file.
   346 
   347   do i=0,nfields-1
   348     ifield = i+1                         ; Fields start at #1, not #0.
   349 
   350     tmp_data = new((/1,nmonth/),var_types(i))
   351 
   352     if (i.le.1) then
   353        tmp_data@_FillValue = -999
   354     else
   355        tmp_data@_FillValue = -9999.00
   356     end if     
   357 
   358     tmp_data(0,:) = read_field(data,ifield,dindices,var_types(i))
   359 
   360 ;   change Month to yyyymm
   361 
   362     if (i.eq.0) then
   363        tmp_data(0,:) = tmp_data(0,:) + year*100
   364     end if 
   365 
   366 ; Print some info about the variable.
   367 
   368 ;   print("")
   369 ;   print("Writing variable '" + var_names(i) + "' (field #" + ifield + ").")
   370 ;   print("Type is " + var_types(i) + ".")
   371 ;   print("min/max = " + min(tmp_data) + "/" + max(tmp_data))
   372 
   373 ;   if(any(ismissing(tmp_data))) then
   374 ;     print("This variable does contain missing values.")
   375 ;   else
   376 ;     print("This variable doesn't contain missing values.")
   377 ;   end if
   378 
   379 ; write variable to file
   380 
   381     f->$var_names(i)$ = tmp_data       ; Write to netCDF file.
   382 
   383     delete(tmp_data)                   ; Delete for next round.
   384   end do
   385 
   386 ; write variable to file
   387 
   388   f->year = year
   389   f->lat  = lat
   390   f->lon  = lon 
   391 end