ameriflux/05.read_ascii.ncl
changeset 1 4be95183fbcd
equal deleted inserted replaced
-1:000000000000 0:ee15840156f5
       
     1 ;----------------------------------------------------------------------
       
     2 ; set _FillValue
       
     3 ; change Month to yyyymm
       
     4 ; add Month to output
       
     5 ; add lat, lon to output
       
     6 ; add year to output
       
     7 ;
       
     8 ; This example reads an ASCII file that is formatted a specific way, and
       
     9 ; writes out the results to a netCDF file.
       
    10 ;
       
    11 ; The first line in the ASCII file must be a header, with each field
       
    12 ; separated by a single character delimiter (like a ","). The rest of
       
    13 ; the file must be such that each row contains all fields, each
       
    14 ; separated by the designated delimiter.
       
    15 ;
       
    16 ; The fields can be integer, float, double, character, or string.
       
    17 ; String fields cannot be written to a netCDF file. They have to
       
    18 ; be read in as character arrays and written out that way.
       
    19 ;----------------------------------------------------------------------
       
    20 
       
    21 ;----------------------------------------------------------------------
       
    22 ; This function returns the index locations of the given delimiter
       
    23 ; in a row or several rows of strings.
       
    24 ;----------------------------------------------------------------------
       
    25 function delim_indices(strings,nfields,delimiter)
       
    26 local cstrings, cdelim
       
    27 begin
       
    28   nrows = dimsizes(strings)
       
    29 ;
       
    30 ; Handle special case if we only have one string. Make sure it
       
    31 ; is put into a 2D array.
       
    32 ;
       
    33   if(nrows.eq.1) then
       
    34     cstrings = new((/1,strlen(strings)+1/),character)
       
    35   end if
       
    36 
       
    37   cstrings = stringtochar(strings)        ; Convert to characters.
       
    38   cdelim   = stringtochar(delimiter)      ; Convert delimiter to character.
       
    39 ;
       
    40 ; Som error checking here. Make sure delimiter is one character.
       
    41 ;
       
    42   nc   = dimsizes(cdelim)
       
    43   rank = dimsizes(nc)
       
    44   if(rank.ne.1.or.(rank.eq.1.and.nc.ne.2)) then
       
    45     print("delim_indices: fatal: the delimiter you've selected")
       
    46     print("must be a single character. Can't continue.")
       
    47     exit
       
    48   end if
       
    49 
       
    50 ;
       
    51 ; Create array to hold indices of delimiter locations, and then loop
       
    52 ; through each row and find all the delimiters. Make sure each row has
       
    53 ; the correct number of delimiters.
       
    54 ;
       
    55   ndelims  = nfields-1
       
    56   cindices = new((/nrows,ndelims/),integer)
       
    57   do i = 0, nrows-1
       
    58     ii = ind(cstrings(i,:).eq.cdelim(0))
       
    59 ;
       
    60 ; Make sure there were delimiters on this row. If not, we just quit.
       
    61 ; This could probably be modified to do this more gracefully.
       
    62 ;
       
    63     if(any(ismissing(ii))) then
       
    64       print("delim_indices: fatal: I didn't find any delimiters")
       
    65       print("('" + delimiter + "') on row " + i + ". Can't continue.")
       
    66       exit
       
    67     end if
       
    68     if(dimsizes(ii).ne.ndelims) then
       
    69       print("delim_indices: fatal: I expected to find " + ndelims)
       
    70       print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".")
       
    71       print("Can't continue.")
       
    72       exit
       
    73     end if
       
    74 
       
    75     cindices(i,:) = ii
       
    76 
       
    77     delete(ii)            ; For next time through loop
       
    78   end do
       
    79 
       
    80   return(cindices)
       
    81 end
       
    82 
       
    83 ;----------------------------------------------------------------------
       
    84 ; This function reads in a particular field from a string array,
       
    85 ; given the field number to read (fields start at #1 and go to #nfield),
       
    86 ; and the indices of the delimiters.
       
    87 ;
       
    88 ; It returns either an integer, float, double, character, or a string,
       
    89 ; depending on the input flag "return_type".
       
    90 ;----------------------------------------------------------------------
       
    91 function read_field(strings,ifield,indices,return_type)
       
    92 local nstring, cstrings, nf, tmp_str
       
    93 begin
       
    94   nrows = dimsizes(strings)
       
    95 ;
       
    96 ; Handle special case if we only have one string. Make sure it
       
    97 ; is put into a 2D array.
       
    98 ;
       
    99   if(nrows.eq.1) then
       
   100     cstrings = new((/1,strlen(strings)+1/),character)
       
   101   end if
       
   102 
       
   103   cstrings = stringtochar(strings)
       
   104   nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
       
   105 
       
   106 ;
       
   107 ; Error checking. Make sure user has entered a valid field.
       
   108 ;
       
   109   if(ifield.le.0.or.ifield.gt.nf) then
       
   110     print("read_field: fatal: you've selected a field that is")
       
   111     print("out-of-range of the number of fields that you have (" + nf + ").")
       
   112     exit
       
   113   end if
       
   114 
       
   115 ;
       
   116 ; Set up array to return. For string, int, float, or double arrays,
       
   117 ; we don't have to do anything special. For character arrays,
       
   118 ; however, we do.
       
   119 ;
       
   120   if(return_type.ne."character") then
       
   121     return_array = new(nrows,return_type)
       
   122   else
       
   123 ;
       
   124 ; We don't know what the biggest character array is at this point, so
       
   125 ; make it bigger than necessary, and then resize later as necessary.
       
   126 ;
       
   127     tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
       
   128 
       
   129     max_len = 0     ; Use to keep track of max lengths of strings.
       
   130   end if
       
   131 
       
   132   do i = 0,nrows-1
       
   133 ;
       
   134 ; Special case of first field in row.
       
   135 ;
       
   136     if(ifield.eq.1) then
       
   137       ibeg = 0
       
   138       iend = indices(i,ifield-1)-1
       
   139     else
       
   140 ;
       
   141 ; Special case of first field in row.
       
   142 ;
       
   143       if(ifield.eq.nf) then
       
   144         ibeg = indices(i,ifield-2)+1
       
   145         iend = dimsizes(cstrings(i,:))-1
       
   146 ;
       
   147 ; Any field between first and last field.
       
   148 ;
       
   149       else
       
   150         ibeg = indices(i,ifield-2)+1
       
   151         iend = indices(i,ifield-1)-1
       
   152       end if  
       
   153     end if
       
   154 ;
       
   155 ; Here's the code that pulls off the correct string, and converts it
       
   156 ; to float if desired.
       
   157 ;
       
   158     if(return_type.eq."integer") then
       
   159       return_array(i) = stringtointeger(chartostring(cstrings(i,ibeg:iend)))
       
   160     end if
       
   161     if(return_type.eq."float") then
       
   162       return_array(i) = stringtofloat(chartostring(cstrings(i,ibeg:iend)))
       
   163     end if
       
   164     if(return_type.eq."double") then
       
   165       return_array(i) = stringtodouble(chartostring(cstrings(i,ibeg:iend)))
       
   166     end if
       
   167     if(return_type.eq."string") then
       
   168       return_array(i) = chartostring(cstrings(i,ibeg:iend))
       
   169     end if
       
   170     if(return_type.eq."character") then
       
   171       if( (iend-ibeg+1) .gt. max_len) then
       
   172         max_len = iend-ibeg+1
       
   173       end if
       
   174       tmp_return_array(i,0:iend-ibeg) = cstrings(i,ibeg:iend)
       
   175     end if
       
   176   end do
       
   177 
       
   178   if(return_type.eq."character") then
       
   179     return_array = new((/nrows,max_len/),"character")
       
   180     return_array = tmp_return_array(:,0:max_len-1)
       
   181   end if
       
   182 
       
   183   return(return_array)
       
   184 end
       
   185 
       
   186 
       
   187 ;----------------------------------------------------------------------
       
   188 ; This function reads in string fields only to get the maximum string
       
   189 ; length.
       
   190 ;----------------------------------------------------------------------
       
   191 function get_maxlen(strings,ifield,indices)
       
   192 local nstring, cstrings, nf, tmp_str
       
   193 begin
       
   194   nrows = dimsizes(strings)
       
   195 ;
       
   196 ; Handle special case if we only have one string. Make sure it
       
   197 ; is put into a 2D array.
       
   198 ;
       
   199   if(nrows.eq.1) then
       
   200     cstrings = new((/1,strlen(strings)+1/),character)
       
   201   end if
       
   202 
       
   203   cstrings = stringtochar(strings)
       
   204   nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
       
   205 
       
   206 ;
       
   207 ; Error checking. Make sure user has entered a valid field.
       
   208 ;
       
   209   if(ifield.le.0.or.ifield.gt.nf) then
       
   210     print("read_field: fatal: you've selected a field that is")
       
   211     print("out-of-range of the number of fields that you have (" + nf + ").")
       
   212     exit
       
   213   end if
       
   214 ;
       
   215 ; We don't know what the biggest character array is at this point, so
       
   216 ; make it bigger than necessary, and then resize later as necessary.
       
   217 ;
       
   218   tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
       
   219 
       
   220   max_len = 0     ; Use to keep track of max lengths of strings.
       
   221 
       
   222   do i = 0,nrows-1
       
   223 ;
       
   224 ; Special case of first field in row.
       
   225 ;
       
   226     if(ifield.eq.1) then
       
   227       ibeg = 0
       
   228       iend = indices(i,ifield-1)-1
       
   229     else
       
   230 ;
       
   231 ; Special case of first field in row.
       
   232 ;
       
   233       if(ifield.eq.nf) then
       
   234         ibeg = indices(i,ifield-2)+1
       
   235         iend = dimsizes(cstrings(i,:))-1
       
   236 ;
       
   237 ; Any field between first and last field.
       
   238 ;
       
   239       else
       
   240         ibeg = indices(i,ifield-2)+1
       
   241         iend = indices(i,ifield-1)-1
       
   242       end if  
       
   243     end if
       
   244     if( (iend-ibeg+1) .gt. max_len) then
       
   245       max_len = iend-ibeg+1
       
   246     end if
       
   247   end do
       
   248 
       
   249   return(max_len)
       
   250 end
       
   251 
       
   252 ;----------------------------------------------------------------------
       
   253 ; Main code.
       
   254 ;----------------------------------------------------------------------
       
   255 begin
       
   256 
       
   257 ;###############################################################
       
   258 ; Set up defaults.  We are hard-coding here..
       
   259 
       
   260   year    = 1991
       
   261   station = "USHa1"
       
   262   lat     = 42.5378
       
   263   lon     = -72.1715 + 360.
       
   264 
       
   265   nfields   = 30                        ; # of fields
       
   266   delimiter = ","                       ; field delimiter
       
   267  
       
   268   filename  = station+year+"_L4_m.txt"  ; ASCII" file to read.
       
   269   cdf_file  = station+year+"_L4_m.nc"   ; netCDF file to write. 
       
   270 
       
   271   if(isfilepresent(cdf_file))
       
   272     print("Warning: '" + cdf_file + "' exists. Will remove it.")
       
   273     system("/bin/rm " + cdf_file)
       
   274   end if
       
   275 
       
   276 ; In this case, fields #1-#2 are integers,
       
   277 ; and the rest of the fields are floats.
       
   278 
       
   279   var_types      = new(nfields,string)
       
   280   var_types      = "float"       ; Most are floats.
       
   281   var_types(0:1) = "integer"
       
   282 
       
   283 ;#####################################################################
       
   284 
       
   285 ; Read in data as strings. This will create a string array that has the
       
   286 ; same number of strings as there are rows in the file. We will then need
       
   287 ; to parse each string later.
       
   288 
       
   289   read_data = asciiread(filename,-1,"string")
       
   290 
       
   291   header    = read_data(0)        ; Header. Use for variable names.
       
   292   data      = read_data(1:)       ; Get rid of first line which is a header.
       
   293   nmonth    = dimsizes(data)      ; Number of rows == number of month.
       
   294 
       
   295 ; Read in locations of delimiters in each string row.
       
   296 
       
   297   hindices = delim_indices(header,nfields,delimiter)   ; header row
       
   298   dindices = delim_indices(data,nfields,delimiter)     ; rest of file
       
   299 
       
   300 ; Read in the field names which will become variable names on
       
   301 ; the netCDF file.
       
   302 
       
   303   var_names = new(nfields,string)
       
   304 
       
   305   do i=0,nfields-1
       
   306     var_names(i) = read_field(header,i+1,hindices,"string")
       
   307   end do
       
   308 
       
   309 ;-------------------------------------------------------------------
       
   310 ; Write out this netCDF file efficiently so it will be faster.
       
   311 ; Try to predefine everything before you write to it.
       
   312 
       
   313   f = addfile(cdf_file,"c")
       
   314   setfileoption(f,"DefineMode",True)       ; Enter predefine phase.
       
   315 
       
   316 ; Write global attributes to file. It's okay to do this before 
       
   317 ; predefining the file's variables. We are still in "define" mode.
       
   318 
       
   319   fAtt               = True
       
   320   fAtt@description   = "Data read in from " + filename + " ASCII file."
       
   321   fAtt@creation_date = systemfunc ("date")        
       
   322   fileattdef( f, fAtt )        
       
   323 
       
   324 ; Write dimension names to file.
       
   325  
       
   326   dim_names = (/ "year",  "month" /)
       
   327   dim_sizes = (/ -1    ,  nmonth  /)
       
   328   dimUnlim  = (/ True  ,  False   /)
       
   329   filedimdef( f, dim_names, dim_sizes, dimUnlim )
       
   330 
       
   331   filedimdef( f, "lat", 1, False )
       
   332   filedimdef( f, "lon", 1, False )
       
   333 
       
   334 ; Define each variable on the file.
       
   335 
       
   336   filevardef( f, "year", "integer", "year" )
       
   337   filevardef( f, "lat" , "float"  , "lat" )
       
   338   filevardef( f, "lon" , "float"  , "lon" )
       
   339 
       
   340   do i=0,nfields-1
       
   341 
       
   342 ;    define variable
       
   343 
       
   344      filevardef(f, var_names(i), var_types(i), dim_names)
       
   345 
       
   346 ;    define variable attributes
       
   347 
       
   348      if (i.le.1) then
       
   349         varAtt = 0
       
   350 ;       varAtt@_FillValue = -999
       
   351      else
       
   352         varAtt = 0.
       
   353 ;       varAtt@_FillValue = 1.e36
       
   354      end if
       
   355 
       
   356      varAtt@long_name  = var_names(i)
       
   357 
       
   358      filevarattdef( f, var_names(i) , varAtt )
       
   359 
       
   360      delete (varAtt)
       
   361   end do
       
   362 ;-----------------------------------------------------------------
       
   363 
       
   364 ; Loop through each field, read the values for that field, print 
       
   365 ; information about the variable, and then write it to the netCDF file.
       
   366 
       
   367   do i=0,nfields-1
       
   368     ifield = i+1                         ; Fields start at #1, not #0.
       
   369 
       
   370     tmp_data = new((/1,nmonth/),var_types(i))
       
   371 
       
   372     if (i.le.1) then
       
   373        tmp_data@_FillValue = -999
       
   374     else
       
   375        tmp_data@_FillValue = 1.e36
       
   376     end if     
       
   377 
       
   378     tmp_data(0,:) = read_field(data,ifield,dindices,var_types(i))
       
   379   
       
   380     tmp_data = where(tmp_data .le. -9000.,tmp_data@_FillValue,tmp_data)
       
   381 
       
   382 ;   change Month to yyyymm
       
   383 
       
   384     if (i.eq.0) then
       
   385        tmp_data(0,:) = tmp_data(0,:) + year*100
       
   386     end if 
       
   387 
       
   388 ; Print some info about the variable.
       
   389 
       
   390 ;   print("")
       
   391 ;   print("Writing variable '" + var_names(i) + "' (field #" + ifield + ").")
       
   392 ;   print("Type is " + var_types(i) + ".")
       
   393 ;   print("min/max = " + min(tmp_data) + "/" + max(tmp_data))
       
   394 
       
   395 ;   if(any(ismissing(tmp_data))) then
       
   396 ;     print("This variable does contain missing values.")
       
   397 ;   else
       
   398 ;     print("This variable doesn't contain missing values.")
       
   399 ;   end if
       
   400 
       
   401 ; write variable to file
       
   402 
       
   403     f->$var_names(i)$ = tmp_data       ; Write to netCDF file.
       
   404 
       
   405     delete(tmp_data)                   ; Delete for next round.
       
   406   end do
       
   407 
       
   408 ; write variable to file
       
   409 
       
   410   f->year = year
       
   411   f->lat  = lat
       
   412   f->lon  = lon 
       
   413 end