diff -r 000000000000 -r 0c6405ab2ff4 ameriflux/06.read_ascii.ncl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ameriflux/06.read_ascii.ncl	Mon Jan 26 22:08:20 2009 -0500
@@ -0,0 +1,412 @@
+;----------------------------------------------------------------------
+; set _FillValue
+; change Month to yyyymm
+; add Month to output
+; add lat, lon to output
+; add year to output
+;
+; This example reads an ASCII file that is formatted a specific way, and
+; writes out the results to a netCDF file.
+;
+; The first line in the ASCII file must be a header, with each field
+; separated by a single character delimiter (like a ","). The rest of
+; the file must be such that each row contains all fields, each
+; separated by the designated delimiter.
+;
+; The fields can be integer, float, double, character, or string.
+; String fields cannot be written to a netCDF file. They have to
+; be read in as character arrays and written out that way.
+;----------------------------------------------------------------------
+
+;----------------------------------------------------------------------
+; This function returns the index locations of the given delimiter
+; in a row or several rows of strings.
+;----------------------------------------------------------------------
+function delim_indices(strings,nfields,delimiter)
+local nrows, cstrings, cdelim, nc, rank, ndelims, cindices, i, ii
+begin
+; strings   - one string, or a 1D array of strings (one per row)
+; nfields   - number of fields expected on every row
+; delimiter - single-character string that separates the fields
+; Returns an nrows x (nfields-1) integer array of delimiter positions.
+  nrows = dimsizes(strings)
+;
+; Handle special case if we only have one string. Pre-allocating a
+; 1 x n array makes sure it is put into a 2D array.
+;
+  if(nrows.eq.1) then
+    cstrings = new((/1,strlen(strings)+1/),character)
+  end if
+  cstrings = stringtochar(strings)        ; Convert to characters.
+  cdelim   = stringtochar(delimiter)      ; Convert delimiter to character.
+;
+; Some error checking here. Make sure delimiter is one character.
+; any() keeps the condition scalar even if nc has several elements.
+;
+  nc   = dimsizes(cdelim)
+  rank = dimsizes(nc)
+  if(rank.ne.1 .or. any(nc.ne.2)) then
+    print("delim_indices: fatal: the delimiter you've selected")
+    print("must be a single character. Can't continue.")
+    exit
+  end if
+;
+; Create array to hold indices of delimiter locations, and then loop
+; through each row and find all the delimiters. Make sure each row has
+; the correct number of delimiters.
+;
+  ndelims  = nfields-1
+  cindices = new((/nrows,ndelims/),integer)
+  do i = 0, nrows-1
+    ii = ind(cstrings(i,:).eq.cdelim(0))
+;
+; Make sure there were delimiters on this row. If not, we just quit.
+;
+    if(any(ismissing(ii))) then
+      print("delim_indices: fatal: I didn't find any delimiters")
+      print("('" + delimiter + "') on row " + i + ". Can't continue.")
+      exit
+    end if
+    if(dimsizes(ii).ne.ndelims) then
+      print("delim_indices: fatal: I expected to find " + ndelims)
+      print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".")
+      print("Can't continue.")
+      exit
+    end if
+    cindices(i,:) = ii
+    delete(ii)            ; For next time through loop
+  end do
+
+  return(cindices)
+end
+
+;----------------------------------------------------------------------
+; This function reads in a particular field from a string array,
+; given the field number to read (fields start at #1 and go to #nfield),
+; and the indices of the delimiters.
+;
+; It returns either an integer, float, double, character, or a string,
+; depending on the input flag "return_type".
+;----------------------------------------------------------------------
+function read_field(strings,ifield,indices,return_type)
+local nrows, cstrings, nf, i, ibeg, iend, max_len, return_array, tmp_return_array
+begin
+; strings     - one string, or a 1D array of strings (one per row)
+; ifield      - field to extract; 1 is the first field, nf the last
+; indices     - delimiter positions, nrows x (nfields-1)
+; return_type - "integer", "float", "double", "string" or "character"
+; Rows whose field is empty are left as missing values.
+  nrows = dimsizes(strings)
+;
+; Handle special case if we only have one string. Make sure it
+; is put into a 2D array.
+;
+  if(nrows.eq.1) then
+    cstrings = new((/1,strlen(strings)+1/),character)
+  end if
+  cstrings = stringtochar(strings)
+  nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
+;
+; Error checking. Make sure user has entered a valid field.
+;
+  if(ifield.le.0.or.ifield.gt.nf) then
+    print("read_field: fatal: you've selected a field that is")
+    print("out-of-range of the number of fields that you have (" + nf + ").")
+    exit
+  end if
+;
+; Set up array to return. new() fills it with missing values, which
+; is what rows with an empty field keep. Character arrays go into a
+; scratch array that is wider than necessary and trimmed after the loop.
+;
+  if(return_type.ne."character") then
+    return_array = new(nrows,return_type)
+  else
+    tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
+    max_len = 0     ; Use to keep track of max lengths of strings.
+  end if
+
+  do i = 0,nrows-1
+;
+; Special case of first field in row.
+;
+    if(ifield.eq.1) then
+      ibeg = 0
+      iend = indices(i,ifield-1)-1
+    else
+;
+; Special case of last field in row.
+;
+      if(ifield.eq.nf) then
+        ibeg = indices(i,ifield-2)+1
+        iend = dimsizes(cstrings(i,:))-1
+;
+; Any field between first and last field.
+;
+      else
+        ibeg = indices(i,ifield-2)+1
+        iend = indices(i,ifield-1)-1
+      end if
+    end if
+;
+; Pull off the field and convert it to the requested type. An empty
+; field has iend < ibeg; NCL reads such a subscript range in reverse
+; (or fails on a negative index), so that row is skipped instead.
+;
+    if(iend.ge.ibeg) then
+      if(return_type.eq."integer") then
+        return_array(i) = stringtointeger(chartostring(cstrings(i,ibeg:iend)))
+      end if
+      if(return_type.eq."float") then
+        return_array(i) = stringtofloat(chartostring(cstrings(i,ibeg:iend)))
+      end if
+      if(return_type.eq."double") then
+        return_array(i) = stringtodouble(chartostring(cstrings(i,ibeg:iend)))
+      end if
+      if(return_type.eq."string") then
+        return_array(i) = chartostring(cstrings(i,ibeg:iend))
+      end if
+      if(return_type.eq."character") then
+        if( (iend-ibeg+1) .gt. max_len) then
+          max_len = iend-ibeg+1
+        end if
+        tmp_return_array(i,0:iend-ibeg) = cstrings(i,ibeg:iend)
+      end if
+    end if
+  end do
+
+  if(return_type.eq."character") then
+    return_array = new((/nrows,max_len/),"character")
+    return_array = tmp_return_array(:,0:max_len-1)
+  end if
+
+  return(return_array)
+end
+
+
+;----------------------------------------------------------------------
+; This function reads in string fields only to get the maximum string
+; length.
+;----------------------------------------------------------------------
+function get_maxlen(strings,ifield,indices)
+local nrows, cstrings, nf, i, ibeg, iend, max_len
+begin
+; strings - one string, or a 1D array of strings (one per row)
+; ifield  - field to measure; 1 is the first field, nf the last
+; indices - delimiter positions, nrows x (nfields-1)
+; Returns the largest character count found for that field.
+
+  nrows = dimsizes(strings)
+;
+; Handle special case if we only have one string. Make sure it
+; is put into a 2D array.
+;
+  if(nrows.eq.1) then
+    cstrings = new((/1,strlen(strings)+1/),character)
+  end if
+
+  cstrings = stringtochar(strings)
+  nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
+;
+; Error checking. Make sure user has entered a valid field.
+;
+  if(ifield.le.0.or.ifield.gt.nf) then
+    print("get_maxlen: fatal: you've selected a field that is")
+    print("out-of-range of the number of fields that you have (" + nf + ").")
+    exit
+  end if
+
+  max_len = 0     ; Use to keep track of max lengths of strings.
+
+  do i = 0,nrows-1
+;
+; Special case of first field in row.
+;
+    if(ifield.eq.1) then
+      ibeg = 0
+      iend = indices(i,ifield-1)-1
+    else
+;
+; Special case of last field in row; it runs to the end of the
+; character row.
+;
+      if(ifield.eq.nf) then
+        ibeg = indices(i,ifield-2)+1
+        iend = dimsizes(cstrings(i,:))-1
+;
+; Any field between first and last field.
+;
+      else
+        ibeg = indices(i,ifield-2)+1
+        iend = indices(i,ifield-1)-1
+      end if
+    end if
+    if( (iend-ibeg+1) .gt. max_len) then
+      max_len = iend-ibeg+1
+    end if
+  end do
+
+  return(max_len)
+end
+
+;----------------------------------------------------------------------
+; Main code.
+;----------------------------------------------------------------------
+begin
+; Parse the delimited ASCII file for "year" and write each field to netCDF.
+;###############################################################
+; Set up defaults. Everything in this section is hard-coded.
+
+  year    = 2005
+  lat     = 35.9735823
+  lon     = -79.1004304 + 360.   ; presumably shifted to 0-360 degrees; confirm
+
+  nfields   = 30                        ; # of fields
+  delimiter = ","                       ; field delimiter
+ 
+  filename  = year+".txt"       ; ASCII file to read.
+  cdf_file  = year+"_L4_m.nc"   ; netCDF file to write. 
+
+  if(isfilepresent(cdf_file))
+    print("Warning: '" + cdf_file + "' exists. Will remove it.")
+    system("/bin/rm " + cdf_file)
+  end if
+
+; In this case, fields #1-#2 are integers,
+; and the rest of the fields are floats.
+
+  var_types      = new(nfields,string)
+  var_types      = "float"       ; Most are floats.
+  var_types(0:1) = "integer"
+
+;#####################################################################
+
+; Read in data as strings. This will create a string array that has the
+; same number of strings as there are rows in the file. We will then need
+; to parse each string later.
+
+  read_data = asciiread(filename,-1,"string")
+
+  header    = read_data(0)        ; Header. Use for variable names.
+  data      = read_data(1:)       ; Get rid of first line which is a header.
+  nmonth    = dimsizes(data)      ; Number of data rows == number of months.
+
+; Read in locations of delimiters in each string row.
+
+  hindices = delim_indices(header,nfields,delimiter)   ; header row
+  dindices = delim_indices(data,nfields,delimiter)     ; rest of file
+
+; Read in the field names which will become variable names on
+; the netCDF file.
+
+  var_names = new(nfields,string)
+
+  do i=0,nfields-1
+    var_names(i) = read_field(header,i+1,hindices,"string")
+  end do
+
+;-------------------------------------------------------------------
+; Write out this netCDF file efficiently so it will be faster.
+; Try to predefine everything before you write to it.
+
+  f = addfile(cdf_file,"c")
+  setfileoption(f,"DefineMode",True)       ; Enter predefine phase.
+
+; Write global attributes to file. It's okay to do this before 
+; predefining the file's variables. We are still in "define" mode.
+
+  fAtt               = True
+  fAtt@description   = "Data read in from " + filename + " ASCII file."
+  fAtt@creation_date = systemfunc ("date")        
+  fileattdef( f, fAtt )        
+
+; Write dimension names to file.
+ 
+  dim_names = (/ "year",  "month" /)
+  dim_sizes = (/ -1    ,  nmonth  /)   ; -1 pairs with the unlimited "year"
+  dimUnlim  = (/ True  ,  False   /)
+  filedimdef( f, dim_names, dim_sizes, dimUnlim )
+
+  filedimdef( f, "lat", 1, False )
+  filedimdef( f, "lon", 1, False )
+
+; Define each variable on the file.
+
+  filevardef( f, "year", "integer", "year" )
+  filevardef( f, "lat" , "float"  , "lat" )
+  filevardef( f, "lon" , "float"  , "lon" )
+
+  do i=0,nfields-1
+
+;    define variable on the (year, month) dimensions
+
+     filevardef(f, var_names(i), var_types(i), dim_names)
+
+;    define variable attributes; varAtt only carries the attributes
+
+     if (i.le.1) then
+        varAtt = 0
+;       varAtt@_FillValue = -999
+     else
+        varAtt = 0.
+;       varAtt@_FillValue = 1.e36
+     end if
+
+     varAtt@long_name  = var_names(i)
+
+     filevarattdef( f, var_names(i) , varAtt )
+
+     delete (varAtt)
+  end do
+;-----------------------------------------------------------------
+
+; Loop through each field, read the values for that field, print 
+; information about the variable, and then write it to the netCDF file.
+
+  do i=0,nfields-1
+    ifield = i+1                         ; Fields start at #1, not #0.
+
+    tmp_data = new((/1,nmonth/),var_types(i))   ; 1 x nmonth, as (year, month)
+
+    if (i.le.1) then
+       tmp_data@_FillValue = -999
+    else
+       tmp_data@_FillValue = 1.e36
+    end if     
+
+    tmp_data(0,:) = read_field(data,ifield,dindices,var_types(i))
+  
+    tmp_data = where(tmp_data .le. -9000.,tmp_data@_FillValue,tmp_data)   ; values <= -9000 become _FillValue
+
+;   change Month (field #1) to yyyymm by adding year*100
+
+    if (i.eq.0) then
+       tmp_data(0,:) = tmp_data(0,:) + year*100
+    end if 
+
+; Print some info about the variable (disabled).
+
+;   print("")
+;   print("Writing variable '" + var_names(i) + "' (field #" + ifield + ").")
+;   print("Type is " + var_types(i) + ".")
+;   print("min/max = " + min(tmp_data) + "/" + max(tmp_data))
+
+;   if(any(ismissing(tmp_data))) then
+;     print("This variable does contain missing values.")
+;   else
+;     print("This variable doesn't contain missing values.")
+;   end if
+
+; write this field's values to file
+
+    f->$var_names(i)$ = tmp_data       ; Write to netCDF file.
+
+    delete(tmp_data)                   ; Delete for next round.
+  end do
+
+; write the year, lat and lon values to file
+
+  f->year = year
+  f->lat  = lat
+  f->lon  = lon 
+end