npp/01.read_81.ncl.y
author Forrest Hoffman <forrest@climatemodeling.org>
Mon, 26 Jan 2009 22:08:20 -0500
changeset 0 0c6405ab2ff4
permissions -rw-r--r--
Initial commit of C-LAMP Diagnostics from Jeff Lee
     1 ;----------------------------------------------------------------------
     2 ; This example reads an ASCII file that is formatted a specific way, and
     3 ; writes out the results to a netCDF file.
     4 ;
     5 ; The first line in the ASCII file must be a header, with each field
     6 ; separated by a single character delimiter (like a ","). The rest of
     7 ; the file must be such that each row contains all fields, each
     8 ; separated by the designated delimiter.
     9 ;
    10 ; The fields can be integer, float, double, character, or string.
    11 ; String fields cannot be written to a netCDF file. They have to
    12 ; be read in as character arrays and written out that way.
    13 ;----------------------------------------------------------------------
    14 
    15 ;----------------------------------------------------------------------
    16 ; This function returns the index locations of the given delimiter
    17 ; in a row or several rows of strings.
    18 ;----------------------------------------------------------------------
    19 function delim_indices(strings,nfields,delimiter)
    20 local cstrings, cdelim
    21 begin
    22   nrows = dimsizes(strings)
    23 ;
    24 ; Handle special case if we only have one string. Make sure it
    25 ; is put into a 2D array.
    26 ;
    27   if(nrows.eq.1) then
    28     cstrings = new((/1,strlen(strings)+1/),character)
    29   end if
    30 
    31   cstrings = stringtochar(strings)        ; Convert to characters.
    32   cdelim   = stringtochar(delimiter)      ; Convert delimiter to character.
    33 ;
    34 ; Som error checking here. Make sure delimiter is one character.
    35 ;
    36   nc   = dimsizes(cdelim)
    37   rank = dimsizes(nc)
    38   if(rank.ne.1.or.(rank.eq.1.and.nc.ne.2)) then
    39     print("delim_indices: fatal: the delimiter you've selected")
    40     print("must be a single character. Can't continue.")
    41     exit
    42   end if
    43 
    44 ;
    45 ; Create array to hold indices of delimiter locations, and then loop
    46 ; through each row and find all the delimiters. Make sure each row has
    47 ; the correct number of delimiters.
    48 ;
    49   ndelims  = nfields-1
    50   cindices = new((/nrows,ndelims/),integer)
    51   do i = 0, nrows-1
    52     ii = ind(cstrings(i,:).eq.cdelim(0))
    53 ;
    54 ; Make sure there were delimiters on this row. If not, we just quit.
    55 ; This could probably be modified to do this more gracefully.
    56 ;
    57     if(any(ismissing(ii))) then
    58       print("delim_indices: fatal: I didn't find any delimiters")
    59       print("('" + delimiter + "') on row " + i + ". Can't continue.")
    60       exit
    61     end if
    62     if(dimsizes(ii).ne.ndelims) then
    63       print("delim_indices: fatal: I expected to find " + ndelims)
    64       print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".")
    65       print("Can't continue.")
    66       exit
    67     end if
    68 
    69     cindices(i,:) = ii
    70 
    71     delete(ii)            ; For next time through loop
    72   end do
    73 
    74   return(cindices)
    75 end
    76 
    77 ;----------------------------------------------------------------------
    78 ; This function reads in a particular field from a string array,
    79 ; given the field number to read (fields start at #1 and go to #nfield),
    80 ; and the indices of the delimiters.
    81 ;
    82 ; It returns either an integer, float, double, character, or a string,
    83 ; depending on the input flag "return_type".
    84 ;----------------------------------------------------------------------
    85 function read_field(strings,ifield,indices,return_type)
    86 local nstring, cstrings, nf, tmp_str
    87 begin
    88   nrows = dimsizes(strings)
    89 ;
    90 ; Handle special case if we only have one string. Make sure it
    91 ; is put into a 2D array.
    92 ;
    93   if(nrows.eq.1) then
    94     cstrings = new((/1,strlen(strings)+1/),character)
    95   end if
    96 
    97   cstrings = stringtochar(strings)
    98   nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
    99 
   100 ;
   101 ; Error checking. Make sure user has entered a valid field.
   102 ;
   103   if(ifield.le.0.or.ifield.gt.nf) then
   104     print("read_field: fatal: you've selected a field that is")
   105     print("out-of-range of the number of fields that you have (" + nf + ").")
   106     exit
   107   end if
   108 
   109 ;
   110 ; Set up array to return. For string, int, float, or double arrays,
   111 ; we don't have to do anything special. For character arrays,
   112 ; however, we do.
   113 ;
   114   if(return_type.ne."character") then
   115     return_array = new(nrows,return_type)
   116   else
   117 ;
   118 ; We don't know what the biggest character array is at this point, so
   119 ; make it bigger than necessary, and then resize later as necessary.
   120 ;
   121     tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
   122 
   123     max_len = 0     ; Use to keep track of max lengths of strings.
   124   end if
   125 
   126   do i = 0,nrows-1
   127 ;
   128 ; Special case of first field in row.
   129 ;
   130     if(ifield.eq.1) then
   131       ibeg = 0
   132       iend = indices(i,ifield-1)-1
   133     else
   134 ;
   135 ; Special case of first field in row.
   136 ;
   137       if(ifield.eq.nf) then
   138         ibeg = indices(i,ifield-2)+1
   139         iend = dimsizes(cstrings(i,:))-1
   140 ;
   141 ; Any field between first and last field.
   142 ;
   143       else
   144         ibeg = indices(i,ifield-2)+1
   145         iend = indices(i,ifield-1)-1
   146       end if  
   147     end if
   148 ;
   149 ; Here's the code that pulls off the correct string, and converts it
   150 ; to float if desired.
   151 ;
   152     if(return_type.eq."integer") then
   153       return_array(i) = stringtointeger(chartostring(cstrings(i,ibeg:iend)))
   154     end if
   155     if(return_type.eq."float") then
   156       return_array(i) = stringtofloat(chartostring(cstrings(i,ibeg:iend)))
   157     end if
   158     if(return_type.eq."double") then
   159       return_array(i) = stringtodouble(chartostring(cstrings(i,ibeg:iend)))
   160     end if
   161     if(return_type.eq."string") then
   162       return_array(i) = chartostring(cstrings(i,ibeg:iend))
   163     end if
   164     if(return_type.eq."character") then
   165       if( (iend-ibeg) .gt. max_len) then
   166         max_len = iend-ibeg
   167       end if
   168       tmp_return_array(i,0:iend-ibeg) = cstrings(i,ibeg:iend)
   169     end if
   170   end do
   171 
   172   if(return_type.eq."character") then
   173     return_array = new((/nrows,max_len/),"character")
   174     return_array = tmp_return_array(:,0:max_len-1)
   175   end if
   176 
   177   return(return_array)
   178 end
   179 
   180 
   181 ;----------------------------------------------------------------------
   182 ; This function reads in string fields only to get the maximum string
   183 ; length.
   184 ;----------------------------------------------------------------------
   185 function get_maxlen(strings,ifield,indices)
   186 local nstring, cstrings, nf, tmp_str
   187 begin
   188   nrows = dimsizes(strings)
   189 ;
   190 ; Handle special case if we only have one string. Make sure it
   191 ; is put into a 2D array.
   192 ;
   193   if(nrows.eq.1) then
   194     cstrings = new((/1,strlen(strings)+1/),character)
   195   end if
   196 
   197   cstrings = stringtochar(strings)
   198   nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
   199 
   200 ;
   201 ; Error checking. Make sure user has entered a valid field.
   202 ;
   203   if(ifield.le.0.or.ifield.gt.nf) then
   204     print("read_field: fatal: you've selected a field that is")
   205     print("out-of-range of the number of fields that you have (" + nf + ").")
   206     exit
   207   end if
   208 ;
   209 ; We don't know what the biggest character array is at this point, so
   210 ; make it bigger than necessary, and then resize later as necessary.
   211 ;
   212   tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
   213 
   214   max_len = 0     ; Use to keep track of max lengths of strings.
   215 
   216   do i = 0,nrows-1
   217 ;
   218 ; Special case of first field in row.
   219 ;
   220     if(ifield.eq.1) then
   221       ibeg = 0
   222       iend = indices(i,ifield-1)-1
   223     else
   224 ;
   225 ; Special case of first field in row.
   226 ;
   227       if(ifield.eq.nf) then
   228         ibeg = indices(i,ifield-2)+1
   229         iend = dimsizes(cstrings(i,:))-1
   230 ;
   231 ; Any field between first and last field.
   232 ;
   233       else
   234         ibeg = indices(i,ifield-2)+1
   235         iend = indices(i,ifield-1)-1
   236       end if  
   237     end if
   238     if( (iend-ibeg) .gt. max_len) then
   239       max_len = iend-ibeg
   240     end if
   241   end do
   242 
   243   return(max_len)
   244 end
   245 
   246 ;----------------------------------------------------------------------
   247 ; Main code.
   248 ;----------------------------------------------------------------------
   249 begin
   250 ;
   251 ; Set up defaults here.  We are hard-coding the field types here.
   252 ; You can set up this script to try to determine the field types 
   253 ; automatically, but this is a bit tedious. Maybe later.
   254 ;
   255   filename  = "data.81"                 ; ASCII" file to read.
   256   cdf_file  = filename + ".nc"          ; netCDF file to write.
   257   nfields   = 22                        ; # of fields
   258   delimiter = ","                       ; field delimiter
   259 ;
   260 ; In this case, fields #6-#8 are strings, fields #2, #3, and #11
   261 ; are float, and the rest of the fields are integers.
   262 ;
   263   var_types      = new(nfields,string)
   264   var_strlens    = new(nfields,integer)   ; var to hold strlens, just in case.
   265 
   266   var_types      = "integer"       ; Most are ints.
   267   var_types(5:7) = "character"     ; Corresponds to fields 6-8.
   268   var_types(1:2) = "float"
   269   var_types(10)  = "float"
   270 
   271   if(isfilepresent(cdf_file))
   272     print("Warning: '" + cdf_file + "' exists. Will remove it.")
   273     system("/bin/rm " + cdf_file)
   274   end if
   275 
   276 ;
   277 ; Read in data as strings. This will create a string array that has the
   278 ; same number of strings as there are rows in the file. We will then need
   279 ; to parse each string later.
   280 ;
   281   read_data = asciiread(filename,-1,"string")
   282   header    = read_data(0)        ; Header. Use for variable names.
   283   data      = read_data(1:)       ; Get rid of first line which is a header.
   284   nrows     = dimsizes(data)      ; Number of rows.
   285 
   286 ;
   287 ; Read in locations of delimiters in each string row.
   288 ;
   289   hindices = delim_indices(header,nfields,delimiter)   ; header row
   290   dindices = delim_indices(data,nfields,delimiter)     ; rest of file
   291 
   292 ;
   293 ; Read in the field names which will become variable names on
   294 ; the netCDF file.
   295 ;
   296   var_names = new(nfields,string)
   297 
   298   do i=0,nfields-1
   299     var_names(i) = read_field(header,i+1,hindices,"string")
   300   end do
   301 
   302 ;
   303 ; Write out this netCDF file efficiently so it will be faster.
   304 ; Try to predefine everything before you write to it.
   305 ; 
   306   f = addfile(cdf_file,"c")
   307   setfileoption(f,"DefineMode",True)       ; Enter predefine phase.
   308 
   309 ;
   310 ; Write global attributes to file. It's okay to do this before 
   311 ; predefining the file's variables. We are still in "define" mode.
   312 ;
   313   fAtt               = True
   314   fAtt@description   = "Data read in from " + filename + " ASCII file."
   315   fAtt@creation_date = systemfunc ("date")        
   316   fileattdef( f, fAtt )        
   317 
   318 ;
   319 ; Write dimension names to file. If there are no character variables,
   320 ; then there's only one dimension name ("nvalues").
   321 ;
   322 ; Otherwise, we need to write a dimension name for every character
   323 ; variable, which will indicate the maximum string length for that
   324 ; variable.
   325 ;
   326   indc = ind(var_types.eq."character")
   327   if(.not.any(ismissing(indc))) then
   328 ;
   329 ; We have to treat the character arrays special here. We need to
   330 ; know their sizes so we can write the maximum size of each char
   331 ; array to the netCDF file as a dimension name. This means we
   332 ; need to read in the character variables once to get the string
   333 ; lengths, then we'll read them again later to get the actual values.
   334 ;
   335     do i=0,dimsizes(indc)-1
   336       var_strlens(indc(i)) = get_maxlen(data,indc(i)+1,dindices)
   337     end do
   338 
   339     ndims    = dimsizes(indc) + 1
   340     dimNames = new(ndims,string)
   341     dimSizes = new(ndims,integer)
   342     dimUnlim = new(ndims,logical)
   343 
   344     dimUnlim            = False
   345     dimUnlim(0)         = True
   346     dimNames(0)         = "nvalues"
   347     dimNames(1:ndims-1) = var_names(indc) + "_StrLen"
   348     dimSizes(0)         = -1
   349     dimSizes(1:ndims-1) = var_strlens(indc)
   350     filedimdef(f,dimNames,dimSizes,dimUnlim)
   351   else
   352 ;
   353 ; No character variables, so just write the one dimension name.
   354 ;
   355     filedimdef(f,"nvalues",-1,True)
   356   end if
   357 
   358 ;
   359 ; Define each variable on the file.
   360 ;
   361 ; Don't deal with variables that are of type string.
   362 ;
   363   do i=0,nfields-1
   364     if(var_types(i).ne."string") then
   365       if(var_types(i).ne."character") then
   366         filevardef(f, var_names(i), var_types(i), "nvalues")
   367       else
   368         filevardef(f, var_names(i), var_types(i),  \
   369                    (/"nvalues",var_names(i)+"_StrLen"/))
   370       end if
   371     end if
   372   end do
   373 
   374 ;
   375 ; Loop through each field, read the values for that field, print 
   376 ; information about the variable, and then write it to the netCDF
   377 ; file.
   378 ;
   379   do i=0,nfields-1
   380     ifield = i+1           ; Fields start at #1, not #0.
   381 ;
   382 ; Note: you can't write strings to a netCDF file, so these have
   383 ; to be written out as character arrays.
   384 ;
   385     tmp_data = read_field(data,ifield,dindices,var_types(i))
   386 ;
   387 ; Print some info about the variable.
   388 ;
   389     print("")
   390     print("Writing variable '" + var_names(i) + "' (field #" + ifield + ").")
   391     print("Type is " + var_types(i) + ".")
   392     if(var_types(i).ne."string".and.var_types(i).ne."character") then
   393       print("min/max = " + min(tmp_data) + "/" + max(tmp_data))
   394     end if
   395 
   396     if(any(ismissing(tmp_data))) then
   397       print("This variable does contain missing values.")
   398     else
   399       print("This variable doesn't contain missing values.")
   400     end if
   401 
   402     f->$var_names(i)$ = tmp_data       ; Write to netCDF file.
   403 
   404     delete(tmp_data)                   ; Delete for next round.
   405   end do
   406 end