ameriflux/05.read_ascii.ncl
author Forrest Hoffman <forrest@climatemodeling.org>
Mon, 26 Jan 2009 22:08:20 -0500
changeset 0 0c6405ab2ff4
permissions -rw-r--r--
Initial commit of C-LAMP Diagnostics from Jeff Lee
forrest@0
     1
;----------------------------------------------------------------------
forrest@0
     2
; set _FillValue
forrest@0
     3
; change Month to yyyymm
forrest@0
     4
; add Month to output
forrest@0
     5
; add lat, lon to output
forrest@0
     6
; add year to output
forrest@0
     7
;
forrest@0
     8
; This example reads an ASCII file that is formatted a specific way, and
forrest@0
     9
; writes out the results to a netCDF file.
forrest@0
    10
;
forrest@0
    11
; The first line in the ASCII file must be a header, with each field
forrest@0
    12
; separated by a single character delimiter (like a ","). The rest of
forrest@0
    13
; the file must be such that each row contains all fields, each
forrest@0
    14
; separated by the designated delimiter.
forrest@0
    15
;
forrest@0
    16
; The fields can be integer, float, double, character, or string.
forrest@0
    17
; String fields cannot be written to a netCDF file. They have to
forrest@0
    18
; be read in as character arrays and written out that way.
forrest@0
    19
;----------------------------------------------------------------------
forrest@0
    20
forrest@0
    21
;----------------------------------------------------------------------
forrest@0
    22
; This function returns the index locations of the given delimiter
forrest@0
    23
; in a row or several rows of strings.
forrest@0
    24
;----------------------------------------------------------------------
forrest@0
    25
function delim_indices(strings,nfields,delimiter)
forrest@0
    26
local cstrings, cdelim
forrest@0
    27
begin
forrest@0
    28
  nrows = dimsizes(strings)
forrest@0
    29
;
forrest@0
    30
; Handle special case if we only have one string. Make sure it
forrest@0
    31
; is put into a 2D array.
forrest@0
    32
;
forrest@0
    33
  if(nrows.eq.1) then
forrest@0
    34
    cstrings = new((/1,strlen(strings)+1/),character)
forrest@0
    35
  end if
forrest@0
    36
forrest@0
    37
  cstrings = stringtochar(strings)        ; Convert to characters.
forrest@0
    38
  cdelim   = stringtochar(delimiter)      ; Convert delimiter to character.
forrest@0
    39
;
forrest@0
    40
; Som error checking here. Make sure delimiter is one character.
forrest@0
    41
;
forrest@0
    42
  nc   = dimsizes(cdelim)
forrest@0
    43
  rank = dimsizes(nc)
forrest@0
    44
  if(rank.ne.1.or.(rank.eq.1.and.nc.ne.2)) then
forrest@0
    45
    print("delim_indices: fatal: the delimiter you've selected")
forrest@0
    46
    print("must be a single character. Can't continue.")
forrest@0
    47
    exit
forrest@0
    48
  end if
forrest@0
    49
forrest@0
    50
;
forrest@0
    51
; Create array to hold indices of delimiter locations, and then loop
forrest@0
    52
; through each row and find all the delimiters. Make sure each row has
forrest@0
    53
; the correct number of delimiters.
forrest@0
    54
;
forrest@0
    55
  ndelims  = nfields-1
forrest@0
    56
  cindices = new((/nrows,ndelims/),integer)
forrest@0
    57
  do i = 0, nrows-1
forrest@0
    58
    ii = ind(cstrings(i,:).eq.cdelim(0))
forrest@0
    59
;
forrest@0
    60
; Make sure there were delimiters on this row. If not, we just quit.
forrest@0
    61
; This could probably be modified to do this more gracefully.
forrest@0
    62
;
forrest@0
    63
    if(any(ismissing(ii))) then
forrest@0
    64
      print("delim_indices: fatal: I didn't find any delimiters")
forrest@0
    65
      print("('" + delimiter + "') on row " + i + ". Can't continue.")
forrest@0
    66
      exit
forrest@0
    67
    end if
forrest@0
    68
    if(dimsizes(ii).ne.ndelims) then
forrest@0
    69
      print("delim_indices: fatal: I expected to find " + ndelims)
forrest@0
    70
      print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".")
forrest@0
    71
      print("Can't continue.")
forrest@0
    72
      exit
forrest@0
    73
    end if
forrest@0
    74
forrest@0
    75
    cindices(i,:) = ii
forrest@0
    76
forrest@0
    77
    delete(ii)            ; For next time through loop
forrest@0
    78
  end do
forrest@0
    79
forrest@0
    80
  return(cindices)
forrest@0
    81
end
forrest@0
    82
forrest@0
    83
;----------------------------------------------------------------------
forrest@0
    84
; This function reads in a particular field from a string array,
forrest@0
    85
; given the field number to read (fields start at #1 and go to #nfield),
forrest@0
    86
; and the indices of the delimiters.
forrest@0
    87
;
forrest@0
    88
; It returns either an integer, float, double, character, or a string,
forrest@0
    89
; depending on the input flag "return_type".
forrest@0
    90
;----------------------------------------------------------------------
forrest@0
    91
function read_field(strings,ifield,indices,return_type)
forrest@0
    92
local nstring, cstrings, nf, tmp_str
forrest@0
    93
begin
forrest@0
    94
  nrows = dimsizes(strings)
forrest@0
    95
;
forrest@0
    96
; Handle special case if we only have one string. Make sure it
forrest@0
    97
; is put into a 2D array.
forrest@0
    98
;
forrest@0
    99
  if(nrows.eq.1) then
forrest@0
   100
    cstrings = new((/1,strlen(strings)+1/),character)
forrest@0
   101
  end if
forrest@0
   102
forrest@0
   103
  cstrings = stringtochar(strings)
forrest@0
   104
  nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
forrest@0
   105
forrest@0
   106
;
forrest@0
   107
; Error checking. Make sure user has entered a valid field.
forrest@0
   108
;
forrest@0
   109
  if(ifield.le.0.or.ifield.gt.nf) then
forrest@0
   110
    print("read_field: fatal: you've selected a field that is")
forrest@0
   111
    print("out-of-range of the number of fields that you have (" + nf + ").")
forrest@0
   112
    exit
forrest@0
   113
  end if
forrest@0
   114
forrest@0
   115
;
forrest@0
   116
; Set up array to return. For string, int, float, or double arrays,
forrest@0
   117
; we don't have to do anything special. For character arrays,
forrest@0
   118
; however, we do.
forrest@0
   119
;
forrest@0
   120
  if(return_type.ne."character") then
forrest@0
   121
    return_array = new(nrows,return_type)
forrest@0
   122
  else
forrest@0
   123
;
forrest@0
   124
; We don't know what the biggest character array is at this point, so
forrest@0
   125
; make it bigger than necessary, and then resize later as necessary.
forrest@0
   126
;
forrest@0
   127
    tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
forrest@0
   128
forrest@0
   129
    max_len = 0     ; Use to keep track of max lengths of strings.
forrest@0
   130
  end if
forrest@0
   131
forrest@0
   132
  do i = 0,nrows-1
forrest@0
   133
;
forrest@0
   134
; Special case of first field in row.
forrest@0
   135
;
forrest@0
   136
    if(ifield.eq.1) then
forrest@0
   137
      ibeg = 0
forrest@0
   138
      iend = indices(i,ifield-1)-1
forrest@0
   139
    else
forrest@0
   140
;
forrest@0
   141
; Special case of first field in row.
forrest@0
   142
;
forrest@0
   143
      if(ifield.eq.nf) then
forrest@0
   144
        ibeg = indices(i,ifield-2)+1
forrest@0
   145
        iend = dimsizes(cstrings(i,:))-1
forrest@0
   146
;
forrest@0
   147
; Any field between first and last field.
forrest@0
   148
;
forrest@0
   149
      else
forrest@0
   150
        ibeg = indices(i,ifield-2)+1
forrest@0
   151
        iend = indices(i,ifield-1)-1
forrest@0
   152
      end if  
forrest@0
   153
    end if
forrest@0
   154
;
forrest@0
   155
; Here's the code that pulls off the correct string, and converts it
forrest@0
   156
; to float if desired.
forrest@0
   157
;
forrest@0
   158
    if(return_type.eq."integer") then
forrest@0
   159
      return_array(i) = stringtointeger(chartostring(cstrings(i,ibeg:iend)))
forrest@0
   160
    end if
forrest@0
   161
    if(return_type.eq."float") then
forrest@0
   162
      return_array(i) = stringtofloat(chartostring(cstrings(i,ibeg:iend)))
forrest@0
   163
    end if
forrest@0
   164
    if(return_type.eq."double") then
forrest@0
   165
      return_array(i) = stringtodouble(chartostring(cstrings(i,ibeg:iend)))
forrest@0
   166
    end if
forrest@0
   167
    if(return_type.eq."string") then
forrest@0
   168
      return_array(i) = chartostring(cstrings(i,ibeg:iend))
forrest@0
   169
    end if
forrest@0
   170
    if(return_type.eq."character") then
forrest@0
   171
      if( (iend-ibeg+1) .gt. max_len) then
forrest@0
   172
        max_len = iend-ibeg+1
forrest@0
   173
      end if
forrest@0
   174
      tmp_return_array(i,0:iend-ibeg) = cstrings(i,ibeg:iend)
forrest@0
   175
    end if
forrest@0
   176
  end do
forrest@0
   177
forrest@0
   178
  if(return_type.eq."character") then
forrest@0
   179
    return_array = new((/nrows,max_len/),"character")
forrest@0
   180
    return_array = tmp_return_array(:,0:max_len-1)
forrest@0
   181
  end if
forrest@0
   182
forrest@0
   183
  return(return_array)
forrest@0
   184
end
forrest@0
   185
forrest@0
   186
forrest@0
   187
;----------------------------------------------------------------------
forrest@0
   188
; This function reads in string fields only to get the maximum string
forrest@0
   189
; length.
forrest@0
   190
;----------------------------------------------------------------------
forrest@0
   191
function get_maxlen(strings,ifield,indices)
forrest@0
   192
local nstring, cstrings, nf, tmp_str
forrest@0
   193
begin
forrest@0
   194
  nrows = dimsizes(strings)
forrest@0
   195
;
forrest@0
   196
; Handle special case if we only have one string. Make sure it
forrest@0
   197
; is put into a 2D array.
forrest@0
   198
;
forrest@0
   199
  if(nrows.eq.1) then
forrest@0
   200
    cstrings = new((/1,strlen(strings)+1/),character)
forrest@0
   201
  end if
forrest@0
   202
forrest@0
   203
  cstrings = stringtochar(strings)
forrest@0
   204
  nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
forrest@0
   205
forrest@0
   206
;
forrest@0
   207
; Error checking. Make sure user has entered a valid field.
forrest@0
   208
;
forrest@0
   209
  if(ifield.le.0.or.ifield.gt.nf) then
forrest@0
   210
    print("read_field: fatal: you've selected a field that is")
forrest@0
   211
    print("out-of-range of the number of fields that you have (" + nf + ").")
forrest@0
   212
    exit
forrest@0
   213
  end if
forrest@0
   214
;
forrest@0
   215
; We don't know what the biggest character array is at this point, so
forrest@0
   216
; make it bigger than necessary, and then resize later as necessary.
forrest@0
   217
;
forrest@0
   218
  tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
forrest@0
   219
forrest@0
   220
  max_len = 0     ; Use to keep track of max lengths of strings.
forrest@0
   221
forrest@0
   222
  do i = 0,nrows-1
forrest@0
   223
;
forrest@0
   224
; Special case of first field in row.
forrest@0
   225
;
forrest@0
   226
    if(ifield.eq.1) then
forrest@0
   227
      ibeg = 0
forrest@0
   228
      iend = indices(i,ifield-1)-1
forrest@0
   229
    else
forrest@0
   230
;
forrest@0
   231
; Special case of first field in row.
forrest@0
   232
;
forrest@0
   233
      if(ifield.eq.nf) then
forrest@0
   234
        ibeg = indices(i,ifield-2)+1
forrest@0
   235
        iend = dimsizes(cstrings(i,:))-1
forrest@0
   236
;
forrest@0
   237
; Any field between first and last field.
forrest@0
   238
;
forrest@0
   239
      else
forrest@0
   240
        ibeg = indices(i,ifield-2)+1
forrest@0
   241
        iend = indices(i,ifield-1)-1
forrest@0
   242
      end if  
forrest@0
   243
    end if
forrest@0
   244
    if( (iend-ibeg+1) .gt. max_len) then
forrest@0
   245
      max_len = iend-ibeg+1
forrest@0
   246
    end if
forrest@0
   247
  end do
forrest@0
   248
forrest@0
   249
  return(max_len)
forrest@0
   250
end
forrest@0
   251
forrest@0
   252
;----------------------------------------------------------------------
forrest@0
   253
; Main code.
forrest@0
   254
;----------------------------------------------------------------------
forrest@0
   255
begin
forrest@0
   256
forrest@0
   257
;###############################################################
forrest@0
   258
; Set up defaults.  We are hard-coding here..
forrest@0
   259
forrest@0
   260
  year    = 1991
forrest@0
   261
  station = "USHa1"
forrest@0
   262
  lat     = 42.5378
forrest@0
   263
  lon     = -72.1715 + 360.
forrest@0
   264
forrest@0
   265
  nfields   = 30                        ; # of fields
forrest@0
   266
  delimiter = ","                       ; field delimiter
forrest@0
   267
 
forrest@0
   268
  filename  = station+year+"_L4_m.txt"  ; ASCII" file to read.
forrest@0
   269
  cdf_file  = station+year+"_L4_m.nc"   ; netCDF file to write. 
forrest@0
   270
forrest@0
   271
  if(isfilepresent(cdf_file))
forrest@0
   272
    print("Warning: '" + cdf_file + "' exists. Will remove it.")
forrest@0
   273
    system("/bin/rm " + cdf_file)
forrest@0
   274
  end if
forrest@0
   275
forrest@0
   276
; In this case, fields #1-#2 are integers,
forrest@0
   277
; and the rest of the fields are floats.
forrest@0
   278
forrest@0
   279
  var_types      = new(nfields,string)
forrest@0
   280
  var_types      = "float"       ; Most are floats.
forrest@0
   281
  var_types(0:1) = "integer"
forrest@0
   282
forrest@0
   283
;#####################################################################
forrest@0
   284
forrest@0
   285
; Read in data as strings. This will create a string array that has the
forrest@0
   286
; same number of strings as there are rows in the file. We will then need
forrest@0
   287
; to parse each string later.
forrest@0
   288
forrest@0
   289
  read_data = asciiread(filename,-1,"string")
forrest@0
   290
forrest@0
   291
  header    = read_data(0)        ; Header. Use for variable names.
forrest@0
   292
  data      = read_data(1:)       ; Get rid of first line which is a header.
forrest@0
   293
  nmonth    = dimsizes(data)      ; Number of rows == number of month.
forrest@0
   294
forrest@0
   295
; Read in locations of delimiters in each string row.
forrest@0
   296
forrest@0
   297
  hindices = delim_indices(header,nfields,delimiter)   ; header row
forrest@0
   298
  dindices = delim_indices(data,nfields,delimiter)     ; rest of file
forrest@0
   299
forrest@0
   300
; Read in the field names which will become variable names on
forrest@0
   301
; the netCDF file.
forrest@0
   302
forrest@0
   303
  var_names = new(nfields,string)
forrest@0
   304
forrest@0
   305
  do i=0,nfields-1
forrest@0
   306
    var_names(i) = read_field(header,i+1,hindices,"string")
forrest@0
   307
  end do
forrest@0
   308
forrest@0
   309
;-------------------------------------------------------------------
forrest@0
   310
; Write out this netCDF file efficiently so it will be faster.
forrest@0
   311
; Try to predefine everything before you write to it.
forrest@0
   312
forrest@0
   313
  f = addfile(cdf_file,"c")
forrest@0
   314
  setfileoption(f,"DefineMode",True)       ; Enter predefine phase.
forrest@0
   315
forrest@0
   316
; Write global attributes to file. It's okay to do this before 
forrest@0
   317
; predefining the file's variables. We are still in "define" mode.
forrest@0
   318
forrest@0
   319
  fAtt               = True
forrest@0
   320
  fAtt@description   = "Data read in from " + filename + " ASCII file."
forrest@0
   321
  fAtt@creation_date = systemfunc ("date")        
forrest@0
   322
  fileattdef( f, fAtt )        
forrest@0
   323
forrest@0
   324
; Write dimension names to file.
forrest@0
   325
 
forrest@0
   326
  dim_names = (/ "year",  "month" /)
forrest@0
   327
  dim_sizes = (/ -1    ,  nmonth  /)
forrest@0
   328
  dimUnlim  = (/ True  ,  False   /)
forrest@0
   329
  filedimdef( f, dim_names, dim_sizes, dimUnlim )
forrest@0
   330
forrest@0
   331
  filedimdef( f, "lat", 1, False )
forrest@0
   332
  filedimdef( f, "lon", 1, False )
forrest@0
   333
forrest@0
   334
; Define each variable on the file.
forrest@0
   335
forrest@0
   336
  filevardef( f, "year", "integer", "year" )
forrest@0
   337
  filevardef( f, "lat" , "float"  , "lat" )
forrest@0
   338
  filevardef( f, "lon" , "float"  , "lon" )
forrest@0
   339
forrest@0
   340
  do i=0,nfields-1
forrest@0
   341
forrest@0
   342
;    define variable
forrest@0
   343
forrest@0
   344
     filevardef(f, var_names(i), var_types(i), dim_names)
forrest@0
   345
forrest@0
   346
;    define variable attributes
forrest@0
   347
forrest@0
   348
     if (i.le.1) then
forrest@0
   349
        varAtt = 0
forrest@0
   350
;       varAtt@_FillValue = -999
forrest@0
   351
     else
forrest@0
   352
        varAtt = 0.
forrest@0
   353
;       varAtt@_FillValue = 1.e36
forrest@0
   354
     end if
forrest@0
   355
forrest@0
   356
     varAtt@long_name  = var_names(i)
forrest@0
   357
forrest@0
   358
     filevarattdef( f, var_names(i) , varAtt )
forrest@0
   359
forrest@0
   360
     delete (varAtt)
forrest@0
   361
  end do
forrest@0
   362
;-----------------------------------------------------------------
forrest@0
   363
forrest@0
   364
; Loop through each field, read the values for that field, print 
forrest@0
   365
; information about the variable, and then write it to the netCDF file.
forrest@0
   366
forrest@0
   367
  do i=0,nfields-1
forrest@0
   368
    ifield = i+1                         ; Fields start at #1, not #0.
forrest@0
   369
forrest@0
   370
    tmp_data = new((/1,nmonth/),var_types(i))
forrest@0
   371
forrest@0
   372
    if (i.le.1) then
forrest@0
   373
       tmp_data@_FillValue = -999
forrest@0
   374
    else
forrest@0
   375
       tmp_data@_FillValue = 1.e36
forrest@0
   376
    end if     
forrest@0
   377
forrest@0
   378
    tmp_data(0,:) = read_field(data,ifield,dindices,var_types(i))
forrest@0
   379
  
forrest@0
   380
    tmp_data = where(tmp_data .le. -9000.,tmp_data@_FillValue,tmp_data)
forrest@0
   381
forrest@0
   382
;   change Month to yyyymm
forrest@0
   383
forrest@0
   384
    if (i.eq.0) then
forrest@0
   385
       tmp_data(0,:) = tmp_data(0,:) + year*100
forrest@0
   386
    end if 
forrest@0
   387
forrest@0
   388
; Print some info about the variable.
forrest@0
   389
forrest@0
   390
;   print("")
forrest@0
   391
;   print("Writing variable '" + var_names(i) + "' (field #" + ifield + ").")
forrest@0
   392
;   print("Type is " + var_types(i) + ".")
forrest@0
   393
;   print("min/max = " + min(tmp_data) + "/" + max(tmp_data))
forrest@0
   394
forrest@0
   395
;   if(any(ismissing(tmp_data))) then
forrest@0
   396
;     print("This variable does contain missing values.")
forrest@0
   397
;   else
forrest@0
   398
;     print("This variable doesn't contain missing values.")
forrest@0
   399
;   end if
forrest@0
   400
forrest@0
   401
; write variable to file
forrest@0
   402
forrest@0
   403
    f->$var_names(i)$ = tmp_data       ; Write to netCDF file.
forrest@0
   404
forrest@0
   405
    delete(tmp_data)                   ; Delete for next round.
forrest@0
   406
  end do
forrest@0
   407
forrest@0
   408
; write variable to file
forrest@0
   409
forrest@0
   410
  f->year = year
forrest@0
   411
  f->lat  = lat
forrest@0
   412
  f->lon  = lon 
forrest@0
   413
end