npp/01.read_81.ncl.x
author Forrest Hoffman <forrest@climatemodeling.org>
Mon, 26 Jan 2009 22:08:20 -0500
changeset 0 0c6405ab2ff4
permissions -rw-r--r--
Initial commit of C-LAMP Diagnostics from Jeff Lee
forrest@0
     1
;----------------------------------------------------------------------
forrest@0
     2
; This example reads an ASCII file that is formatted a specific way, and
forrest@0
     3
; writes out the results to a netCDF file.
forrest@0
     4
;
forrest@0
     5
; The first line in the ASCII file must be a header, with each field
forrest@0
     6
; separated by a single character delimiter (like a ","). The rest of
forrest@0
     7
; the file must be such that each row contains all fields, each
forrest@0
     8
; separated by the designated delimiter.
forrest@0
     9
;
forrest@0
    10
; The fields can be integer, float, double, or string.
forrest@0
    11
;----------------------------------------------------------------------
forrest@0
    12
forrest@0
    13
;----------------------------------------------------------------------
forrest@0
    14
; This function returns the index locations of the given delimiter
forrest@0
    15
; in a row or several rows of strings.
forrest@0
    16
;----------------------------------------------------------------------
forrest@0
    17
function delim_indices(strings,nfields,delimiter)
forrest@0
    18
local cstrings, cdelim
forrest@0
    19
begin
forrest@0
    20
  nrows = dimsizes(strings)
forrest@0
    21
;
forrest@0
    22
; Handle special case if we only have one string. Make sure it
forrest@0
    23
; is put into a 2D array.
forrest@0
    24
;
forrest@0
    25
  if(nrows.eq.1) then
forrest@0
    26
    cstrings = new((/1,strlen(strings)+1/),character)
forrest@0
    27
  end if
forrest@0
    28
forrest@0
    29
  cstrings = stringtochar(strings)        ; Convert to characters.
forrest@0
    30
  cdelim   = stringtochar(delimiter)      ; Convert delimiter to character.
forrest@0
    31
;
forrest@0
    32
; Som error checking here. Make sure delimiter is one character.
forrest@0
    33
;
forrest@0
    34
  nc   = dimsizes(cdelim)
forrest@0
    35
  rank = dimsizes(nc)
forrest@0
    36
  if(rank.ne.1.or.(rank.eq.1.and.nc.ne.2)) then
forrest@0
    37
    print("delim_indices: fatal: the delimiter you've selected")
forrest@0
    38
    print("must be a single character. Can't continue.")
forrest@0
    39
    exit
forrest@0
    40
  end if
forrest@0
    41
forrest@0
    42
;
forrest@0
    43
; Create array to hold indices of delimiter locations, and then loop
forrest@0
    44
; through each row and find all the delimiters. Make sure each row has
forrest@0
    45
; the correct number of delimiters.
forrest@0
    46
;
forrest@0
    47
  ndelims  = nfields-1
forrest@0
    48
  cindices = new((/nrows,ndelims/),integer)
forrest@0
    49
  do i = 0, nrows-1
forrest@0
    50
    ii = ind(cstrings(i,:).eq.cdelim(0))
forrest@0
    51
;
forrest@0
    52
; Make sure there were delimiters on this row. If not, we just quit.
forrest@0
    53
; This could probably be modified to do this more gracefully.
forrest@0
    54
;
forrest@0
    55
    if(any(ismissing(ii))) then
forrest@0
    56
      print("delim_indices: fatal: I didn't find any delimiters")
forrest@0
    57
      print("('" + delimiter + "') on row " + i + ". Can't continue.")
forrest@0
    58
      exit
forrest@0
    59
    end if
forrest@0
    60
    if(dimsizes(ii).ne.ndelims) then
forrest@0
    61
      print("delim_indices: fatal: I expected to find " + ndelims)
forrest@0
    62
      print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".")
forrest@0
    63
      print("Can't continue.")
forrest@0
    64
      exit
forrest@0
    65
    end if
forrest@0
    66
forrest@0
    67
    cindices(i,:) = ii
forrest@0
    68
forrest@0
    69
    delete(ii)            ; For next time through loop
forrest@0
    70
  end do
forrest@0
    71
forrest@0
    72
  return(cindices)
forrest@0
    73
end
forrest@0
    74
forrest@0
    75
;----------------------------------------------------------------------
forrest@0
    76
; This function reads in a particular field from a string array,
forrest@0
    77
; given the field number to read (fields start at 1 and go to nfield),
forrest@0
    78
; and the indices of the delimiters.
forrest@0
    79
;
forrest@0
    80
; It returns either an integer, float, double, or a string,
forrest@0
    81
; depending on the input flag "return_type".
forrest@0
    82
;----------------------------------------------------------------------
forrest@0
    83
function read_field(strings,ifield,indices,return_type)
forrest@0
    84
local nstring, cstrings, nf, tmp_str
forrest@0
    85
begin
forrest@0
    86
  nrows = dimsizes(strings)
forrest@0
    87
;
forrest@0
    88
; Handle special case if we only have one string. Make sure it
forrest@0
    89
; is put into a 2D array.
forrest@0
    90
;
forrest@0
    91
  if(nrows.eq.1) then
forrest@0
    92
    cstrings = new((/1,strlen(strings)+1/),character)
forrest@0
    93
  end if
forrest@0
    94
forrest@0
    95
  cstrings = stringtochar(strings)
forrest@0
    96
  nf       = dimsizes(indices(0,:))+1     ; indices is nrows x (nfields-1)
forrest@0
    97
forrest@0
    98
;
forrest@0
    99
; Error checking. Make sure user has entered a valid field.
forrest@0
   100
;
forrest@0
   101
  if(ifield.le.0.or.ifield.gt.nf) then
forrest@0
   102
    print("read_field: fatal: you've selected a field that is")
forrest@0
   103
    print("out-of-range of the number of fields that you have (" + nf + ").")
forrest@0
   104
    exit
forrest@0
   105
  end if
forrest@0
   106
forrest@0
   107
;
forrest@0
   108
; Set up array to return.
forrest@0
   109
;
forrest@0
   110
  return_array = new(nrows,return_type)
forrest@0
   111
forrest@0
   112
  do i = 0,nrows-1
forrest@0
   113
;
forrest@0
   114
; Special case of first field in row.
forrest@0
   115
;
forrest@0
   116
    if(ifield.eq.1) then
forrest@0
   117
      ibeg = 0
forrest@0
   118
      iend = indices(i,ifield-1)-1
forrest@0
   119
    else
forrest@0
   120
;
forrest@0
   121
; Special case of first field in row.
forrest@0
   122
;
forrest@0
   123
      if(ifield.eq.nf) then
forrest@0
   124
        ibeg = indices(i,ifield-2)+1
forrest@0
   125
        iend = dimsizes(cstrings(i,:))-1
forrest@0
   126
;
forrest@0
   127
; Any field between first and last field.
forrest@0
   128
;
forrest@0
   129
      else
forrest@0
   130
        ibeg = indices(i,ifield-2)+1
forrest@0
   131
        iend = indices(i,ifield-1)-1
forrest@0
   132
      end if  
forrest@0
   133
    end if
forrest@0
   134
;
forrest@0
   135
; Here's the code that pulls off the correct string, and converts it
forrest@0
   136
; to float if desired.
forrest@0
   137
;
forrest@0
   138
    if(return_type.eq."integer") then
forrest@0
   139
      return_array(i) = stringtointeger(chartostring(cstrings(i,ibeg:iend)))
forrest@0
   140
    end if
forrest@0
   141
    if(return_type.eq."float") then
forrest@0
   142
      return_array(i) = stringtofloat(chartostring(cstrings(i,ibeg:iend)))
forrest@0
   143
    end if
forrest@0
   144
    if(return_type.eq."double") then
forrest@0
   145
      return_array(i) = stringtodouble(chartostring(cstrings(i,ibeg:iend)))
forrest@0
   146
    end if
forrest@0
   147
    if(return_type.eq."string") then
forrest@0
   148
      return_array(i) = chartostring(cstrings(i,ibeg:iend))
forrest@0
   149
    end if
forrest@0
   150
  end do
forrest@0
   151
forrest@0
   152
  return(return_array)
forrest@0
   153
end
forrest@0
   154
forrest@0
   155
;----------------------------------------------------------------------
forrest@0
   156
; Main code.
forrest@0
   157
;----------------------------------------------------------------------
forrest@0
   158
begin
forrest@0
   159
;
forrest@0
   160
; Set up defaults here.  We have to hard-code the field types,
forrest@0
   161
; because we can't really tell otherwise.
forrest@0
   162
forrest@0
   163
  filename  = "data.81"                 ; ASCII" file to read.
forrest@0
   164
  cdf_file  = filename + ".nc"          ; netCDF file to write.
forrest@0
   165
  nfields   = 22                        ; # of fields
forrest@0
   166
  delimiter = ","                       ; field delimiter
forrest@0
   167
;
forrest@0
   168
; In this case, fields #6-#8 are strings, fields #2, #3, and #11
forrest@0
   169
; are float, and the rest of the fields are integers.
forrest@0
   170
;
forrest@0
   171
  var_types      = new(nfields,string)
forrest@0
   172
  var_types      = "integer"    ; Most are ints.
forrest@0
   173
  var_types(5:7) = "string"     ; corresponds to fields 6-8
forrest@0
   174
  var_types(1:2) = "float"
forrest@0
   175
  var_types(10)  = "float"
forrest@0
   176
forrest@0
   177
  if(isfilepresent(cdf_file))
forrest@0
   178
    print("Warning: '" + cdf_file + "' exists. Will remove it.")
forrest@0
   179
    system("/bin/rm " + cdf_file)
forrest@0
   180
  end if
forrest@0
   181
forrest@0
   182
;
forrest@0
   183
; Read in data as strings. This will create a string array that has the
forrest@0
   184
; same number of strings as there are rows in the file. We will then need
forrest@0
   185
; to parse each string later.
forrest@0
   186
;
forrest@0
   187
  read_data = asciiread(filename,-1,"string")
forrest@0
   188
  header    = read_data(0)        ; Header. Use for variable names.
forrest@0
   189
  data      = read_data(1:)       ; Get rid of first line which is a header.
forrest@0
   190
  nrows     = dimsizes(data)      ; Number of rows.
forrest@0
   191
forrest@0
   192
;
forrest@0
   193
; Read in locations of delimiters in each string row.
forrest@0
   194
;
forrest@0
   195
  hindices = delim_indices(header,nfields,delimiter)   ; header row
forrest@0
   196
  dindices = delim_indices(data,nfields,delimiter)     ; rest of file
forrest@0
   197
forrest@0
   198
;
forrest@0
   199
; Read in the field names which will become variable names on
forrest@0
   200
; the netCDF file.
forrest@0
   201
;
forrest@0
   202
  var_names = new(nfields,string)
forrest@0
   203
forrest@0
   204
  do i=0,nfields-1
forrest@0
   205
    var_names(i) = read_field(header,i+1,hindices,"string")
forrest@0
   206
  end do
forrest@0
   207
;
forrest@0
   208
; Write out this netCDF file efficiently so it will be faster.
forrest@0
   209
; Try to predefine everything before you write to it.
forrest@0
   210
; 
forrest@0
   211
  f = addfile(cdf_file,"c")
forrest@0
   212
  setfileoption(f,"DefineMode",True)       ; Enter predefine phase.
forrest@0
   213
forrest@0
   214
;
forrest@0
   215
; Write global attributes to file. It's okay to do this before 
forrest@0
   216
; predefining the file's variables. We are still in "define" mode.
forrest@0
   217
;
forrest@0
   218
  fAtt               = True
forrest@0
   219
  fAtt@description   = "Data read in from '" + filename + "' ASCII file."
forrest@0
   220
  fAtt@creation_date = systemfunc ("date")        
forrest@0
   221
  fileattdef( f, fAtt )        
forrest@0
   222
forrest@0
   223
;
forrest@0
   224
; Write dimension name. There's only one here.
forrest@0
   225
;
forrest@0
   226
  filedimdef(f,"nvalues",-1,True)
forrest@0
   227
forrest@0
   228
;
forrest@0
   229
; Define each variable on the file, giving it the dimension name
forrest@0
   230
; we just created above.
forrest@0
   231
;
forrest@0
   232
; Don't deal with variables that are of type string.
forrest@0
   233
;
forrest@0
   234
  do i=0,nfields-1
forrest@0
   235
    if(var_types(i).ne."string") then
forrest@0
   236
      filevardef(f, var_names(i), var_types(i), "nvalues")
forrest@0
   237
    end if
forrest@0
   238
  end do
forrest@0
   239
forrest@0
   240
;
forrest@0
   241
; Loop through each field, read the values for that field, print 
forrest@0
   242
; information about the variable, and then write it to the netCDF
forrest@0
   243
; file.
forrest@0
   244
;
forrest@0
   245
  do i=0,nfields-1
forrest@0
   246
    ifield = i+1           ; Fields start at #1, not #0.
forrest@0
   247
;
forrest@0
   248
; You can't write strings to a netCDF file, so skip the string fields.
forrest@0
   249
;
forrest@0
   250
    if(var_types(i).ne."string") then
forrest@0
   251
      tmp_data = read_field(data,ifield,dindices,var_types(i))
forrest@0
   252
;
forrest@0
   253
; Print some info about the variable.
forrest@0
   254
;
forrest@0
   255
      print("")
forrest@0
   256
      print("Writing variable " + var_names(i) + " (field #" + ifield + ").")
forrest@0
   257
      print("Type is " + var_types(i) + ", min/max = " + min(tmp_data) + \
forrest@0
   258
            "/" + max(tmp_data))
forrest@0
   259
forrest@0
   260
      if(any(ismissing(tmp_data))) then
forrest@0
   261
        print("This variable does contain missing values.")
forrest@0
   262
      else
forrest@0
   263
        print("This variable doesn't contain missing values.")
forrest@0
   264
      end if
forrest@0
   265
forrest@0
   266
      f->$var_names(i)$ = tmp_data       ; Write to netCDF file.
forrest@0
   267
forrest@0
   268
      delete(tmp_data)                   ; Delete for next round.
forrest@0
   269
    end if
forrest@0
   270
  end do
forrest@0
   271
end