diff -r 000000000000 -r 0c6405ab2ff4 npp/01.read_ascii_933.ncl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/npp/01.read_ascii_933.ncl Mon Jan 26 22:08:20 2009 -0500
@@ -0,0 +1,421 @@
+;----------------------------------------------------------------------
+; This example reads an ASCII file that is formatted a specific way, and
+; writes out the results to a netCDF file.
+;
+; The first line in the ASCII file must be a header, with each field
+; separated by a single character delimiter (like a ","). The rest of
+; the file must be such that each row contains all fields, each
+; separated by the designated delimiter.
+;
+; The fields can be integer, float, double, character, or string.
+; String fields cannot be written to a netCDF file. They have to
+; be read in as character arrays and written out that way.
+;----------------------------------------------------------------------
+
+;----------------------------------------------------------------------
+; This function returns the index locations of the given delimiter
+; in a row or several rows of strings.
+;
+;   strings   - a scalar string or a 1D array of strings (one per row)
+;   nfields   - number of fields expected on every row
+;   delimiter - single-character string that separates the fields
+;
+; Returns an nrows x (nfields-1) integer array of delimiter positions.
+; Exits the script if the delimiter is not a single character, or if
+; any row doesn't contain exactly nfields-1 delimiters.
+;----------------------------------------------------------------------
+function delim_indices(strings,nfields,delimiter)
+local nrows, cstrings, cdelim, nc, rank, ndelims, cindices, i, ii
+begin
+  nrows = dimsizes(strings)
+;
+; Handle special case if we only have one string. Make sure it
+; is put into a 2D array.
+;
+  if(nrows.eq.1) then
+    cstrings = new((/1,strlen(strings)+1/),character)
+  end if
+
+  cstrings = stringtochar(strings)     ; Convert to characters.
+  cdelim   = stringtochar(delimiter)   ; Convert delimiter to character.
+;
+; Some error checking here. Make sure delimiter is one character.
+; (A one-character string converts to two characters: the character
+; itself plus a terminator, hence the check against 2.)
+;
+  nc   = dimsizes(cdelim)
+  rank = dimsizes(nc)
+  if(rank.ne.1.or.(rank.eq.1.and.nc.ne.2)) then
+    print("delim_indices: fatal: the delimiter you've selected")
+    print("must be a single character. Can't continue.")
+    exit
+  end if
+
+;
+; Create array to hold indices of delimiter locations, and then loop
+; through each row and find all the delimiters. Make sure each row has
+; the correct number of delimiters.
+;
+  ndelims  = nfields-1
+  cindices = new((/nrows,ndelims/),integer)
+  do i = 0, nrows-1
+    ii = ind(cstrings(i,:).eq.cdelim(0))
+;
+; Make sure there were delimiters on this row. If not, we just quit.
+; This could probably be modified to do this more gracefully.
+;
+    if(any(ismissing(ii))) then
+      print("delim_indices: fatal: I didn't find any delimiters")
+      print("('" + delimiter + "') on row " + i + ". Can't continue.")
+      exit
+    end if
+    if(dimsizes(ii).ne.ndelims) then
+      print("delim_indices: fatal: I expected to find " + ndelims)
+      print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".")
+      print("Can't continue.")
+      exit
+    end if
+
+    cindices(i,:) = ii
+
+    delete(ii)            ; For next time through loop
+  end do
+
+  return(cindices)
+end
+
+;----------------------------------------------------------------------
+; This function reads in a particular field from a string array,
+; given the field number to read (fields start at #1 and go to #nfield),
+; and the indices of the delimiters.
+;
+; It returns either an integer, float, double, character, or a string,
+; depending on the input flag "return_type".
+;
+; An empty field is left at the default missing value of the returned
+; array instead of being converted.
+;----------------------------------------------------------------------
+function read_field(strings,ifield,indices,return_type)
+local nrows, cstrings, nf, return_array, tmp_return_array, max_len, i, ibeg, iend
+begin
+  nrows = dimsizes(strings)
+;
+; Handle special case if we only have one string. Make sure it
+; is put into a 2D array.
+;
+  if(nrows.eq.1) then
+    cstrings = new((/1,strlen(strings)+1/),character)
+  end if
+
+  cstrings = stringtochar(strings)
+  nf       = dimsizes(indices(0,:))+1   ; indices is nrows x (nfields-1)
+
+;
+; Error checking. Make sure user has entered a valid field.
+;
+  if(ifield.le.0.or.ifield.gt.nf) then
+    print("read_field: fatal: you've selected a field that is")
+    print("out-of-range of the number of fields that you have (" + nf + ").")
+    exit
+  end if
+
+;
+; Set up array to return. For string, int, float, or double arrays,
+; we don't have to do anything special. For character arrays,
+; however, we do.
+;
+  if(return_type.ne."character") then
+    return_array = new(nrows,return_type)
+  else
+;
+; We don't know what the biggest character array is at this point, so
+; make it bigger than necessary, and then resize later as necessary.
+;
+    tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character")
+
+    max_len = 0     ; Use to keep track of max lengths of strings.
+  end if
+
+  do i = 0,nrows-1
+;
+; Special case of first field in row.
+;
+    if(ifield.eq.1) then
+      ibeg = 0
+      iend = indices(i,ifield-1)-1
+    else
+;
+; Special case of last field in row.
+;
+      if(ifield.eq.nf) then
+        ibeg = indices(i,ifield-2)+1
+        iend = dimsizes(cstrings(i,:))-1
+;
+; Any field between first and last field.
+;
+      else
+        ibeg = indices(i,ifield-2)+1
+        iend = indices(i,ifield-1)-1
+      end if
+    end if
+;
+; Here's the code that pulls off the correct string, and converts it
+; to the requested type. An empty field (iend < ibeg) is skipped so
+; that we never subscript with a reversed or negative range; its entry
+; keeps the missing value that "new" put there.
+;
+    if(iend.ge.ibeg) then
+      if(return_type.eq."integer") then
+        return_array(i) = stringtointeger(chartostring(cstrings(i,ibeg:iend)))
+      end if
+      if(return_type.eq."float") then
+        return_array(i) = stringtofloat(chartostring(cstrings(i,ibeg:iend)))
+      end if
+      if(return_type.eq."double") then
+        return_array(i) = stringtodouble(chartostring(cstrings(i,ibeg:iend)))
+      end if
+      if(return_type.eq."string") then
+        return_array(i) = chartostring(cstrings(i,ibeg:iend))
+      end if
+      if(return_type.eq."character") then
+        if( (iend-ibeg+1) .gt. max_len) then
+          max_len = iend-ibeg+1
+        end if
+        tmp_return_array(i,0:iend-ibeg) = cstrings(i,ibeg:iend)
+      end if
+    end if
+  end do
+
+  if(return_type.eq."character") then
+    return_array = new((/nrows,max_len/),"character")
+    return_array = tmp_return_array(:,0:max_len-1)
+  end if
+
+  return(return_array)
+end
+
+
+;----------------------------------------------------------------------
+; This function reads in string fields only to get the maximum string
+; length.
+;
+; Note that for the last field on a row, the length is measured to the
+; end of the character row, which is what read_field does as well.
+;----------------------------------------------------------------------
+function get_maxlen(strings,ifield,indices)
+local nrows, cstrings, nf, max_len, i, ibeg, iend
+begin
+  nrows = dimsizes(strings)
+;
+; Handle special case if we only have one string. Make sure it
+; is put into a 2D array.
+;
+  if(nrows.eq.1) then
+    cstrings = new((/1,strlen(strings)+1/),character)
+  end if
+
+  cstrings = stringtochar(strings)
+  nf       = dimsizes(indices(0,:))+1   ; indices is nrows x (nfields-1)
+
+;
+; Error checking. Make sure user has entered a valid field.
+;
+  if(ifield.le.0.or.ifield.gt.nf) then
+    print("get_maxlen: fatal: you've selected a field that is")
+    print("out-of-range of the number of fields that you have (" + nf + ").")
+    exit
+  end if
+
+  max_len = 0     ; Use to keep track of max lengths of strings.
+
+  do i = 0,nrows-1
+;
+; Special case of first field in row.
+;
+    if(ifield.eq.1) then
+      ibeg = 0
+      iend = indices(i,ifield-1)-1
+    else
+;
+; Special case of last field in row.
+;
+      if(ifield.eq.nf) then
+        ibeg = indices(i,ifield-2)+1
+        iend = dimsizes(cstrings(i,:))-1
+;
+; Any field between first and last field.
+;
+      else
+        ibeg = indices(i,ifield-2)+1
+        iend = indices(i,ifield-1)-1
+      end if
+    end if
+    if( (iend-ibeg+1) .gt. max_len) then
+      max_len = iend-ibeg+1
+    end if
+  end do
+
+  return(max_len)
+end
+
+;----------------------------------------------------------------------
+; Main code.
+;----------------------------------------------------------------------
+begin
+;
+; Set up defaults here. We are hard-coding the field types here.
+; You can set up this script to try to determine the field types
+; automatically, but this is a bit tedious. Maybe later.
+;
+  filename  = "data.933"           ; ASCII file to read.
+  cdf_file  = filename + ".nc"     ; netCDF file to write.
+  nfields   = 45                   ; # of fields
+  delimiter = ","                  ; field delimiter
+;
+; In this case, fields #6-#8 are strings, fields #2, #3, and #11
+; are float, and the rest of the fields are integers.
+;
+  var_types   = new(nfields,string)
+  var_strlens = new(nfields,integer)    ; var to hold strlens, just in case.
+
+  var_types      = "integer"       ; Most are ints.
+  var_types(5:7) = "character"     ; Corresponds to fields 6-8.
+  var_types(1:2) = "float"         ; Corresponds to fields 2-3.
+  var_types(10)  = "float"         ; Corresponds to field 11.
+
+  if(isfilepresent(cdf_file)) then
+    print("Warning: '" + cdf_file + "' exists. Will remove it.")
+    system("/bin/rm " + cdf_file)
+  end if
+
+;
+; Read in data as strings. This will create a string array that has the
+; same number of strings as there are rows in the file. We will then need
+; to parse each string later.
+;
+  read_data = asciiread(filename,-1,"string")
+  header    = read_data(0)        ; Header. Use for variable names.
+  data      = read_data(1:)       ; Get rid of first line which is a header.
+  nrows     = dimsizes(data)      ; Number of rows.
+
+;
+; Read in locations of delimiters in each string row.
+;
+  hindices = delim_indices(header,nfields,delimiter)   ; header row
+  dindices = delim_indices(data,nfields,delimiter)     ; rest of file
+
+;
+; Read in the field names which will become variable names on
+; the netCDF file.
+;
+  var_names = new(nfields,string)
+
+  do i=0,nfields-1
+    var_names(i) = read_field(header,i+1,hindices,"string")
+  end do
+
+;
+; Write out this netCDF file efficiently so it will be faster.
+; Try to predefine everything before you write to it.
+;
+  f = addfile(cdf_file,"c")
+  setfileoption(f,"DefineMode",True)       ; Enter predefine phase.
+
+;
+; Write global attributes to file. It's okay to do this before
+; predefining the file's variables. We are still in "define" mode.
+;
+  fAtt               = True
+  fAtt@description   = "Data read in from " + filename + " ASCII file."
+  fAtt@creation_date = systemfunc ("date")
+  fileattdef( f, fAtt )
+
+;
+; Write dimension names to file. If there are no character variables,
+; then there's only one dimension name ("nvalues").
+;
+; Otherwise, we need to write a dimension name for every character
+; variable, which will indicate the maximum string length for that
+; variable.
+;
+  indc = ind(var_types.eq."character")
+  if(.not.any(ismissing(indc))) then
+;
+; We have to treat the character arrays special here. We need to
+; know their sizes so we can write the maximum size of each char
+; array to the netCDF file as a dimension name. This means we
+; need to read in the character variables once to get the string
+; lengths, then we'll read them again later to get the actual values.
+;
+    do i=0,dimsizes(indc)-1
+      var_strlens(indc(i)) = get_maxlen(data,indc(i)+1,dindices)
+    end do
+
+    ndims    = dimsizes(indc) + 1
+    dimNames = new(ndims,string)
+    dimSizes = new(ndims,integer)
+    dimUnlim = new(ndims,logical)
+
+    dimUnlim            = False
+    dimUnlim(0)         = True
+    dimNames(0)         = "nvalues"
+    dimNames(1:ndims-1) = var_names(indc) + "_StrLen"
+    dimSizes(0)         = -1
+    dimSizes(1:ndims-1) = var_strlens(indc)
+    filedimdef(f,dimNames,dimSizes,dimUnlim)
+  else
+;
+; No character variables, so just write the one dimension name.
+;
+    filedimdef(f,"nvalues",-1,True)
+  end if
+
+;
+; Define each variable on the file.
+;
+; Don't deal with variables that are of type string.
+;
+  do i=0,nfields-1
+    if(var_types(i).ne."string") then
+      if(var_types(i).ne."character") then
+        filevardef(f, var_names(i), var_types(i), "nvalues")
+      else
+        filevardef(f, var_names(i), var_types(i), \
+                   (/"nvalues",var_names(i)+"_StrLen"/))
+      end if
+    end if
+  end do
+
+;
+; Loop through each field, read the values for that field, print
+; information about the variable, and then write it to the netCDF
+; file.
+;
+  do i=0,nfields-1
+    ifield = i+1           ; Fields start at #1, not #0.
+;
+; Note: you can't write strings to a netCDF file, so these have
+; to be written out as character arrays.
+;
+    tmp_data = read_field(data,ifield,dindices,var_types(i))
+;
+; Print some info about the variable.
+;
+    print("")
+    print("Writing variable '" + var_names(i) + "' (field #" + ifield + ").")
+    print("Type is " + var_types(i) + ".")
+    if(var_types(i).ne."string".and.var_types(i).ne."character") then
+      print("min/max = " + min(tmp_data) + "/" + max(tmp_data))
+    end if
+
+    if(any(ismissing(tmp_data))) then
+      print("This variable does contain missing values.")
+    else
+      print("This variable doesn't contain missing values.")
+    end if
+
+    f->$var_names(i)$ = tmp_data       ; Write to netCDF file.
+
+    delete(tmp_data)                   ; Delete for next round.
+  end do
+end