forrest@0: ;---------------------------------------------------------------------- forrest@0: ; This example reads an ASCII file that is formatted a specific way, and forrest@0: ; writes out the results to a netCDF file. forrest@0: ; forrest@0: ; The first line in the ASCII file must be a header, with each field forrest@0: ; separated by a single character delimiter (like a ","). The rest of forrest@0: ; the file must be such that each row contains all fields, each forrest@0: ; separated by the designated delimiter. forrest@0: ; forrest@0: ; The fields can be integer, float, double, character, or string. forrest@0: ; String fields cannot be written to a netCDF file. They have to forrest@0: ; be read in as character arrays and written out that way. forrest@0: ;---------------------------------------------------------------------- forrest@0: forrest@0: ;---------------------------------------------------------------------- forrest@0: ; This function returns the index locations of the given delimiter forrest@0: ; in a row or several rows of strings. forrest@0: ;---------------------------------------------------------------------- forrest@0: function delim_indices(strings,nfields,delimiter) forrest@0: local cstrings, cdelim forrest@0: begin forrest@0: nrows = dimsizes(strings) forrest@0: ; forrest@0: ; Handle special case if we only have one string. Make sure it forrest@0: ; is put into a 2D array. forrest@0: ; forrest@0: if(nrows.eq.1) then forrest@0: cstrings = new((/1,strlen(strings)+1/),character) forrest@0: end if forrest@0: forrest@0: cstrings = stringtochar(strings) ; Convert to characters. forrest@0: cdelim = stringtochar(delimiter) ; Convert delimiter to character. forrest@0: ; forrest@0: ; Som error checking here. Make sure delimiter is one character. forrest@0: ; forrest@0: nc = dimsizes(cdelim) forrest@0: rank = dimsizes(nc) forrest@0: if(rank.ne.1.or.(rank.eq.1.and.nc.ne.2)) then forrest@0: print("delim_indices: fatal: the delimiter you've selected") forrest@0: print("must be a single character. Can't continue.") forrest@0: exit forrest@0: end if forrest@0: forrest@0: ; forrest@0: ; Create array to hold indices of delimiter locations, and then loop forrest@0: ; through each row and find all the delimiters. Make sure each row has forrest@0: ; the correct number of delimiters. forrest@0: ; forrest@0: ndelims = nfields-1 forrest@0: cindices = new((/nrows,ndelims/),integer) forrest@0: do i = 0, nrows-1 forrest@0: ii = ind(cstrings(i,:).eq.cdelim(0)) forrest@0: ; forrest@0: ; Make sure there were delimiters on this row. If not, we just quit. forrest@0: ; This could probably be modified to do this more gracefully. forrest@0: ; forrest@0: if(any(ismissing(ii))) then forrest@0: print("delim_indices: fatal: I didn't find any delimiters") forrest@0: print("('" + delimiter + "') on row " + i + ". Can't continue.") forrest@0: exit forrest@0: end if forrest@0: if(dimsizes(ii).ne.ndelims) then forrest@0: print("delim_indices: fatal: I expected to find " + ndelims) forrest@0: print("delimiters on row " + i + ". Instead, I found " + dimsizes(ii) + ".") forrest@0: print("Can't continue.") forrest@0: exit forrest@0: end if forrest@0: forrest@0: cindices(i,:) = ii forrest@0: forrest@0: delete(ii) ; For next time through loop forrest@0: end do forrest@0: forrest@0: return(cindices) forrest@0: end forrest@0: forrest@0: ;---------------------------------------------------------------------- forrest@0: ; This function reads in a particular field from a string array, forrest@0: ; given the field number to read (fields start at #1 and go to #nfield), forrest@0: ; and the indices of the delimiters. forrest@0: ; forrest@0: ; It returns either an integer, float, double, character, or a string, forrest@0: ; depending on the input flag "return_type". forrest@0: ;---------------------------------------------------------------------- forrest@0: function read_field(strings,ifield,indices,return_type) forrest@0: local nstring, cstrings, nf, tmp_str forrest@0: begin forrest@0: nrows = dimsizes(strings) forrest@0: ; forrest@0: ; Handle special case if we only have one string. Make sure it forrest@0: ; is put into a 2D array. forrest@0: ; forrest@0: if(nrows.eq.1) then forrest@0: cstrings = new((/1,strlen(strings)+1/),character) forrest@0: end if forrest@0: forrest@0: cstrings = stringtochar(strings) forrest@0: nf = dimsizes(indices(0,:))+1 ; indices is nrows x (nfields-1) forrest@0: forrest@0: ; forrest@0: ; Error checking. Make sure user has entered a valid field. forrest@0: ; forrest@0: if(ifield.le.0.or.ifield.gt.nf) then forrest@0: print("read_field: fatal: you've selected a field that is") forrest@0: print("out-of-range of the number of fields that you have (" + nf + ").") forrest@0: exit forrest@0: end if forrest@0: forrest@0: ; forrest@0: ; Set up array to return. For string, int, float, or double arrays, forrest@0: ; we don't have to do anything special. For character arrays, forrest@0: ; however, we do. forrest@0: ; forrest@0: if(return_type.ne."character") then forrest@0: return_array = new(nrows,return_type) forrest@0: else forrest@0: ; forrest@0: ; We don't know what the biggest character array is at this point, so forrest@0: ; make it bigger than necessary, and then resize later as necessary. forrest@0: ; forrest@0: tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character") forrest@0: forrest@0: max_len = 0 ; Use to keep track of max lengths of strings. forrest@0: end if forrest@0: forrest@0: do i = 0,nrows-1 forrest@0: ; forrest@0: ; Special case of first field in row. forrest@0: ; forrest@0: if(ifield.eq.1) then forrest@0: ibeg = 0 forrest@0: iend = indices(i,ifield-1)-1 forrest@0: else forrest@0: ; forrest@0: ; Special case of first field in row. forrest@0: ; forrest@0: if(ifield.eq.nf) then forrest@0: ibeg = indices(i,ifield-2)+1 forrest@0: iend = dimsizes(cstrings(i,:))-1 forrest@0: ; forrest@0: ; Any field between first and last field. forrest@0: ; forrest@0: else forrest@0: ibeg = indices(i,ifield-2)+1 forrest@0: iend = indices(i,ifield-1)-1 forrest@0: end if forrest@0: end if forrest@0: ; forrest@0: ; Here's the code that pulls off the correct string, and converts it forrest@0: ; to float if desired. forrest@0: ; forrest@0: if(return_type.eq."integer") then forrest@0: return_array(i) = stringtointeger(chartostring(cstrings(i,ibeg:iend))) forrest@0: end if forrest@0: if(return_type.eq."float") then forrest@0: return_array(i) = stringtofloat(chartostring(cstrings(i,ibeg:iend))) forrest@0: end if forrest@0: if(return_type.eq."double") then forrest@0: return_array(i) = stringtodouble(chartostring(cstrings(i,ibeg:iend))) forrest@0: end if forrest@0: if(return_type.eq."string") then forrest@0: return_array(i) = chartostring(cstrings(i,ibeg:iend)) forrest@0: end if forrest@0: if(return_type.eq."character") then forrest@0: if( (iend-ibeg+1) .gt. max_len) then forrest@0: max_len = iend-ibeg+1 forrest@0: end if forrest@0: tmp_return_array(i,0:iend-ibeg) = cstrings(i,ibeg:iend) forrest@0: end if forrest@0: end do forrest@0: forrest@0: if(return_type.eq."character") then forrest@0: return_array = new((/nrows,max_len/),"character") forrest@0: return_array = tmp_return_array(:,0:max_len-1) forrest@0: end if forrest@0: forrest@0: return(return_array) forrest@0: end forrest@0: forrest@0: forrest@0: ;---------------------------------------------------------------------- forrest@0: ; This function reads in string fields only to get the maximum string forrest@0: ; length. forrest@0: ;---------------------------------------------------------------------- forrest@0: function get_maxlen(strings,ifield,indices) forrest@0: local nstring, cstrings, nf, tmp_str forrest@0: begin forrest@0: nrows = dimsizes(strings) forrest@0: ; forrest@0: ; Handle special case if we only have one string. Make sure it forrest@0: ; is put into a 2D array. forrest@0: ; forrest@0: if(nrows.eq.1) then forrest@0: cstrings = new((/1,strlen(strings)+1/),character) forrest@0: end if forrest@0: forrest@0: cstrings = stringtochar(strings) forrest@0: nf = dimsizes(indices(0,:))+1 ; indices is nrows x (nfields-1) forrest@0: forrest@0: ; forrest@0: ; Error checking. Make sure user has entered a valid field. forrest@0: ; forrest@0: if(ifield.le.0.or.ifield.gt.nf) then forrest@0: print("read_field: fatal: you've selected a field that is") forrest@0: print("out-of-range of the number of fields that you have (" + nf + ").") forrest@0: exit forrest@0: end if forrest@0: ; forrest@0: ; We don't know what the biggest character array is at this point, so forrest@0: ; make it bigger than necessary, and then resize later as necessary. forrest@0: ; forrest@0: tmp_return_array = new((/nrows,dimsizes(cstrings(0,:))/),"character") forrest@0: forrest@0: max_len = 0 ; Use to keep track of max lengths of strings. forrest@0: forrest@0: do i = 0,nrows-1 forrest@0: ; forrest@0: ; Special case of first field in row. forrest@0: ; forrest@0: if(ifield.eq.1) then forrest@0: ibeg = 0 forrest@0: iend = indices(i,ifield-1)-1 forrest@0: else forrest@0: ; forrest@0: ; Special case of first field in row. forrest@0: ; forrest@0: if(ifield.eq.nf) then forrest@0: ibeg = indices(i,ifield-2)+1 forrest@0: iend = dimsizes(cstrings(i,:))-1 forrest@0: ; forrest@0: ; Any field between first and last field. forrest@0: ; forrest@0: else forrest@0: ibeg = indices(i,ifield-2)+1 forrest@0: iend = indices(i,ifield-1)-1 forrest@0: end if forrest@0: end if forrest@0: if( (iend-ibeg+1) .gt. max_len) then forrest@0: max_len = iend-ibeg+1 forrest@0: end if forrest@0: end do forrest@0: forrest@0: return(max_len) forrest@0: end forrest@0: forrest@0: ;---------------------------------------------------------------------- forrest@0: ; Main code. forrest@0: ;---------------------------------------------------------------------- forrest@0: begin forrest@0: forrest@0: ; Set up defaults here. We are hard-coding the field types here. forrest@0: ; You can set up this script to try to determine the field types forrest@0: ; automatically, but this is a bit tedious. Maybe later. forrest@0: forrest@0: station = "USHa1" forrest@0: year = 1991 forrest@0: nfields = 30 ; # of fields forrest@0: delimiter = "," ; field delimiter forrest@0: forrest@0: filename = station+year+"_L4_m.txt" ; ASCII" file to read. forrest@0: cdf_file = station+year+"_L4_m.nc" ; netCDF file to write. forrest@0: forrest@0: ; In this case, fields #2-#2 are integers, forrest@0: ; and the rest of the fields are floats. forrest@0: forrest@0: var_types = new(nfields,string) forrest@0: var_strlens = new(nfields,integer) ; var to hold strlens, just in case. forrest@0: forrest@0: var_types = "float" ; Most are floats. forrest@0: var_types(0:1) = "integer" forrest@0: forrest@0: if(isfilepresent(cdf_file)) forrest@0: print("Warning: '" + cdf_file + "' exists. Will remove it.") forrest@0: system("/bin/rm " + cdf_file) forrest@0: end if forrest@0: forrest@0: ; Read in data as strings. This will create a string array that has the forrest@0: ; same number of strings as there are rows in the file. We will then need forrest@0: ; to parse each string later. forrest@0: forrest@0: read_data = asciiread(filename,-1,"string") forrest@0: header = read_data(0) ; Header. Use for variable names. forrest@0: data = read_data(1:) ; Get rid of first line which is a header. forrest@0: nrows = dimsizes(data) ; Number of rows. forrest@0: forrest@0: ; Read in locations of delimiters in each string row. forrest@0: forrest@0: hindices = delim_indices(header,nfields,delimiter) ; header row forrest@0: dindices = delim_indices(data,nfields,delimiter) ; rest of file forrest@0: forrest@0: print (hindices) forrest@0: print (dindices) forrest@0: forrest@0: ; Read in the field names which will become variable names on forrest@0: ; the netCDF file. forrest@0: forrest@0: var_names = new(nfields,string) forrest@0: forrest@0: do i=0,nfields-1 forrest@0: var_names(i) = read_field(header,i+1,hindices,"string") forrest@0: end do forrest@0: forrest@0: ; Write out this netCDF file efficiently so it will be faster. forrest@0: ; Try to predefine everything before you write to it. forrest@0: forrest@0: f = addfile(cdf_file,"c") forrest@0: setfileoption(f,"DefineMode",True) ; Enter predefine phase. forrest@0: forrest@0: ; Write global attributes to file. It's okay to do this before forrest@0: ; predefining the file's variables. We are still in "define" mode. forrest@0: forrest@0: fAtt = True forrest@0: fAtt@description = "Data read in from " + filename + " ASCII file." forrest@0: fAtt@creation_date = systemfunc ("date") forrest@0: fileattdef( f, fAtt ) forrest@0: forrest@0: ; Write dimension names to file. If there are no character variables, forrest@0: ; then there's only one dimension name ("nvalues"). forrest@0: forrest@0: nyear = -1 forrest@0: nmonth = 12 forrest@0: forrest@0: dim_names = (/ "year", "month" /) forrest@0: dim_sizes = (/ nyear , nmonth /) forrest@0: dimUnlim = (/ True , False /) forrest@0: forrest@0: filedimdef( f, dim_names, dim_sizes, dimUnlim ) forrest@0: forrest@0: ; Define each variable on the file. forrest@0: ; forrest@0: ; Don't deal with variable Month (i=0). forrest@0: forrest@0: do i=1,nfields-1 forrest@0: filevardef(f, var_names(i), var_types(i), dim_names) forrest@0: end do forrest@0: forrest@0: ; Loop through each field, read the values for that field, print forrest@0: ; information about the variable, and then write it to the netCDF forrest@0: ; file. forrest@0: forrest@0: do i=1,nfields-1 forrest@0: ifield = i+1 ; Fields start at #1, not #0. forrest@0: forrest@0: ; Note: you can't write strings to a netCDF file, so these have forrest@0: ; to be written out as character arrays. forrest@0: forrest@0: tmp_data = new((/1,nmonth/),var_types(i)) forrest@0: forrest@0: out_data = read_field(data,ifield,dindices,var_types(i)) forrest@0: forrest@0: tmp_data(0,:) = out_data(:) forrest@0: forrest@0: ; Print some info about the variable. forrest@0: forrest@0: print("") forrest@0: print("Writing variable '" + var_names(i) + "' (field #" + ifield + ").") forrest@0: print("Type is " + var_types(i) + ".") forrest@0: print("min/max = " + min(tmp_data) + "/" + max(tmp_data)) forrest@0: forrest@0: if(any(ismissing(tmp_data))) then forrest@0: print("This variable does contain missing values.") forrest@0: else forrest@0: print("This variable doesn't contain missing values.") forrest@0: end if forrest@0: forrest@0: f->$var_names(i)$ = tmp_data ; Write to netCDF file. forrest@0: forrest@0: delete(tmp_data) ; Delete for next round. forrest@0: delete(out_data) forrest@0: end do forrest@0: end