gorilla

% Copyright (C) 2012-2017,2018 John E. Davis
%
% This file is part of the S-Lang Library and may be distributed under the
% terms of the GNU General Public License.  See the file COPYING for
% more information.
%---------------------------------------------------------------------------
import ("csv");

% Decoder line source for File_Type input.
% info: struct with fields fp, comment_char, comment, comment_len.
% Returns the next line read by fgets, or NULL when fgets reports -1.
% Lines whose first byte equals comment_char and whose first comment_len
% bytes match info.comment are skipped.
private define read_fp_callback (info)
{
   variable line, comment_char = info.comment_char;
   forever
     {
        if (-1 == fgets (&line, info.fp))
          return NULL;

        if ((line[0] == comment_char)
            && (0 == strnbytecmp (line, info.comment, info.comment_len)))
          continue;

        return line;
     }
}

% Decoder line source for an array or list of strings.
% str_info: struct with fields strings, i (next index), n (count) and
% output_crlf (set when the previously returned string did not end in
% a newline, so that a "\n" is delivered on the following call).
% Returns the next string, "\n", or NULL when all strings are consumed.
private define read_strings_callback (str_info)
{
   variable line;

   if (str_info.output_crlf)
     {
        str_info.output_crlf = 0;
        return "\n";
     }
   variable i = str_info.i;
   if (i >= str_info.n)
     return NULL;
   line = str_info.strings[i];
   str_info.i = i+1;
   % An empty string has no last character to inspect; treat it like any
   % other string lacking a trailing newline instead of indexing it.
   if ((strbytelen (line) == 0) || (line[-1] != '\n'))
     str_info.output_crlf = 1;
   return line;
}

% Resize every array in list to exactly n elements, in place in the list.
% Longer arrays are truncated to their first n elements; others are copied
% into a freshly created array of the same element type and length n.
private define resize_arrays (list, n)
{
   _for (0, length(list)-1, 1)
     {
        variable i = ();
        variable a = list[i];
        variable m = length(a);
        if (m > n)
          {
             list[i] = a[[:n-1]];
             continue;
          }
        variable b = _typeof(a)[n];
        b[[:m-1]] = a;
        list[i] = b;
     }
}

% Convert x with atof and cast the result to Float_Type.  The value is
% left on the stack as the function's return value.
private define atofloat (x)
{
   typecast (atof(x), Float_Type);
}

% Map the value of the "blankrows" qualifier to decoder flag bits:
% "skip" -> CSV_SKIP_BLANK_ROWS, "stop" -> CSV_STOP_BLANK_ROWS, else 0.
private define get_blankrows_bits (val)
{
   if (val == "skip") return CSV_SKIP_BLANK_ROWS;
   if (val == "stop") return CSV_STOP_BLANK_ROWS;
   return 0;
}

% Method: decode and return one row from csv.decoder.
% Qualifier: blankrows="skip"|"stop"|other.
private define read_row (csv)
{
   % The blank row handling default is to use that of the csv object.
   if (qualifier_exists ("blankrows"))
     {
        return _csv_decode_row (csv.decoder,
                                get_blankrows_bits (qualifier("blankrows")));
     }
   return _csv_decode_row (csv.decoder);
}

% Canonicalize header/column names.  NULL and non-string values are
% returned unchanged (a list is first converted to an array).  String
% names are lower-cased and empty names become "colN", where N is the
% 1-based position.  A scalar string in yields a scalar string out.
private define fixup_header_names (names)
{
   if (names == NULL) return names;
   if (typeof (names) == List_Type)
     names = list_to_array (names);
   if (_typeof(names) != String_Type)
     return names;

   variable is_scalar = (typeof (names) != Array_Type);
   if (is_scalar)
     names = [names];

   names = strlow (names);
   variable i = where (names == "");
   names[i] = array_map (String_Type, &sprintf, "col%d", i+1);

#iffalse
   % This code is no longer necessary since slang now allows arbitrary
   % structure names.
   names = strtrans (names, "^\\w", "_");
   names = strcompress (names, "_");

   _for i (0, length(names)-1, 1)
     {
        if ('0' <= names[i][0] <= '9')
          names[i] = "_" + names[i];
     }
#endif
   if (is_scalar) names = names[0];
   return names;
}

% Pop n column arguments from the stack and merge them into one array.
% Throws TypeMismatchError when the arguments cannot be concatenated.
private define pop_columns_as_array (n)
{
   if (n == 0)
     return String_Type[0];

   try
     {
        % allow a mixture of arrays and scalars
        variable columns = __pop_list (n);
        columns = [__push_list(columns)];
        return columns;
     }
   catch TypeMismatchError:
     {
        throw TypeMismatchError, "Column arguments cannot be a mixture of ints and strings";
     }
}

% Method: read the remaining rows of the decoder as columns.
% Stack arguments: csv [, columns...] where columns are 1-based column
% numbers or column names (names require the header qualifier).
% Returns a struct with one array field per selected column.
% Throws InvalidParmError for inconsistent fields/columns/types or for
% unknown column names or numbers.  See the usage text for qualifiers.
private define read_cols ()
{
   if ((_NARGS == 0) || (qualifier_exists ("help")))
     {
        usage("struct = .readcol ([columns] ; qualifiers)\n\
where columns is an optional 1-based array of column numbers,\n\
 or array of column names.\n\
Qualifiers:\n\
 header=header, fields=[array of field names],\n\
 type=value|array|string of 's','i','l','f','d' (str,int,long,float,dbl)\n\
 typeNTH=val (specifiy type for NTH column)\n\
 snan=\"\", inan=0, lnan=0L, fnan=_NaN, dnan=_NaN (defaults for empty fields),\n\
 nanNTH=val (value used for an empty field in the NTH column\n\
"
             );
     }

   variable columns = NULL;
   if (_NARGS > 1)
     {
        columns = pop_columns_as_array (_NARGS-1);
     }
   variable csv = ();

   variable fields = qualifier ("fields");
   variable header = qualifier ("header");
   variable types = qualifier ("type");
   variable snan = qualifier ("snan", "");
   variable dnan = qualifier ("dnan", _NaN);
   variable fnan = qualifier ("fnan", typecast(_NaN,Float_Type));
   variable inan = qualifier ("inan", 0);
   variable lnan = qualifier ("lnan", 0L);

   if ((fields != NULL) && (columns != NULL)
       && (length(fields) != length(columns)))
     throw InvalidParmError, "The fields qualifier must be the same size as the number of columns";

   variable flags = get_blankrows_bits (qualifier("blankrows", "skip"));

   header = fixup_header_names (header);
   columns = fixup_header_names (columns);

   variable columns_are_string = _typeof(columns) == String_Type;

   if ((header == NULL) && columns_are_string)
     throw InvalidParmError, "No header was supplied to map column names";

   % Translate column names into 1-based column numbers via the header.
   variable column_ints = columns, col, i, j;
   if (columns_are_string)
     {
        column_ints = Int_Type[length(columns)];
        _for i (0, length(columns)-1, 1)
          {
             col = columns[i];
             j = wherefirst (col == header);
             if (j == NULL)
               throw InvalidParmError, "Unknown (canonical) column name $col"$;
             column_ints[i] = j+1;
          }
     }

   % The first data row determines the number of available columns.
   variable row_data = _csv_decode_row (csv.decoder, flags);
   if (column_ints == NULL)
     column_ints = [1:length(row_data)];

   if (any(column_ints>length(row_data)))
     {
        throw InvalidParmError, "column number is too large for data";
     }
   variable ncols = length(column_ints);

   variable datastruct = NULL;
   if (fields == NULL)
     {
        if (columns_are_string)
          fields = columns;
        else if (header != NULL)
          fields = header[column_ints-1];
        else
          fields = array_map(String_Type, &sprintf, "col%d", column_ints);
     }
   datastruct = @Struct_Type(fields);

   column_ints -= 1;                   %  make 0-based

   variable convert_funcs = Ref_Type[ncols], convert_func, val;
   variable nan_values = {};
   loop(ncols) list_append(nan_values, snan);

   if (types == NULL)
     {
        types = qualifier_exists ("auto") ? 'A' : 's';
     }

   if (typeof(types) == List_Type)
     types = list_to_array (types);

   if (typeof(types) == String_Type)
     types = bstring_to_array (types);

   if ((typeof(types) == Array_Type) && (length(types) != ncols))
     throw InvalidParmError, "types array must be equal to the number of columns";

   if (typeof (types) != Array_Type)
     types = types[Int_Type[ncols]];   %  single (default) type specified

   % Per-column overrides: type1, type2, ... (1-based, selected columns).
   variable i1;
   _for i (1, ncols, 1)
     {
        i1 = i-1;
        types[i1] = qualifier ("type$i"$, types[i1]);
     }

   i = where(types=='i');
   convert_funcs[i] = &atoi; nan_values[i] = typecast(inan, Int_Type);

   i = where(types=='l');
   convert_funcs[i] = &atol; nan_values[i] = typecast(lnan, Long_Type);

   i = where(types=='f');
   convert_funcs[i] = &atofloat; nan_values[i] = typecast (fnan, Float_Type);

   i = where(types=='d');
   convert_funcs[i] = &atof; nan_values[i] = typecast(dnan, Double_Type);

   _for i (1, ncols, 1)
     {
        i1 = i-1;
        if (types[i1] == 'A')
          {
             % Guess from the field that this output column actually
             % reads: index the row through column_ints (0-based here),
             % not by the output column position.
             variable type = _slang_guess_type (row_data[column_ints[i1]]);
             if (type == Double_Type)
               {
                  convert_funcs[i1] = &atof;
                  nan_values[i1] = dnan;
                  types[i1] = 'd';
               }
             else if (type == Int_Type)
               {
                  convert_funcs[i1] = &atoi;
                  nan_values[i1] = inan;
                  types[i1] = 'i';
               }
             else types[i1] = 's';
          }
        % Per-column override of the empty-field value: nan1, nan2, ...
        val = nan_values[i1];
        nan_values[i1] = typecast (qualifier ("nan$i"$, val), typeof(val));
     }

   variable list_of_arrays = {}, array;
   variable init_size = 0x8000;
   variable dsize = init_size;
   variable max_allocated = init_size;

   % Create the column arrays and store the already-decoded first row.
   _for i (0, ncols-1, 1)
     {
        val = row_data[column_ints[i]];
        array = typeof(nan_values[i])[max_allocated];
        ifnot (strbytelen(val))
          val = nan_values[i];
        else
          {
             convert_func = convert_funcs[i];
             if (convert_func != NULL)
               val = (@convert_func)(val);
          }
        array[0] = val;
        list_append (list_of_arrays, array);
     }

   variable nread = 1;
   variable min_row_size = 1+max(column_ints);
   while (row_data = _csv_decode_row (csv.decoder, flags), row_data != NULL)
     {
        if (length (row_data) < min_row_size)
          {
             % FIXME-- make what to do here configurable
             if (length(row_data) == 0)
               break;

             continue;
          }

        % Grow all column arrays by dsize elements when they are full.
        if (nread >= max_allocated)
          {
             max_allocated += dsize;
             resize_arrays (list_of_arrays, max_allocated);
          }

        _for i (0, ncols-1, 1)
          {
             val = row_data[column_ints[i]];
             ifnot (strbytelen(val))
               {
                  list_of_arrays[i][nread] = nan_values[i];
                  continue;
               }
             convert_func = convert_funcs[i];
             if (convert_func == NULL)
               {
                  list_of_arrays[i][nread] = val;
                  continue;
               }
             list_of_arrays[i][nread] = (@convert_func)(val);
          }
        nread++;
     }

   % Trim the arrays to the number of rows actually read.
   resize_arrays (list_of_arrays, nread);
   set_struct_fields (datastruct, __push_list(list_of_arrays));
   return datastruct;
}

% Create a CSV decoder object.
% Argument: a file name, a File_Type, or an array/list of strings.
% Qualifiers: quote, delim, skiplines, comment, blankrows.
% Returns a struct with fields decoder, readrow and readcol.
% Throws OpenError when a named file cannot be opened.
define csv_decoder_new ()
{
   if (_NARGS != 1)
     usage ("\
obj = csv_decoder_new (file|fp|strings ; qualifiers);\n\
Qualifiers:\n\
  quote='\"', delim=',', skiplines=0, comment=string");

   variable fp = ();
   variable type = typeof(fp);
   variable func = &read_fp_callback;
   variable func_data;

   variable skiplines = qualifier("skiplines", 0);
   variable delim = qualifier("delim", ',');
   variable quote = qualifier("quote", '"');
   variable comment = qualifier("comment", NULL);
   variable comment_char = (comment == NULL) ? NULL : comment[0];
   variable flags = get_blankrows_bits (qualifier("blankrows", "skip"));

   if ((type == Array_Type) || (type == List_Type))
     {
        func = &read_strings_callback;
        % Skipped lines are handled by starting at index skiplines.
        func_data = struct
          {
             strings = fp,
             i = skiplines,
             n = length(fp),
             output_crlf = 0,
             comment_char = comment_char,
             comment = comment,
          };
     }
   else
     {
        if (type != File_Type)
          {
             variable file = fp;
             fp = fopen (file, "r");
             if (fp == NULL)
               throw OpenError, "Unable to open CSV file $file"$;
          }

        func_data = struct
          {
             fp = fp,
             comment_char = comment_char,
             comment = comment,
             comment_len = ((comment == NULL) ? 0 : strbytelen(comment)),
          };

        variable line;
        loop (skiplines)
          () = fgets (&line, fp);
     }

   variable csv = struct
     {
        decoder = _csv_decoder_new (func, func_data, delim, quote, flags),
        readrow = &read_row,
        readcol = &read_cols,
     };

   return csv;
}

% Encoder

% Method: write column data as CSV rows.
% Stack arguments: csv, file|fp, then a list of columns, a struct, or
% the individual columns.  A file name is opened in "wb" mode and closed
% on exit; a File_Type passed by the caller is left open.
% Qualifiers: names, noheader, quoteall, quotesome, rdb.
% Throws OpenError, WriteError, or InvalidParmError (unequal lengths).
private define writecol ()
{
   if ((_NARGS < 3) || qualifier_exists("help"))
     {
        usage("\
writecol (file|fp, list_of_column_data | datastruct | col1,col2,...)\n\
Qualifiers:\n\
  names=array-of-column-names, noheader, quoteall, quotesome, rdb\n\
"
             );
     }

   variable csv, data, file;
   if (_NARGS == 3)
     {
        (csv, file, data) = ();
     }
   else
     {
        data = __pop_list (_NARGS-2);
        (csv, file) = ();
     }

   variable type = typeof (data);
   if ((type != List_Type) && (type != Array_Type)
       && not is_struct_type (data))
     data = {data};

   variable flags = 0;
   if (qualifier_exists ("quoteall")) flags |= CSV_QUOTE_ALL;
   if (qualifier_exists ("quotesome")) flags |= CSV_QUOTE_SOME;
   variable rdb = qualifier_exists ("rdb");

   variable fp = file;
   if (typeof(file) != File_Type)
     fp = fopen (file, "wb");
   if (fp == NULL)
     throw OpenError, "Error opening $file in write mode"$;

   variable names = NULL;
   ifnot (qualifier_exists ("noheader"))
     {
        names = qualifier ("names");
        if ((names == NULL) && is_struct_type (data))
          names = get_struct_field_names (data);
     }

   if (is_struct_type (data))
     {
        % pop() discards the last item pushed by _push_struct_field_values;
        % the collected values are then reversed.
        data = {(_push_struct_field_values(data), pop())};
        list_reverse (data);
     }

   EXIT_BLOCK
     {
        % Close the file only if it was opened here.
        ifnot (__is_same(file, fp))
          {
             if (-1 == fclose (fp))
               throw WriteError, "Error closing $file"$;
          }
     }

   variable ncols = length(data);
   if (length (data) == 0)
     return;
   variable nrows = length(data[0]), i, j;
   _for i (1, ncols-1, 1)
     {
        if (nrows != length(data[i]))
          throw InvalidParmError, "CSV data columns must be the same length";
     }

   variable str, encoder = csv.encoder;

   if (names != NULL)
     {
        if (typeof (names) == List_Type)
          names = list_to_array (names);

        str = _csv_encode_row (encoder, names, flags);
        if (-1 == fputs (str, fp))
          throw WriteError, "Write to CSV file failed";

        if (rdb)
          {
             % Second header row: "N" for numeric columns, "S" otherwise.
             variable types = String_Type[ncols];
             _for i (0, ncols-1, 1)
               types[i] = __is_datatype_numeric (_typeof(data[i])) ? "N" : "S";

             str = _csv_encode_row (encoder, types, flags);
             if (-1 == fputs (str, fp))
               throw WriteError, "Write to CSV file failed";
          }
     }

   variable row_data = String_Type[ncols];
   _for i (0, nrows-1, 1)
     {
        _for j (0, ncols-1, 1)
          row_data[j] = string (data[j][i]);

        str = _csv_encode_row (encoder, row_data, flags);
        if (-1 == fputs (str, fp))
          throw WriteError, "Write to CSV file failed";
     }
}

% Create a CSV encoder object.
% Qualifiers: delim, quote, quotesome, quoteall, rdb (tab delimiter
% unless delim is given).
% Returns a struct with fields encoder and writecol.
define csv_encoder_new ()
{
   if (qualifier_exists ("help"))
     {
        usage ("csv = csv_encoder_new ();\n\
Qualifiers:\n\
  delim=','\n\
  quote='\"'\n\
  quotesome, quoteall\n\
  rdb\n\
"
              );
     }

   variable flags = 0;
   if (qualifier_exists ("quoteall")) flags |= CSV_QUOTE_ALL;
   if (qualifier_exists ("quotesome")) flags |= CSV_QUOTE_SOME;
   variable quotechar = qualifier ("quote", '"');
   variable delimchar = qualifier ("delim", qualifier_exists ("rdb") ? '\t' : ',');

   variable csv = struct
     {
        encoder = _csv_encoder_new (delimchar, quotechar, flags),
        writecol = &writecol,
     };

   return csv;
}

% Convenience wrapper: create an encoder from the caller's qualifiers
% and pass all stack arguments and qualifiers to its writecol method.
define csv_writecol ()
{
   if ((_NARGS < 2) || qualifier_exists("help"))
     {
        usage("\
csv_writecol (file|fp, list_of_column_data | datastruct | col1,col2,...)\n\
Qualifiers:\n\
  names=array-of-column-names, noheader, quote=val, quoteall, quotesome\n\
"
             );
     }

   variable args = __pop_list (_NARGS);
   variable csv = csv_encoder_new (;;__qualifiers);
   csv.writecol (__push_list(args);;__qualifiers);
}

% Convert the string-array field NAME of struct S to a numeric array, in
% place.  Does nothing for empty or non-string fields.  The target type
% is chosen from _slang_guess_type of each element: any Double -> atof;
% all Int -> atoi; else any Float -> atofloat; else any Long -> atol;
% else any Int -> atoi; otherwise atof.
private define convert_to_numeric (s, name)
{
   variable val = get_struct_field (s, name);
   variable num = length (val);
   if ((num == 0) || (_typeof (val) != String_Type))
     return;

   % Runs at each return below to store the converted array.
   EXIT_BLOCK
     {
        set_struct_field (s, name, val);
     }

   variable types = DataType_Type[num];
   _for (0, length (val)-1, 1)
     {
        variable i = ();
        variable type = _slang_guess_type (val[i]);
        if (type == Double_Type)
          {
             val = atof (val);
             return;
          }
        types[i] = type;
     }

   if (all (types == Int_Type))
     {
        val = atoi (val);
        return;
     }

   if (any (types == Float_Type))
     {
        val = atofloat (val);
        return;
     }

   if (any (types == Long_Type))
     {
        val = atol (val);
        return;
     }

   if (any (types == Int_Type))
     {
        val = atoi (val);
        return;
     }

   val = atof (val);
}

% Read columns from a CSV file, File_Type, or array/list of strings.
% Stack arguments: file|fp|strings [, columns...].
% Returns the struct produced by the decoder's readcol method.  With the
% rdb qualifier, columns whose rdb type is "N" are converted to numeric.
% See the usage text for the supported qualifiers.
define csv_readcol ()
{
   if ((_NARGS == 0) || qualifier_exists("help"))
     {
        usage ("struct = csv_readcol (file|fp [,columns] ;qualifier)\n\
where columns is an optional 1-based array of column numbers,\n\
 or array of column names.\n\
Qualifiers:\n\
 quote='\"', delim=',', skiplines=0, comment=string, has_header,\n\
 header=header, fields=[array of field names],\n\
 type=value|array of 's','i','l','f','d' (string,int,long,float,double)\n\
 typeNTH=val (specifiy type for NTH column)\n\
 snan=\"\", inan=0, lnan=0L, fnan=_NaN, dnan=_NaN (defaults for empty fields),\n\
 nanNTH=val (value used for an empty field in the NTH column\n\
"
              );
     }

   variable file, columns;
   columns = __pop_list (_NARGS-1);
   file = ();

   variable q = __qualifiers ();
   variable rdb = qualifier_exists ("rdb");

   % rdb files are tab-delimited files, # is a comment character,
   % the first non-comment line contains the field names, the
   % second line gives the field types.
   if (rdb)
     {
        % NOTE(review): this replaces, rather than extends, the caller's
        % qualifiers, so any others given with rdb are dropped -- confirm
        % that this is intended.
        q = struct { comment = "#", delim = '\t' };
     }
   variable types = NULL;
   variable csv = csv_decoder_new (file ;; q);
   if (rdb || qualifier_exists ("has_header"))
     {
        variable header = csv.readrow ();
        q = struct { header=header, @q };
        if (rdb)
          {
             % The type field consists of an integer, followed by a
             % type specifier, and a justification character.  The
             % integer and justification characters are for display
             % purposes.  The type specifier is N for numeric, S for
             % string, M for month.  Here, M and S will be treated the
             % same.
             types = csv.readrow ();
             types = strtrans (types, "0-9<>", "");
          }
     }

   variable s = csv.readcol (__push_list(columns) ;; q);

   if (rdb)
     {
        ifnot (length (columns))
          columns = header;
        header = fixup_header_names (header);
        foreach (columns)
          {
             variable col = ();
             if (typeof (col) == String_Type)
               col = fixup_header_names (col);
             else
               col = header[col-1];

             variable i = wherefirst (col == header);
             if ((i == NULL) || (types[i] != "N"))
               continue;

             convert_to_numeric (s, col);
          }
     }
   return s;
}

gorilla

Latest stories

Can You Get Through These 12 Sexy Photos Without Saying WOW? (Open list)

Latest stories

Log In

Sign In

Forgot password?

Your password reset link appears to be invalid or expired.

Log in

Privacy Policy

Add to Collection

No Collections

Hey Friend! Before You Go…

Hey Friend!
Before You Go…