rdbstats  2.0.9
Options.cc
1 #include <cstdlib>
2 #include <string>
3 #include <vector>
4 #include <iostream>
5 #include <algorithm>
6 
7 #include <CLI/CLI.hpp>
8 
9 #include <suplibxx/str.h>
10 
11 #include "RdbStats.h"
12 
13 #include "Options.h"
14 #include "config.h"
15 #include "usage.hpp"
16 
17 Options::Options( int argc, char* argv[] ) {
18 
19  CLI::App app( "Compute statistics on an rdb table" );
20 
21  try {
22  std::string group_str;
23  std::string percentile_str;
24  std::string override_str;
25 
26  app.add_option("-i,--input", input, "Input RDB file" )->check(CLI::ExistingFile);
27 
28  app.add_option("-o,--output", output, "Output RDB file" );
29 
30  auto opt_all = app.add_flag("-a,--all", all, "Generate statistics for all numerical columns" );
31 
32  auto opt_group = app.add_option( "-g,--group,-b,--break", group_str, "group data by the specified columns" )->join(',');
33  auto opt_rows = app.add_option( "-r,--rows", rows, "ranges of rows to operate on" );
34 
35  opt_group->excludes( opt_rows );
36 
37  auto normalize_map = std::map<std::string,Normalize>( {
38  { "ave", Normalize::Average },
39  { "med", Normalize::Median },
40  { "none", Normalize::None }
41  } );
42 
43  app.add_option( "-n,--normalize", normalize, "Normalize the output statistics" )
44  ->transform( CLI::Transformer( normalize_map ));
45 
46  app.add_option("-p,--percentiles", percentile_str, "Generate percentile statistics" )->join(',');
47 
48  app.add_flag( "-q,--quartiles", quartiles, "Generate quartile statistics" );
49 
50  app.add_option("-d,--defn", override_str, "override column definition" )->join();
51 
52  app.add_flag( "--manual,--usage", manual, "output detailed help, then exit" );
53 
54  std::string include_exact_str;
55  app.add_option("--include,-I", include_exact_str, "name of column to include" )->join();
56  std::string include_re_str;
57  app.add_option("--include-re,-J", include_re_str, "regular expression matching column to include" )->join();
58 
59  std::string exclude_exact_str;
60  app.add_option("--exclude,-X", exclude_exact_str, "name of column to exclude" )->join();
61  std::string exclude_re_str;
62  app.add_option("--exclude-re,-Y", exclude_re_str, "regular expression matching column to exclude" )->join();
63 
64  auto opt_columns = app.add_option( "columns", include_exact, "specific columns to process" );
65  opt_all->excludes(opt_columns);
66 
67  bool debug = false;
68  app.add_flag( "-D", debug, "debug" );
69 
70  bool version = false;
71  app.add_flag( "--version,-v", version, "output version, then exit" );
72 
73  app.parse( argc, argv );
74 
75  // parse percentiles
76  {
77  using size_type = std::string::size_type;
78  size_type length = percentile_str.size();
79  size_type next;
80 
81  for( size_type first = 0 ; first < length ; first += next + 1) {
82  std::string label{ percentile_str.substr( first, length - first ) };
83  double percentile;
84  try {
85  percentile = std::stod( label, &next );
86  }
87  catch ( std::invalid_argument& e ) {
88  throw Exception( "error parsing percentile (" + label + ")" );
89  }
90  catch ( std::out_of_range& e ) {
91  throw Exception( "percentile (" + label + "): must be a real number within [ 0.0, 100.0 ]" );
92  }
93 
94  if ( next != length - first && label[next] != ',' )
95  throw Exception( "error parsing percentile (" + label + "): unexpected character" + label[next] );
96  if ( percentile < 0 || percentile > 100 )
97  throw Exception( "percentile (" + label + "): must be a real number within [ 0.0, 100.0 ]" );
98  percentiles.emplace_back( "_p" + label.substr(0,next), percentile );
99  }
100  }
101 
102  // add quartiles if
103  // 1. explicitly requested
104  // 2. normalize by median requested and no percentiles specified
105  if ( quartiles || ( normalize == Normalize::Median && percentiles.empty() ) ) {
106  percentiles.emplace_back( "_fq", 25 );
107  percentiles.emplace_back( "_lq", 75 );
108  }
109 
110  // handle column type redefinitions
111  {
112  std::vector<std::string> override_defs;
113  suplib::tok( override_defs, override_str, "\n" );
114 
115  for ( const auto& override_def : override_defs ) {
116  std::vector<std::string> defspec;
117  suplib::tok( defspec, override_def, "," );
118 
119  switch ( defspec.size() ) {
120 
121  case 1:
122  override_defn.emplace_back( defspec[0], OverrideType::Toggle );
123  break;
124 
125  case 2:
126  if ( defspec[1] == "N" ) override_defn.emplace_back( defspec[0], OverrideType::Number );
127  else if ( defspec[1] == "S" ) override_defn.emplace_back( defspec[0], OverrideType::String );
128  else throw Exception( "Unknown definition override type: " + defspec[1] );
129  break;
130 
131  default:
132  throw Exception( "unable to parse definition override specification: " + override_str );
133  break;
134  }
135  }
136  }
137 
138  suplib::tok( group, group_str, "," );
139 
140  std::vector<std::string> include_opt_exact;
141  suplib::tok( include_opt_exact, include_exact_str, "\n" );
142  std::copy( include_opt_exact.begin(), include_opt_exact.end(), std::back_inserter( include_exact ) );
143 
144  suplib::tok( include_re, include_re_str, "\n" );
145  suplib::tok( exclude_exact, exclude_exact_str, "\n" );
146  suplib::tok( exclude_re, exclude_re_str, "\n" );
147 
148  if ( debug )
149  std::cerr << *this;
150 
151  if ( manual ) {
152  usage();
153  exit_requested = true;
154  exit_value = EXIT_SUCCESS;
155  }
156 
157  if ( version ) {
158  std::cout << PACKAGE_STRING << "\n";
159  exit_requested = true;
160  exit_value = EXIT_SUCCESS;
161  }
162  }
163 
164  catch ( const CLI::ParseError &e ) {
165  exit_requested = true;
166  exit_value = app.exit(e);
167  }
168 
169 }
170 
171 void
172 Options::print( std::ostream& os ) {
173 
174  os << "include columns: " << to_string( include_exact ) << "\n";
175  os << "include columns (re): " << to_string( include_re ) << "\n";
176 
177  os << "exclude columns: " << to_string( exclude_exact ) << "\n";
178  os << "exclude columns (re): " << to_string( exclude_re ) << "\n";
179 
180  os << "all: " << ( all ? "true" : "false" ) << "\n";
181 
182  os << "groups: " << to_string( group ) << "\n";
183 
184  os << "input: " << ( input.empty() ? "stdin" : input ) << "\n";
185  os << "output: " <<( output.empty() ? "stdout" : output ) << "\n";
186 
187  os << "normalize: " << to_string( normalize ) << "\n";
188 
189  os << "override: " << to_string( override_defn ) << "\n";
190  os << "percentiles: " << to_string( percentiles ) << "\n";
191 
192  os << "quartiles: " << (quartiles ? "true" : "false") << "\n";
193  os << "rows: " << to_string( rows ) << "\n";
194 }
195 
196 std::ostream& operator << ( std::ostream& os, Options& a ) {
197  a.print( os );
198  return os;
199 }