October | 2014 | World v1.0 beta

Continuing on from the previous post, there was another side challenge to implement the POSIX utility wc. Someone claimed C++ makes things unnecessarily hard and the challenge was supposed to prove it. Well, it was simple and I threw in a simple (incomplete) SLOC counter as well. The challenger couldn’t argue that C++ made it hard to implement wc, and so decided to nitpick on small things that do not even relate to the challenge at hand, mostly around coding style preferences that have nothing to do with the ease of implementing the core functionality of wc.

I’m by no means the best C++ coder in terms of complexity or style. Judge for yourself whether or not this was impossible to do cleanly in C++:

#include <iostream>
#include <fstream>
#include <sstream>
#include <algorithm>

// Which size column the user asked for.
enum class char_opts
{
  BYTES = 0,    // selected by -c / -bytes
  CHARS = 1,    // selected by -m / -chars
  NUM_OPTS = 2  // number of real options; main also uses it as "none selected"
};

/**
 * Accumulates wc-style statistics for everything readable from `_in`.
 *
 * Every counter is added to (never reset), so the caller chooses the starting values.
 *
 * @param _in             stream consumed line by line until extraction fails
 * @param sloc_count      incremented for each line that, after trimming surrounding
 *                        spaces and tabs, is non-empty, is not a lone "{" or "}",
 *                        and does not start with "//" (block comments are not detected)
 * @param line_count      incremented for each newline-terminated line
 * @param word_count      incremented for each whitespace-separated token
 * @param char_count      increased by each line's length plus one per newline; this is
 *                        currently the same value as byte_count because multi-byte
 *                        characters are not decoded
 * @param byte_count      increased by each line's length plus one per newline
 * @param max_line_length raised to the length of the longest line seen
 * @param find_count      incremented for each (possibly overlapping) occurrence of `str`
 * @param str             text to search for; an empty string disables the search
 */
void count(std::istream& _in, unsigned& sloc_count, unsigned &line_count, unsigned &word_count, unsigned &char_count, unsigned &byte_count, unsigned &max_line_length, unsigned &find_count, const std::string &str)
{
  static const char* const blanks = " \t";

  std::string line;
  while (std::getline(_in, line))
  {
    const auto length = static_cast<unsigned>(line.length());
    byte_count += length;
    char_count += length;

    // getline reports EOF only when the line ended without a newline, so a
    // clear EOF flag means a newline was consumed and must be counted.
    if (!_in.eof())
    {
      ++line_count;
      ++byte_count;
      ++char_count;
    }
    max_line_length = std::max(max_line_length, length);

    // Advancing by one position (not by str.length()) counts overlapping matches.
    if (!str.empty())
      for (auto pos = line.find(str); pos != std::string::npos; pos = line.find(str, pos + 1))
        ++find_count;

    std::istringstream tokens{line};
    std::string word;
    while (tokens >> word) ++word_count;

    // Trim leading/trailing blanks; keep the last non-blank character itself.
    const auto first = line.find_first_not_of(blanks);
    if (first == std::string::npos) continue;  // empty or blank-only line
    const auto last = line.find_last_not_of(blanks);
    const std::string trimmed = line.substr(first, last - first + 1);

    const bool lone_brace = trimmed == "{" || trimmed == "}";
    const bool line_comment = trimmed.compare(0, 2, "//") == 0;
    if (!lone_brace && !line_comment) ++sloc_count;
  }
}

int main(int _c, char** _v)
{
  char_opts copts = char_opts::NUM_OPTS;
  bool sloc = false;
  bool lines = false;
  bool words = false;
  bool line_length = false;
  std::string str;

  bool opts_supplied = false;

  auto args = _v + 1;
  const auto end = _v + _c;
  for (; args < end; ++args)
  {
    std::string arg{*args};
    if (arg == "-" || arg[0] != '-') break;

    if (arg == "-c" || arg == "-bytes") copts = char_opts::BYTES;
    else if (arg == "-m" || arg == "-chars") copts = char_opts::CHARS;
    else if (arg == "-L" || arg == "-max-line-length") line_length = true;
    else if (arg == "-sloc") sloc = true;
    else if (arg == "-l" || arg == "-lines") lines = true;
    else if (arg == "-w" || arg == "-words") words = true;
    else if (arg == "-o") str = *++args;
    else
    {
      std::cerr << "Invalid argument '" <<  arg << ''' << std::endl;
      return -1;
    }

    opts_supplied = true;
  }

  if (!opts_supplied)
  {
    copts = char_opts::BYTES;
    lines = true;
    words = true;
    line_length = true;
  }

  unsigned file_count = 0;
  unsigned total_sloc_count = 0;
  unsigned total_line_count = 0;
  unsigned total_word_count = 0;
  unsigned total_char_count = 0;
  unsigned total_byte_count = 0;
  unsigned total_max_line_length = 0;
  unsigned total_find_count = 0;
  for (bool no_file = args == end; no_file || args < end; ++args, ++file_count, no_file = false)
  {
    std::string filename{no_file ? "" : *args};
    unsigned sloc_count = 0;
    unsigned line_count = 0;
    unsigned word_count = 0;
    unsigned char_count = 0;
    unsigned byte_count = 0;
    unsigned max_line_length = 0;
    unsigned find_count = 0;
    if (no_file || filename == "-") std::cin.clear();
    count(no_file || filename == "-" ? std::cin : std::move(std::ifstream{filename}), sloc_count, line_count, word_count, char_count, byte_count, max_line_length, find_count, str);
    std::cout << (sloc ? std::to_string(sloc_count) + " " : "")
          << (lines ? std::to_string(line_count) + " " : "")
          << (words ? std::to_string(word_count) + " " : "")
          << (copts != char_opts::NUM_OPTS ? std::to_string(copts == char_opts::BYTES ? byte_count : char_count) + " " : "")
          << (line_length ? std::to_string(max_line_length) + " " : "")
          << (!str.empty() ? std::to_string(find_count) + " " : "")
          << filename <<  std::endl;

    total_sloc_count += sloc_count;
    total_line_count += line_count;
    total_word_count += word_count;
    total_char_count += char_count;
    total_byte_count += byte_count;
    total_max_line_length = std::max(total_max_line_length, max_line_length);
  }

  if (file_count > 1) std::cout << (sloc ? std::to_string(total_sloc_count) + " " : "")
        << (lines ? std::to_string(total_line_count) + " " : "")
        << (words ? std::to_string(total_word_count) + " " : "")
        << (copts != char_opts::NUM_OPTS ? std::to_string(copts == char_opts::BYTES ? total_byte_count : total_char_count) + " " : "")
        << (line_length ? std::to_string(total_max_line_length) + " " : "")
        << (!str.empty() ? std::to_string(total_find_count) + " " : "")
        << "total" <<  std::endl;

  return 0;
}

I recently had a discussion and a challenge comparing two languages, C++ and Python. I think modern C++ holds up really well against so-called scripting languages for writing quick and dirty utility programs. This is a reasonably short implementation of a prime number finder:

#include <cstdio>
/**
 * Trial-division primality test against a table of already known primes.
 *
 * Written as an explicit template so it compiles before C++20, where `auto`
 * function parameters are not available.
 *
 * @param _candidate value to test
 * @param _first     first divisor to try
 * @param _last      one past the last divisor to try
 * @return false as soon as a divisor in [_first, _last) divides `_candidate`;
 *         true if none does before the range ends or a divisor exceeds the
 *         square root of `_candidate`.
 *
 * The early exit is only meaningful if the divisors are in ascending order;
 * divisors must be non-zero.
 */
template <typename Candidate, typename Prime>
inline bool prime(const Candidate _candidate, const Prime *_first, const Prime *_last) {
  // `*p <= _candidate / *p` is the same bound as `*p * *p <= _candidate` for
  // positive integers, but cannot overflow.
  for (auto p = _first; p != _last && *p <= _candidate / *p; ++p)
    if (_candidate % *p == 0)
      return false;
  return true;
}
/**
 * Fills a table with the first `num_primes` primes using trial division
 * against the primes found so far, then prints the last one.
 */
int main(int _c, char** _v) {
  const unsigned num_primes = 10000;
  // Seeded with the first two primes; the remaining slots start at zero.
  static unsigned primes[num_primes] = {2, 3};
  for (unsigned i = 2; i < num_primes; ++i) {
    // Only odd candidates are tried, so the divisor range starts at
    // primes + 1 and skips the prime 2.
    primes[i] = primes[i - 1] + 2;
    while (!prime(primes[i], primes + 1, primes + i))
      primes[i] += 2;
  }
  printf("The %uth prime is: %u.\n", num_primes, primes[num_primes - 1]);
  return 0;
}

Several things. Mostly I’ve learned to re-embrace the spirit of C/C++ for brevity, such as single statement if and for blocks. But you really have to think about readability when you code in that style. The brief C style is only bad if it’s done without consideration about code aesthetics. The brief style shouldn’t be about reducing line count, but about increasing readability. It is a bit counter-intuitive coming from a university education that told you to put every if block in braces over multiple lines.

When coded in such a manner, modern C++ can approach the ease of writing that languages like Python enjoy.

World v1.0 beta

Re-engineering the world a line at a time

Month: October 2014

Radix sort – another language comparison exercise

More on brevity and clarity

Interesting exercise in brevity and clarity