#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <map>
#include <numeric>
#include <string>
#include <utility>
#include <vector>


// Deterministic 64-bit integer mixer: an affine step, three xor-shifts,
// a multiply, then three more xor-shifts. All arithmetic wraps modulo 2^64.
// The same input always yields the same output.
inline uint64_t hash64(uint64_t u) {
  constexpr uint64_t kPreMultiplier = 3935559000370003845ULL;
  constexpr uint64_t kPreIncrement = 2691343689449507681ULL;
  constexpr uint64_t kMidMultiplier = 4768777513237032717ULL;

  uint64_t h = u * kPreMultiplier + kPreIncrement;

  // First round of xor-shift mixing.
  h = h ^ (h >> 21);
  h = h ^ (h << 37);
  h = h ^ (h >> 4);

  h = h * kMidMultiplier;

  // Second round of xor-shift mixing.
  h = h ^ (h << 20);
  h = h ^ (h >> 41);
  h = h ^ (h << 5);

  return h;
}

// Returns the elements of `input` in a deterministic, partially sorted order.
//
// The input is sorted first. Every position i is then given a sort key:
// its own index i when (hash64(i) & 0xFFFF) % 100 < target_sortedness,
// otherwise a pseudo-random index in [0, n) derived from hash64(i + n).
// Positions are stably ordered by key and the sorted elements are emitted in
// that order. So roughly `target_sortedness` percent of positions keep their
// sorted index as key (100 or more keeps every key, 0 redirects every key).
//
// `input` is taken by value; the caller's vector is not modified.
std::vector<uint64_t>
scramble(uint64_t target_sortedness, std::vector<uint64_t> input) {
  std::sort(input.begin(), input.end());
  const uint64_t count = input.size();

  // Sort key for every position of the sorted input.
  std::vector<uint64_t> keys(count);
  for (uint64_t pos = 0; pos < count; ++pos) {
    const uint64_t percentile = (hash64(pos) & 0xFFFF) % static_cast<uint64_t>(100);
    const bool keeps_own_key = percentile < target_sortedness;
    keys[pos] = keeps_own_key ? pos : (hash64(pos + count) & 0xFFFFFFFF) % count;
  }

  // Identity permutation, stably reordered by key so that positions sharing
  // a key keep their relative sorted order.
  std::vector<uint64_t> order(count);
  std::iota(order.begin(), order.end(), static_cast<uint64_t>(0));
  std::stable_sort(order.begin(), order.end(),
                   [&keys](uint64_t lhs, uint64_t rhs) {
                     return keys[lhs] < keys[rhs];
                   });

  std::vector<uint64_t> shuffled;
  shuffled.reserve(count);
  for (const uint64_t source : order) {
    shuffled.push_back(input[source]);
  }
  return shuffled;
}

// Benchmark driver.
//
// Command line: main <n> <k> [<target-sortedness>]
//   n                  number of elements to generate (must be >= 0)
//   k                  modulus applied to the generated values, i.e. the
//                      approximate number of distinct values (must be >= 1)
//   target-sortedness  forwarded to scramble(); defaults to 60 (must be >= 0)
//
// Generates n values with hash64(i) % k, reorders them with scramble(), then
// times counting the occurrences of every distinct value with a std::map.
// The benchmark runs untimed for 3 seconds as warmup, followed by 10 timed
// runs. Prints the parameters, a preview of the input, each run's time in
// seconds, the average, a preview of the (value,count) pairs and the number
// of distinct values.
//
// Returns 0 on success. Prints the usage text and returns 1 when the
// arguments are missing, malformed or out of range.
int main(int argc, char* argv[]) {
  auto usage = "Usage: main <n> <k> [<target-sortedness>]\n  (sort and deduplicate n randomly generated elements with approximately k unique elements)";
  if (argc < 3 || argc > 4) {
    std::cout << usage << std::endl;
    return 1;
  }

  // Parses `text` as a complete base-10 long. Returns false (leaving `out`
  // untouched) for non-numeric input, trailing garbage or out-of-range values.
  auto parse_long = [](const char* text, long& out) {
    try {
      const std::string arg(text);
      std::size_t consumed = 0;
      const long value = std::stol(arg, &consumed);
      if (consumed != arg.size()) {
        return false;
      }
      out = value;
      return true;
    } catch (...) {
      // std::stol throws on malformed or out-of-range input; both are
      // reported to the caller as a plain parse failure.
      return false;
    }
  };

  long n = 0;
  long k = 0;
  long target_sortedness = 60;
  const bool parsed =
      parse_long(argv[1], n) &&
      parse_long(argv[2], k) &&
      (argc < 4 || parse_long(argv[3], target_sortedness));

  // k is used as a modulus, so it must be non-zero; negative values of any
  // argument would silently wrap around when converted to unsigned types.
  if (!parsed || n < 0 || k < 1 || target_sortedness < 0) {
    std::cout << usage << std::endl;
    return 1;
  }

  const uint64_t approx_num_unique = static_cast<uint64_t>(k);
  std::cout << "n " << n << std::endl;
  std::cout << "k " << k << std::endl;
  std::cout << "target-sortedness " << target_sortedness << std::endl;

  std::cout << "generating input... (might take a moment)" << std::endl;
  std::vector<uint64_t> data(static_cast<size_t>(n));
  for (size_t i = 0; i < data.size(); i++) {
    data[i] = hash64(i) % approx_num_unique;
  }

  // `data` is not needed afterwards, so hand it over instead of copying it.
  std::vector<uint64_t> input =
      scramble(static_cast<uint64_t>(target_sortedness), std::move(data));
  std::cout << "input ";
  const size_t input_preview = std::min(input.size(), static_cast<size_t>(20));
  for (size_t i = 0; i < input_preview; i++) {
    std::cout << input[i] << " ";
  }
  std::cout << "..." << std::endl;

  std::vector<std::pair<uint64_t, uint64_t>> uniques;

  // setup() resets the output between runs; bench() is the measured work:
  // count every element in an ordered map, then copy the (value,count) pairs
  // out in ascending value order.
  auto setup = [&]{ uniques.clear(); };
  auto bench = [&]{
    std::map<uint64_t, uint64_t> counts;
    for (uint64_t elem : input) {
      counts[elem]++;
    }
    uniques.assign(counts.begin(), counts.end());
  };


  // =========================================================================
  // WARMUP LOOP

  std::cout << "warming up for 3 seconds..." << std::endl;
  const auto threshold = std::chrono::duration<double>(3.0);
  const auto warmup_start = std::chrono::high_resolution_clock::now();
  while (std::chrono::high_resolution_clock::now() - warmup_start < threshold) {
    setup();
    bench();
  }

  std::cout << "warmup done." << std::endl;

  // =========================================================================
  // TIMING LOOP

  constexpr int kTimedRuns = 10;
  std::vector<std::chrono::duration<double>> times;
  times.reserve(kTimedRuns);

  for (int r = 0; r < kTimedRuns; r++) {
    setup();  // deliberately outside the timed region

    const auto start = std::chrono::high_resolution_clock::now();

    bench();

    const auto stop = std::chrono::high_resolution_clock::now();
    const std::chrono::duration<double> diff = stop - start;
    std::cout << "time " << diff.count() << std::endl;

    times.push_back(diff);
  }


  const std::chrono::duration<double> total_time = std::accumulate(
    times.begin(), times.end(), std::chrono::duration<double>(0.0));
  std::cout
    << "average "
    << (total_time / times.size()).count()
    << std::endl;


  // Preview of the result of the last timed run.
  const size_t uniques_preview = std::min(uniques.size(), static_cast<size_t>(10));
  for (size_t i = 0; i < uniques_preview; i++) {
    std::cout << "(" << uniques[i].first << "," << uniques[i].second << ") ";
  }
  std::cout << "..." << std::endl;
  std::cout << "num unique " << uniques.size() << std::endl;

  return 0;
}
