// Searches for good delimiters to cut streams into relatively well sized
// segments.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include <boost/cstdint.hpp>
#include <boost/array.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/thread.hpp>
#include <boost/bind.hpp>
#include <boost/shared_ptr.hpp>
#include <iostream>
#include <vector>
#include <map>

// Desired size range
#define MIN_DESIRED_SIZE 4096
#define MAX_DESIRED_SIZE 131072

#define DELIMITER_SET_SIZE 1
typedef boost::array<boost::uint16_t,DELIMITER_SET_SIZE> DelimArray;

struct BestEntry
{
  DelimArray best;
  double bestScore;
  std::vector<unsigned char> data;
};

boost::mutex bestLock;
boost::mutex outLock;
std::map<std::string,BestEntry> best;

static void runThread(const std::string &fileName)
{
  char tmp[4096];

  boost::mt19937 prng;
  {
    boost::uint32_t seed;
    FILE *ur = fopen("/dev/urandom","r");
    fread((void *)&seed,1,sizeof(seed),ur);
    fclose(ur);
    prng.seed(seed);
  }

  BestEntry *myEntry;
  {
    boost::mutex::scoped_lock l(bestLock);
    myEntry = &(best[fileName]);
    myEntry->bestScore = 99999999.0;
  }

  {
    boost::mutex::scoped_lock l(outLock);

    std::cout << "*** Reading test data from: " << fileName << std::endl;
    FILE *f = fopen(fileName.c_str(),"r");
    if (f) {
      int n;
      while ((n = fread((void *)tmp,1,sizeof(tmp),f)) > 0) {
        for(int i=0;i<n;++i)
          myEntry->data.push_back((unsigned char)tmp[i]);
      }
      fclose(f);
    }

    if (myEntry->data.size() <= 0) {
      std::cout << "Error: no data read." << std::endl;
      exit(1);
    } else std::cout << "*** Read " << myEntry->data.size() << " bytes of test data." << std::endl;

    std::cout.flush();
  }

  DelimArray current;
  for(unsigned int i=0;i<DELIMITER_SET_SIZE;++i)
    current[i] = (boost::uint16_t)prng();

  for(;;) {
    unsigned long numTooShort = 0;
    unsigned long numTooLong = 0;
    unsigned long numGood = 0;

    boost::uint32_t shiftRegister = 0;
    unsigned long segSize = 0;
    for(std::vector<unsigned char>::iterator i=myEntry->data.begin();i!=myEntry->data.end();++i) {
      shiftRegister <<= 1;
      shiftRegister |= (((boost::uint32_t)*i) & 1);

      ++segSize;

      boost::uint16_t transformedShiftRegister = (boost::uint16_t)(shiftRegister);

      for(DelimArray::iterator d=current.begin();d!=current.end();++d) {
        if (transformedShiftRegister == *d) {
          if (segSize < MIN_DESIRED_SIZE)
            ++numTooShort;
          else if (segSize > MAX_DESIRED_SIZE)
            ++numTooLong;
          else ++numGood;
          segSize = 0;
          break;
        }
      }
    }
    if (segSize) {
      if (segSize < MIN_DESIRED_SIZE)
        ++numTooShort;
      else if (segSize > MAX_DESIRED_SIZE)
        ++numTooLong;
      else ++numGood;
    }

    if (numGood) {
      double score = ((double)(numTooShort + numTooLong)) / ((double)numGood);

      if (score < myEntry->bestScore) {
        myEntry->best = current;
        myEntry->bestScore = score;

        boost::mutex::scoped_lock l(outLock);

        std::cout << fileName << ": ";

        for(DelimArray::iterator d=current.begin();d!=current.end();++d) {
          sprintf(tmp,"0x%.4x",(unsigned int)*d);
          if (d != current.begin())
            std::cout << ',';
          std::cout << tmp;
        }

        std::cout << ": " << numTooShort << " / " << numGood << " / " << numTooLong << " (" << score << ")" << std::endl;
        std::cout.flush();

        if ((numTooShort == 0)&&(numTooLong == 0))
          break;
      }
    }

    for(DelimArray::iterator i=current.begin();i!=current.end();++i)
      *i = (boost::uint16_t)prng();
  }
}

int main(int argc,char **argv)
{
  std::vector< boost::shared_ptr<boost::thread> > threads;

  for(int i=1;i<argc;++i) {
    boost::shared_ptr<boost::thread> t(new boost::thread(boost::bind(&runThread,std::string(argv[i]))));
    threads.push_back(t);
  }

  for(std::vector< boost::shared_ptr<boost::thread> >::iterator i=threads.begin();i!=threads.end();++i)
    (*i)->join();

  return 0;
}