ZeroTierOne/attic/historic/anode/libspark/experiments/FindGoodSegmentDelimiters.cpp
2017-05-04 10:42:22 -07:00

162 lines
4.0 KiB
C++

// Searches for good delimiters to cut streams into relatively well sized
// segments.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include <boost/cstdint.hpp>
#include <boost/array.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/thread.hpp>
#include <boost/bind.hpp>
#include <boost/shared_ptr.hpp>
#include <iostream>
#include <vector>
#include <map>
// Desired size range
#define MIN_DESIRED_SIZE 4096
#define MAX_DESIRED_SIZE 131072
#define DELIMITER_SET_SIZE 1
typedef boost::array<boost::uint16_t,DELIMITER_SET_SIZE> DelimArray;
struct BestEntry
{
DelimArray best;
double bestScore;
std::vector<unsigned char> data;
};
boost::mutex bestLock;
boost::mutex outLock;
std::map<std::string,BestEntry> best;
static void runThread(const std::string &fileName)
{
char tmp[4096];
boost::mt19937 prng;
{
boost::uint32_t seed;
FILE *ur = fopen("/dev/urandom","r");
fread((void *)&seed,1,sizeof(seed),ur);
fclose(ur);
prng.seed(seed);
}
BestEntry *myEntry;
{
boost::mutex::scoped_lock l(bestLock);
myEntry = &(best[fileName]);
myEntry->bestScore = 99999999.0;
}
{
boost::mutex::scoped_lock l(outLock);
std::cout << "*** Reading test data from: " << fileName << std::endl;
FILE *f = fopen(fileName.c_str(),"r");
if (f) {
int n;
while ((n = fread((void *)tmp,1,sizeof(tmp),f)) > 0) {
for(int i=0;i<n;++i)
myEntry->data.push_back((unsigned char)tmp[i]);
}
fclose(f);
}
if (myEntry->data.size() <= 0) {
std::cout << "Error: no data read." << std::endl;
exit(1);
} else std::cout << "*** Read " << myEntry->data.size() << " bytes of test data." << std::endl;
std::cout.flush();
}
DelimArray current;
for(unsigned int i=0;i<DELIMITER_SET_SIZE;++i)
current[i] = (boost::uint16_t)prng();
for(;;) {
unsigned long numTooShort = 0;
unsigned long numTooLong = 0;
unsigned long numGood = 0;
boost::uint32_t shiftRegister = 0;
unsigned long segSize = 0;
for(std::vector<unsigned char>::iterator i=myEntry->data.begin();i!=myEntry->data.end();++i) {
shiftRegister <<= 1;
shiftRegister |= (((boost::uint32_t)*i) & 1);
++segSize;
boost::uint16_t transformedShiftRegister = (boost::uint16_t)(shiftRegister);
for(DelimArray::iterator d=current.begin();d!=current.end();++d) {
if (transformedShiftRegister == *d) {
if (segSize < MIN_DESIRED_SIZE)
++numTooShort;
else if (segSize > MAX_DESIRED_SIZE)
++numTooLong;
else ++numGood;
segSize = 0;
break;
}
}
}
if (segSize) {
if (segSize < MIN_DESIRED_SIZE)
++numTooShort;
else if (segSize > MAX_DESIRED_SIZE)
++numTooLong;
else ++numGood;
}
if (numGood) {
double score = ((double)(numTooShort + numTooLong)) / ((double)numGood);
if (score < myEntry->bestScore) {
myEntry->best = current;
myEntry->bestScore = score;
boost::mutex::scoped_lock l(outLock);
std::cout << fileName << ": ";
for(DelimArray::iterator d=current.begin();d!=current.end();++d) {
sprintf(tmp,"0x%.4x",(unsigned int)*d);
if (d != current.begin())
std::cout << ',';
std::cout << tmp;
}
std::cout << ": " << numTooShort << " / " << numGood << " / " << numTooLong << " (" << score << ")" << std::endl;
std::cout.flush();
if ((numTooShort == 0)&&(numTooLong == 0))
break;
}
}
for(DelimArray::iterator i=current.begin();i!=current.end();++i)
*i = (boost::uint16_t)prng();
}
}
int main(int argc,char **argv)
{
std::vector< boost::shared_ptr<boost::thread> > threads;
for(int i=1;i<argc;++i) {
boost::shared_ptr<boost::thread> t(new boost::thread(boost::bind(&runThread,std::string(argv[i]))));
threads.push_back(t);
}
for(std::vector< boost::shared_ptr<boost::thread> >::iterator i=threads.begin();i!=threads.end();++i)
(*i)->join();
return 0;
}