Avoid encoding useless tuples, change format for more efficient diff encoding.

This commit is contained in:
Bertold Van den Bergh 2019-08-13 16:40:31 +02:00
parent 836e7df6fa
commit 4269644520
4 changed files with 177 additions and 101 deletions

View file

@ -47,22 +47,34 @@ void errorFatal(std::string what)
exit(1);
}
int encodeVariableLength(std::vector<uint8_t>& output, int64_t valueIn, bool handleNeg = true)
{
/* Fatal check: when mustBeTrue is false, hand `what` to errorFatal();
 * otherwise do nothing.
 * NOTE(review): this shares its name with the standard assert macro. If
 * <cassert>/<assert.h> is included in this translation unit, the
 * two-argument calls will not compile -- TODO confirm the include list. */
void assert(bool mustBeTrue, std::string what){
    if(mustBeTrue){
        return;
    }
    errorFatal(what);
}
/* Map a signed value onto an unsigned one so that small magnitudes stay
 * small: a non-negative v becomes 2*v, a negative v becomes 2*|v| + 1
 * (bit 0 carries the sign, the remaining bits carry the magnitude).
 *
 * valueIn: the signed value to map.
 * Returns the unsigned representation. */
uint64_t encodeSignedToUnsigned(int64_t valueIn){
    const bool negative = valueIn < 0;

    /* Take the magnitude in unsigned arithmetic. Negating or doubling in
     * int64_t overflows (undefined behaviour) for inputs near the int64_t
     * limits, whereas unsigned arithmetic wraps in a defined way. */
    const uint64_t magnitude = negative
        ? static_cast<uint64_t>(0) - static_cast<uint64_t>(valueIn)
        : static_cast<uint64_t>(valueIn);

    return magnitude * 2 + (negative ? 1 : 0);
}
int encodeVariableLength(std::vector<uint8_t>& output, int64_t valueIn, bool handleNeg = true)
{
uint64_t value = valueIn;
if(handleNeg) {
value = encodeSignedToUnsigned(valueIn);
}
int bytesUsed = 0;
do {
uint8_t byteOut = value & 0x7F;
if(value >= 128) {
byteOut |= 0x80;
byteOut |= 128;
}
output.push_back(byteOut);
bytesUsed ++;
@ -72,47 +84,43 @@ int encodeVariableLength(std::vector<uint8_t>& output, int64_t valueIn, bool han
return bytesUsed;
}
/* Convert a floating point quantity to a signed fixed-point integer.
 *
 * input:     value to convert.
 * scale:     divisor applied to input before scaling (callers in this file
 *            pass 90 for latitude and 180 for longitude).
 * precision: the result is (input / scale) * 2^(precision-1).
 *
 * Inputs equal to Inf / -Inf are mapped to INT64_MAX / INT64_MIN.
 * NOTE(review): Inf is defined outside this block; presumably positive
 * infinity -- TODO confirm. */
int64_t doubleToFixedPoint(double input, double scale, unsigned int precision = 32)
{
    if(input == Inf){
        return INT64_MAX;
    }
    if(input == -Inf){
        return INT64_MIN;
    }

    const double normalized = input / scale;
    const double fullScale = pow(2, precision-1);
    return static_cast<int64_t>(normalized * fullScale);
}
struct Point {
Point(double lat = 0, double lon = 0)
Point(double lat = 0, double lon = 0, unsigned int precision = 32)
{
lat_ = lat;
lon_ = lon;
lat_ = doubleToFixedPoint(lat, 90, precision);
lon_ = doubleToFixedPoint(lon, 180, precision);
}
Point operator-(const Point& p)
std::tuple<int64_t, int64_t> value()
{
Point result(lat_ - p.lat_, lon_ - p.lon_);
return result;
return std::make_tuple(lat_, lon_);
}
std::tuple<int64_t, int64_t> toFixedPoint(unsigned int precision = 32)
int encodePointBinary(std::vector<uint8_t>& output)
{
int64_t latFixedPoint = doubleToFixedPoint(lat_, 90, precision);
int64_t lonFixedPoint = doubleToFixedPoint(lon_, 180, precision);
return std::make_tuple(latFixedPoint, lonFixedPoint);
}
int encodePointBinary(std::vector<uint8_t>& output, unsigned int precision = 32)
{
int64_t latFixedPoint, lonFixedPoint;
std::tie(latFixedPoint, lonFixedPoint) = toFixedPoint(precision);
int bytesUsed = encodeVariableLength(output, latFixedPoint);
bytesUsed += encodeVariableLength(output, lonFixedPoint);
int bytesUsed = encodeVariableLength(output, lat_);
bytesUsed += encodeVariableLength(output, lon_);
return bytesUsed;
}
double lat_;
double lon_;
int64_t lat_;
int64_t lon_;
};
struct PolygonData {
@ -146,50 +154,78 @@ struct PolygonData {
metadataId_(id)
{
}
/* Pack a (lat, lon) pair into a single 64-bit value by interleaving bits.
 *
 * Each coordinate is first mapped to unsigned with encodeSignedToUnsigned()
 * and must then fit in 32 bits. Bit k of the latitude lands in bit 2k of
 * the result and bit k of the longitude in bit 2k+1.
 *
 * (0,0) is rejected: it would pack to 0, which the decoder treats as a
 * special marker rather than as a point. */
uint64_t encodePointTo64(int64_t lat, int64_t lon){
    assert(lat || lon, "Tried to encode 0,0. This is not allowed");

    const uint64_t latu = encodeSignedToUnsigned(lat);
    const uint64_t lonu = encodeSignedToUnsigned(lon);
    assert(latu < (uint64_t)1<<32, "Unsigned lat overflow");
    assert(lonu < (uint64_t)1<<32, "Unsigned lon overflow");

    uint64_t point = 0;
    /* Walk from the most significant bit (31) down to bit 0 */
    for(int i=31; i>=0; i--){
        /* The mask is built in 64 bits: shifting a plain int by 31 would
         * shift into its sign bit */
        const uint64_t mask = (uint64_t)1 << i;
        point <<= 2;
        if(latu & mask){
            point |= 1;
        }
        if(lonu & mask){
            point |= 2;
        }
    }

    return point;
}
/* Report whether vectors (x1,y1) and (x2,y2) point in the same direction.
 *
 * Returns false as soon as a component has opposite signs in the two
 * vectors. When x1 is 0 the answer is whether x2 is 0 as well. Otherwise
 * the vectors must be exactly proportional.
 *
 * NOTE(review): assumes the component products fit in int64_t. */
bool sameDirection(int64_t x1, int64_t y1, int64_t x2, int64_t y2){
    if((x1 > 0 && x2 < 0) || (x1 < 0 && x2 > 0)){
        return false;
    }
    if((y1 > 0 && y2 < 0) || (y1 < 0 && y2 > 0)){
        return false;
    }

    if(x1 == 0){
        return x2 == 0;
    }

    /* Compare cross products instead of dividing: integer division
     * truncates, which would accept vectors such as (3,1) and (1,0) as
     * proportional. */
    return x1 * y2 == y1 * x2;
}
long encodeBinaryData(std::vector<uint8_t>& output)
{
long bytesEncoded = 0;
bool first = true;
int64_t latFixedPoint = 0, lonFixedPoint = 0;
int64_t latFixedPointPrev, lonFixedPointPrev;
uint64_t vertices = 0;
std::vector<uint8_t> tmp;
int64_t diffLatAcc = 0, diffLonAcc = 0, diffLatPrev = 0, diffLonPrev = 0;
for(Point& point: points_) {
for(Point point: points_){
/* The points should first be rounded, and then the integer value is differentiated */
latFixedPointPrev = latFixedPoint;
lonFixedPointPrev = lonFixedPoint;
std::tie(latFixedPoint, lonFixedPoint) = point.toFixedPoint(precision);
std::tie(latFixedPoint, lonFixedPoint) = point.value();
int64_t diffLat = latFixedPoint - latFixedPointPrev;
int64_t diffLon = lonFixedPoint - lonFixedPointPrev;
if(first) {
/* First point is always encoded */
vertices++;
encodeVariableLength(tmp, latFixedPoint);
encodeVariableLength(tmp, lonFixedPoint);
encodeVariableLength(output, encodePointTo64(latFixedPoint, lonFixedPoint), false);
first = false;
} else {
/* Ignore points that are not different */
if(!diffLon && !diffLat) {
continue;
}
if(diffLat != diffLatPrev || diffLon != diffLonPrev) {
if(!sameDirection(diffLat, diffLon, diffLatPrev, diffLonPrev)) {
/* Encode accumulator */
vertices++;
encodeVariableLength(tmp, diffLatAcc);
encodeVariableLength(tmp, diffLonAcc);
diffLatAcc = 0;
diffLonAcc = 0;
if(diffLatAcc || diffLonAcc){
encodeVariableLength(output, encodePointTo64(diffLatAcc, diffLonAcc), false);
diffLatAcc = 0;
diffLonAcc = 0;
}
}
diffLatAcc += diffLat;
diffLonAcc += diffLon;
}
@ -197,16 +233,17 @@ struct PolygonData {
diffLatPrev = diffLat;
diffLonPrev = diffLon;
}
/* Encode final point if needed */
if(diffLonAcc || diffLatAcc) {
encodeVariableLength(output, encodePointTo64(diffLatAcc, diffLonAcc), false);
}
/* Encode final point */
vertices++;
encodeVariableLength(tmp, diffLatAcc);
encodeVariableLength(tmp, diffLonAcc);
/* Encode stop marker */
output.push_back(0);
output.push_back(0);
encodeVariableLength(output, vertices, false);
std::copy(tmp.begin(), tmp.end(), std::back_inserter(output));
return bytesEncoded;
return 0;
}
};
@ -483,7 +520,7 @@ int main(int argc, char ** argv )
}
}
Point p(shapeObject->padfY[j], shapeObject->padfX[j]);
Point p(shapeObject->padfY[j], shapeObject->padfX[j], precision);
polygonData->processPoint(p);
}
@ -510,7 +547,7 @@ int main(int argc, char ** argv )
std::vector<uint8_t> outputData;
for(PolygonData* polygon: polygons_) {
polygon->fileIndex_ = outputData.size();
polygon->encodeBinaryData(outputData, precision);
polygon->encodeBinaryData(outputData);
}
std::cout << "Encoded data section into "<<outputData.size()<<" bytes.\n";
@ -527,8 +564,8 @@ int main(int argc, char ** argv )
int64_t prevFileIndex = 0;
int64_t prevMetaIndex = 0;
for(PolygonData* polygon: polygons_) {
polygon->boundingMin.encodePointBinary(outputBBox, precision);
polygon->boundingMax.encodePointBinary(outputBBox, precision);
polygon->boundingMin.encodePointBinary(outputBBox);
polygon->boundingMax.encodePointBinary(outputBBox);
encodeVariableLength(outputBBox, metadata_.at(polygon->metadataId_).fileIndex_ - prevMetaIndex);
prevMetaIndex = metadata_[polygon->metadataId_].fileIndex_;
@ -544,7 +581,7 @@ int main(int argc, char ** argv )
outputHeader.push_back('L');
outputHeader.push_back('B');
outputHeader.push_back(tableType);
outputHeader.push_back(0);
outputHeader.push_back(1);
outputHeader.push_back(precision);
outputHeader.push_back(fieldNames_.size());
for(unsigned int i=0; i<fieldNames_.size(); i++) {

View file

@ -2,20 +2,20 @@
g++ builder.cpp -o builder -lshp
rm -rf out naturalearth timezone db.zip
mkdir out
mkdir naturalearth; cd naturalearth
wget https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries_lakes.zip
unzip ne_10m_admin_0_countries_lakes.zip
#rm -rf out naturalearth timezone db.zip
mkdir -p out
mkdir -p naturalearth; cd naturalearth
#wget https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries_lakes.zip
#unzip ne_10m_admin_0_countries_lakes.zip
cd ..
./builder C naturalearth/ne_10m_admin_0_countries_lakes ./out/country16.bin 16 "Made with Natural Earth, placed in the Public Domain."
./builder C naturalearth/ne_10m_admin_0_countries_lakes ./out/country21.bin 21 "Made with Natural Earth, placed in the Public Domain."
#./builder C naturalearth/ne_10m_admin_0_countries_lakes ./out/country16.bin 16 "Made with Natural Earth, placed in the Public Domain."
#./builder C naturalearth/ne_10m_admin_0_countries_lakes ./out/country21.bin 21 "Made with Natural Earth, placed in the Public Domain."
mkdir timezone; cd timezone
wget https://github.com/evansiroky/timezone-boundary-builder/releases/download/2018i/timezones.shapefile.zip
unzip timezones.shapefile.zip
#wget https://github.com/evansiroky/timezone-boundary-builder/releases/download/2018i/timezones.shapefile.zip
#unzip timezones.shapefile.zip
cd ..
./builder T timezone/dist/combined-shapefile ./out/timezone16.bin 16 "Contains data from Natural Earth, placed in the Public Domain. Contains information from https://github.com/evansiroky/timezone-boundary-builder, which is made available here under the Open Database License (ODbL)."
#./builder T timezone/dist/combined-shapefile ./out/timezone16.bin 16 "Contains data from Natural Earth, placed in the Public Domain. Contains information from https://github.com/evansiroky/timezone-boundary-builder, which is made available here under the Open Database License (ODbL)."
./builder T timezone/dist/combined-shapefile ./out/timezone21.bin 21 "Contains data from Natural Earth, placed in the Public Domain. Contains information from https://github.com/evansiroky/timezone-boundary-builder, which is made available here under the Open Database License (ODbL)."
rm -rf naturalearth
zip db.zip out/*
#rm -rf naturalearth
#zip db.zip out/*

1
demo.c
View file

@ -32,6 +32,7 @@
void printResults(ZoneDetectResult *results, float safezone)
{
if(!results) {
printf("No results\n");
return;
}

View file

@ -95,13 +95,13 @@ static int32_t ZDFloatToFixedPoint(float input, float scale, unsigned int precis
return (int32_t)(inputScaled * (float)(1 << (precision - 1)));
}
static unsigned int ZDDecodeVariableLengthUnsigned(const ZoneDetect *library, uint32_t *index, uint32_t *result)
static unsigned int ZDDecodeVariableLengthUnsigned(const ZoneDetect *library, uint32_t *index, uint64_t *result)
{
if(*index >= (uint32_t)library->length) {
return 0;
}
uint32_t value = 0;
uint64_t value = 0;
unsigned int i = 0;
#if defined(_MSC_VER)
__try {
@ -111,7 +111,7 @@ static unsigned int ZDDecodeVariableLengthUnsigned(const ZoneDetect *library, ui
unsigned int shift = 0;
while(1) {
value |= (uint32_t)((buffer[i] & UINT8_C(0x7F)) << shift);
value |= ((((uint64_t)buffer[i]) & UINT8_C(0x7F)) << shift);
shift += 7u;
if(!(buffer[i] & UINT8_C(0x80))) {
@ -138,17 +138,21 @@ static unsigned int ZDDecodeVariableLengthUnsigned(const ZoneDetect *library, ui
return i;
}
/* Turn an unsigned encoded value back into a signed one: bit 0 selects the
 * sign (set means negative) and the remaining bits hold the magnitude. */
static int64_t ZDDecodeUnsignedToSigned(uint64_t value){
    const int64_t magnitude = (int64_t)(value >> 1);
    if(value & 1){
        return -magnitude;
    }
    return magnitude;
}
/* Decode one signed variable-length value from the library data.
 *
 * Reads an unsigned value with ZDDecodeVariableLengthUnsigned(), converts
 * it with ZDDecodeUnsignedToSigned() and stores the result, narrowed to
 * 32 bits, in *result.
 *
 * Returns whatever the unsigned decoder returned; callers in this file
 * treat 0 as failure. */
static unsigned int ZDDecodeVariableLengthSigned(const ZoneDetect *library, uint32_t *index, int32_t *result)
{
    uint64_t value = 0;
    const unsigned int retVal = ZDDecodeVariableLengthUnsigned(library, index, &value);
    *result = (int32_t)ZDDecodeUnsignedToSigned(value);
    return retVal;
}
static char *ZDParseString(const ZoneDetect *library, uint32_t *index)
{
uint32_t strLength;
uint64_t strLength;
if(!ZDDecodeVariableLengthUnsigned(library, index, &strLength)) {
return NULL;
}
@ -156,7 +160,7 @@ static char *ZDParseString(const ZoneDetect *library, uint32_t *index)
uint32_t strOffset = *index;
unsigned int remoteStr = 0;
if(strLength >= 256) {
strOffset = library->metadataOffset + strLength - 256;
strOffset = library->metadataOffset + (uint32_t)strLength - 256;
remoteStr = 1;
if(!ZDDecodeVariableLengthUnsigned(library, &strOffset, &strLength)) {
@ -189,7 +193,7 @@ static char *ZDParseString(const ZoneDetect *library, uint32_t *index)
}
if(!remoteStr) {
*index += strLength;
*index += (uint32_t)strLength;
}
return str;
@ -221,7 +225,7 @@ static int ZDParseHeader(ZoneDetect *library)
}
#endif
if(library->version != 0) {
if(library->version != 1) {
return -1;
}
@ -237,15 +241,15 @@ static int ZDParseHeader(ZoneDetect *library)
return -1;
}
uint32_t tmp;
uint64_t tmp;
/* Read section sizes */
/* By memset: library->bboxOffset = 0 */
if(!ZDDecodeVariableLengthUnsigned(library, &index, &tmp)) return -1;
library->metadataOffset = tmp + library->bboxOffset;
library->metadataOffset = (uint32_t)tmp + library->bboxOffset;
if(!ZDDecodeVariableLengthUnsigned(library, &index, &tmp))return -1;
library->dataOffset = tmp + library->metadataOffset;
library->dataOffset = (uint32_t)tmp + library->metadataOffset;
if(!ZDDecodeVariableLengthUnsigned(library, &index, &tmp)) return -1;
@ -273,25 +277,55 @@ static int ZDPointInBox(int32_t xl, int32_t x, int32_t xr, int32_t yl, int32_t y
return 0;
}
/* Split a packed 64-bit point into its two coordinates.
 *
 * The even bits of `point` (0, 2, ..., 62) form the unsigned latitude and
 * the odd bits (1, 3, ..., 63) the unsigned longitude. Each is then
 * converted with ZDDecodeUnsignedToSigned() and narrowed to 32 bits before
 * being written to *lat and *lon. */
static void ZDDecodePoint(uint64_t point, int32_t* lat, int32_t* lon){
    uint64_t latBits = 0;
    uint64_t lonBits = 0;

    /* Consume bit pairs from the most significant (pair 31) down to pair 0 */
    for(unsigned int pair = 32; pair-- > 0; ){
        const unsigned int shift = 2u * pair;
        latBits = (latBits << 1) | ((point >> shift) & 1u);
        lonBits = (lonBits << 1) | ((point >> (shift + 1u)) & 1u);
    }

    *lat = (int32_t)ZDDecodeUnsignedToSigned(latBits);
    *lon = (int32_t)ZDDecodeUnsignedToSigned(lonBits);
}
static ZDLookupResult ZDPointInPolygon(const ZoneDetect *library, uint32_t polygonIndex, int32_t latFixedPoint, int32_t lonFixedPoint, uint64_t *distanceSqrMin)
{
uint32_t numVertices;
int32_t pointLat = 0, pointLon = 0, diffLat = 0, diffLon = 0, firstLat = 0, firstLon = 0, prevLat = 0, prevLon = 0;
lonFixedPoint -= 3;
/* Read number of vertices */
if(!ZDDecodeVariableLengthUnsigned(library, &polygonIndex, &numVertices)) return ZD_LOOKUP_PARSE_ERROR;
if(numVertices > 1000000) return ZD_LOOKUP_PARSE_ERROR;
int prevQuadrant = 0, winding = 0;
uint8_t done = 0, first = 1;
do{
uint64_t point;
if(!ZDDecodeVariableLengthUnsigned(library, &polygonIndex, &point)) return ZD_LOOKUP_PARSE_ERROR;
for(size_t i = 0; i <= (size_t)numVertices; i++) {
if(i < (size_t)numVertices) {
if(!ZDDecodeVariableLengthSigned(library, &polygonIndex, &diffLat)) return ZD_LOOKUP_PARSE_ERROR;
if(!ZDDecodeVariableLengthSigned(library, &polygonIndex, &diffLon)) return ZD_LOOKUP_PARSE_ERROR;
if(!point){
/* This is a special marker */
uint64_t value;
if(!ZDDecodeVariableLengthUnsigned(library, &polygonIndex, &value)) return ZD_LOOKUP_PARSE_ERROR;
if(value == 0){
done = 1;
}
}else{
ZDDecodePoint(point, &diffLat, &diffLon);
}
if(!done){
pointLat += diffLat;
pointLon += diffLon;
if(i == 0) {
if(first) {
firstLat = pointLat;
firstLon = pointLon;
}
@ -323,7 +357,7 @@ static ZDLookupResult ZDPointInPolygon(const ZoneDetect *library, uint32_t polyg
}
}
if(i > 0) {
if(!first) {
int windingNeedCompare = 0, lineIsStraight = 0;
float a = 0, b = 0;
@ -413,7 +447,11 @@ static ZDLookupResult ZDPointInPolygon(const ZoneDetect *library, uint32_t polyg
prevQuadrant = quadrant;
prevLat = pointLat;
prevLon = pointLon;
}
if(first){
first = 0;
}
}while(!done);
if(winding == -4) {
return ZD_LOOKUP_IN_ZONE;
@ -542,7 +580,7 @@ ZoneDetectResult *ZDLookup(const ZoneDetect *library, float lat, float lon, floa
while(bboxIndex < library->metadataOffset) {
int32_t minLat, minLon, maxLat, maxLon, metadataIndexDelta;
uint32_t polygonIndexDelta;
uint64_t polygonIndexDelta;
if(!ZDDecodeVariableLengthSigned(library, &bboxIndex, &minLat)) break;
if(!ZDDecodeVariableLengthSigned(library, &bboxIndex, &minLon)) break;
if(!ZDDecodeVariableLengthSigned(library, &bboxIndex, &maxLat)) break;
@ -551,7 +589,7 @@ ZoneDetectResult *ZDLookup(const ZoneDetect *library, float lat, float lon, floa
if(!ZDDecodeVariableLengthUnsigned(library, &bboxIndex, &polygonIndexDelta)) break;
metadataIndex += (uint32_t)metadataIndexDelta;
polygonIndex += polygonIndexDelta;
polygonIndex += (uint32_t)polygonIndexDelta;
if(latFixedPoint >= minLat) {
if(latFixedPoint <= maxLat &&