Avoid encoding useless tuples, change format for more efficient diff encoding.

This commit is contained in:
Bertold Van den Bergh 2019-08-13 16:40:31 +02:00
parent 836e7df6fa
commit 4269644520
4 changed files with 177 additions and 101 deletions

View file

@ -47,22 +47,34 @@ void errorFatal(std::string what)
exit(1); exit(1);
} }
int encodeVariableLength(std::vector<uint8_t>& output, int64_t valueIn, bool handleNeg = true) void assert(bool mustBeTrue, std::string what){
{ if(!mustBeTrue){
errorFatal(what);
}
}
uint64_t encodeSignedToUnsigned(int64_t valueIn){
uint64_t value = valueIn * 2; uint64_t value = valueIn * 2;
if(valueIn < 0) { if(valueIn < 0) {
value = -valueIn * 2 + 1; value = -valueIn * 2 + 1;
} }
if(!handleNeg) { return value;
value = valueIn; }
int encodeVariableLength(std::vector<uint8_t>& output, int64_t valueIn, bool handleNeg = true)
{
uint64_t value = valueIn;
if(handleNeg) {
value = encodeSignedToUnsigned(valueIn);
} }
int bytesUsed = 0; int bytesUsed = 0;
do { do {
uint8_t byteOut = value & 0x7F; uint8_t byteOut = value & 0x7F;
if(value >= 128) { if(value >= 128) {
byteOut |= 0x80; byteOut |= 128;
} }
output.push_back(byteOut); output.push_back(byteOut);
bytesUsed ++; bytesUsed ++;
@ -72,47 +84,43 @@ int encodeVariableLength(std::vector<uint8_t>& output, int64_t valueIn, bool han
return bytesUsed; return bytesUsed;
} }
int64_t doubleToFixedPoint(double input, double scale, unsigned int precision = 32) int64_t doubleToFixedPoint(double input, double scale, unsigned int precision = 32)
{ {
if(input == Inf){
return INT64_MAX;
}
if(input == -Inf){
return INT64_MIN;
}
double inputScaled = input / scale; double inputScaled = input / scale;
return inputScaled * pow(2, precision-1); return inputScaled * pow(2, precision-1);
} }
struct Point { struct Point {
Point(double lat = 0, double lon = 0) Point(double lat = 0, double lon = 0, unsigned int precision = 32)
{ {
lat_ = lat; lat_ = doubleToFixedPoint(lat, 90, precision);
lon_ = lon; lon_ = doubleToFixedPoint(lon, 180, precision);
} }
Point operator-(const Point& p) std::tuple<int64_t, int64_t> value()
{ {
Point result(lat_ - p.lat_, lon_ - p.lon_); return std::make_tuple(lat_, lon_);
return result;
} }
std::tuple<int64_t, int64_t> toFixedPoint(unsigned int precision = 32) int encodePointBinary(std::vector<uint8_t>& output)
{ {
int64_t latFixedPoint = doubleToFixedPoint(lat_, 90, precision); int bytesUsed = encodeVariableLength(output, lat_);
int64_t lonFixedPoint = doubleToFixedPoint(lon_, 180, precision); bytesUsed += encodeVariableLength(output, lon_);
return std::make_tuple(latFixedPoint, lonFixedPoint);
}
int encodePointBinary(std::vector<uint8_t>& output, unsigned int precision = 32)
{
int64_t latFixedPoint, lonFixedPoint;
std::tie(latFixedPoint, lonFixedPoint) = toFixedPoint(precision);
int bytesUsed = encodeVariableLength(output, latFixedPoint);
bytesUsed += encodeVariableLength(output, lonFixedPoint);
return bytesUsed; return bytesUsed;
} }
double lat_; int64_t lat_;
double lon_; int64_t lon_;
}; };
struct PolygonData { struct PolygonData {
@ -147,47 +155,75 @@ struct PolygonData {
{ {
} }
long encodeBinaryData(std::vector<uint8_t>& output, unsigned int precision = 20) uint64_t encodePointTo64(int64_t lat, int64_t lon){
assert(lat || lon, "Tried to encode 0,0. This is not allowed");
uint64_t latu=encodeSignedToUnsigned(lat);
uint64_t lonu=encodeSignedToUnsigned(lon);
assert(latu < (uint64_t)1<<32, "Unsigned lat overflow");
assert(lonu < (uint64_t)1<<32, "Unsigned lat overflow");
uint64_t point = 0;
for(uint8_t i=31; i<=31; i--){
point <<= 2;
if(latu & (1<<i)){
point |= 1;
}
if(lonu & (1<<i)){
point |= 2;
}
}
return point;
}
/**
 * Tells whether the step (x1, y1) points in exactly the same direction as
 * the step (x2, y2), so that both can be merged into one accumulated step.
 *
 * encodeBinaryData() calls this with the current difference as (x1, y1) and
 * the previous difference as (x2, y2).
 *
 * @param x1,y1  Components of the current step.
 * @param x2,y2  Components of the previous step.
 * @return true  when (x1, y1) is non-zero and is collinear with, and not
 *               opposed to, (x2, y2). A zero (x2, y2) matches any non-zero
 *               (x1, y1), which is what the very first step relies on.
 *         false otherwise, including when (x1, y1) is the zero vector.
 */
bool sameDirection(int64_t x1, int64_t y1, int64_t x2, int64_t y2){
    /* A zero-length step has no direction. Reporting "same" here would let the
     * caller remember (0, 0) as the previous step, which then matches anything
     * and silently merges the next, differently oriented, step. */
    if(x1 == 0 && y1 == 0){
        return false;
    }

    /* Opposite signs on either axis: the vectors cannot point the same way */
    if((x1 > 0 && x2 < 0) || (x1 < 0 && x2 > 0)){
        return false;
    }
    if((y1 > 0 && y2 < 0) || (y1 < 0 && y2 > 0)){
        return false;
    }

    /* Collinear iff the cross product is zero. This is exact, unlike the former
     * y2 == y1*x2/x1 whose truncating division accepted e.g. (2,1) vs (1,0).
     * NOTE(review): the products are assumed to fit in int64_t, the same
     * assumption the previous y1*x2 expression already made. */
    return x1 * y2 == y1 * x2;
}
long encodeBinaryData(std::vector<uint8_t>& output)
{ {
long bytesEncoded = 0;
bool first = true; bool first = true;
int64_t latFixedPoint = 0, lonFixedPoint = 0; int64_t latFixedPoint = 0, lonFixedPoint = 0;
int64_t latFixedPointPrev, lonFixedPointPrev; int64_t latFixedPointPrev, lonFixedPointPrev;
uint64_t vertices = 0;
std::vector<uint8_t> tmp;
int64_t diffLatAcc = 0, diffLonAcc = 0, diffLatPrev = 0, diffLonPrev = 0; int64_t diffLatAcc = 0, diffLonAcc = 0, diffLatPrev = 0, diffLonPrev = 0;
for(Point& point: points_) { for(Point point: points_){
/* The points should first be rounded, and then the integer value is differentiated */ /* The points should first be rounded, and then the integer value is differentiated */
latFixedPointPrev = latFixedPoint; latFixedPointPrev = latFixedPoint;
lonFixedPointPrev = lonFixedPoint; lonFixedPointPrev = lonFixedPoint;
std::tie(latFixedPoint, lonFixedPoint) = point.toFixedPoint(precision); std::tie(latFixedPoint, lonFixedPoint) = point.value();
int64_t diffLat = latFixedPoint - latFixedPointPrev; int64_t diffLat = latFixedPoint - latFixedPointPrev;
int64_t diffLon = lonFixedPoint - lonFixedPointPrev; int64_t diffLon = lonFixedPoint - lonFixedPointPrev;
if(first) { if(first) {
/* First point is always encoded */ /* First point is always encoded */
vertices++; encodeVariableLength(output, encodePointTo64(latFixedPoint, lonFixedPoint), false);
encodeVariableLength(tmp, latFixedPoint);
encodeVariableLength(tmp, lonFixedPoint);
first = false; first = false;
} else { } else {
/* Ignore points that are not different */ if(!sameDirection(diffLat, diffLon, diffLatPrev, diffLonPrev)) {
if(!diffLon && !diffLat) {
continue;
}
if(diffLat != diffLatPrev || diffLon != diffLonPrev) {
/* Encode accumulator */ /* Encode accumulator */
vertices++; if(diffLatAcc || diffLonAcc){
encodeVariableLength(tmp, diffLatAcc); encodeVariableLength(output, encodePointTo64(diffLatAcc, diffLonAcc), false);
encodeVariableLength(tmp, diffLonAcc);
diffLatAcc = 0; diffLatAcc = 0;
diffLonAcc = 0; diffLonAcc = 0;
}
} }
diffLatAcc += diffLat; diffLatAcc += diffLat;
@ -198,15 +234,16 @@ struct PolygonData {
diffLonPrev = diffLon; diffLonPrev = diffLon;
} }
/* Encode final point */ /* Encode final point if needed */
vertices++; if(diffLonAcc || diffLatAcc) {
encodeVariableLength(tmp, diffLatAcc); encodeVariableLength(output, encodePointTo64(diffLatAcc, diffLonAcc), false);
encodeVariableLength(tmp, diffLonAcc); }
encodeVariableLength(output, vertices, false); /* Encode stop marker */
std::copy(tmp.begin(), tmp.end(), std::back_inserter(output)); output.push_back(0);
output.push_back(0);
return bytesEncoded; return 0;
} }
}; };
@ -483,7 +520,7 @@ int main(int argc, char ** argv )
} }
} }
Point p(shapeObject->padfY[j], shapeObject->padfX[j]); Point p(shapeObject->padfY[j], shapeObject->padfX[j], precision);
polygonData->processPoint(p); polygonData->processPoint(p);
} }
@ -510,7 +547,7 @@ int main(int argc, char ** argv )
std::vector<uint8_t> outputData; std::vector<uint8_t> outputData;
for(PolygonData* polygon: polygons_) { for(PolygonData* polygon: polygons_) {
polygon->fileIndex_ = outputData.size(); polygon->fileIndex_ = outputData.size();
polygon->encodeBinaryData(outputData, precision); polygon->encodeBinaryData(outputData);
} }
std::cout << "Encoded data section into "<<outputData.size()<<" bytes.\n"; std::cout << "Encoded data section into "<<outputData.size()<<" bytes.\n";
@ -527,8 +564,8 @@ int main(int argc, char ** argv )
int64_t prevFileIndex = 0; int64_t prevFileIndex = 0;
int64_t prevMetaIndex = 0; int64_t prevMetaIndex = 0;
for(PolygonData* polygon: polygons_) { for(PolygonData* polygon: polygons_) {
polygon->boundingMin.encodePointBinary(outputBBox, precision); polygon->boundingMin.encodePointBinary(outputBBox);
polygon->boundingMax.encodePointBinary(outputBBox, precision); polygon->boundingMax.encodePointBinary(outputBBox);
encodeVariableLength(outputBBox, metadata_.at(polygon->metadataId_).fileIndex_ - prevMetaIndex); encodeVariableLength(outputBBox, metadata_.at(polygon->metadataId_).fileIndex_ - prevMetaIndex);
prevMetaIndex = metadata_[polygon->metadataId_].fileIndex_; prevMetaIndex = metadata_[polygon->metadataId_].fileIndex_;
@ -544,7 +581,7 @@ int main(int argc, char ** argv )
outputHeader.push_back('L'); outputHeader.push_back('L');
outputHeader.push_back('B'); outputHeader.push_back('B');
outputHeader.push_back(tableType); outputHeader.push_back(tableType);
outputHeader.push_back(0); outputHeader.push_back(1);
outputHeader.push_back(precision); outputHeader.push_back(precision);
outputHeader.push_back(fieldNames_.size()); outputHeader.push_back(fieldNames_.size());
for(unsigned int i=0; i<fieldNames_.size(); i++) { for(unsigned int i=0; i<fieldNames_.size(); i++) {

View file

@ -2,20 +2,20 @@
g++ builder.cpp -o builder -lshp g++ builder.cpp -o builder -lshp
rm -rf out naturalearth timezone db.zip #rm -rf out naturalearth timezone db.zip
mkdir out mkdir -p out
mkdir naturalearth; cd naturalearth mkdir -p naturalearth; cd naturalearth
wget https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries_lakes.zip #wget https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries_lakes.zip
unzip ne_10m_admin_0_countries_lakes.zip #unzip ne_10m_admin_0_countries_lakes.zip
cd .. cd ..
./builder C naturalearth/ne_10m_admin_0_countries_lakes ./out/country16.bin 16 "Made with Natural Earth, placed in the Public Domain." #./builder C naturalearth/ne_10m_admin_0_countries_lakes ./out/country16.bin 16 "Made with Natural Earth, placed in the Public Domain."
./builder C naturalearth/ne_10m_admin_0_countries_lakes ./out/country21.bin 21 "Made with Natural Earth, placed in the Public Domain." #./builder C naturalearth/ne_10m_admin_0_countries_lakes ./out/country21.bin 21 "Made with Natural Earth, placed in the Public Domain."
mkdir timezone; cd timezone mkdir timezone; cd timezone
wget https://github.com/evansiroky/timezone-boundary-builder/releases/download/2018i/timezones.shapefile.zip #wget https://github.com/evansiroky/timezone-boundary-builder/releases/download/2018i/timezones.shapefile.zip
unzip timezones.shapefile.zip #unzip timezones.shapefile.zip
cd .. cd ..
./builder T timezone/dist/combined-shapefile ./out/timezone16.bin 16 "Contains data from Natural Earth, placed in the Public Domain. Contains information from https://github.com/evansiroky/timezone-boundary-builder, which is made available here under the Open Database License (ODbL)." #./builder T timezone/dist/combined-shapefile ./out/timezone16.bin 16 "Contains data from Natural Earth, placed in the Public Domain. Contains information from https://github.com/evansiroky/timezone-boundary-builder, which is made available here under the Open Database License (ODbL)."
./builder T timezone/dist/combined-shapefile ./out/timezone21.bin 21 "Contains data from Natural Earth, placed in the Public Domain. Contains information from https://github.com/evansiroky/timezone-boundary-builder, which is made available here under the Open Database License (ODbL)." ./builder T timezone/dist/combined-shapefile ./out/timezone21.bin 21 "Contains data from Natural Earth, placed in the Public Domain. Contains information from https://github.com/evansiroky/timezone-boundary-builder, which is made available here under the Open Database License (ODbL)."
rm -rf naturalearth #rm -rf naturalearth
zip db.zip out/* #zip db.zip out/*

1
demo.c
View file

@ -32,6 +32,7 @@
void printResults(ZoneDetectResult *results, float safezone) void printResults(ZoneDetectResult *results, float safezone)
{ {
if(!results) { if(!results) {
printf("No results\n");
return; return;
} }

View file

@ -95,13 +95,13 @@ static int32_t ZDFloatToFixedPoint(float input, float scale, unsigned int precis
return (int32_t)(inputScaled * (float)(1 << (precision - 1))); return (int32_t)(inputScaled * (float)(1 << (precision - 1)));
} }
static unsigned int ZDDecodeVariableLengthUnsigned(const ZoneDetect *library, uint32_t *index, uint32_t *result) static unsigned int ZDDecodeVariableLengthUnsigned(const ZoneDetect *library, uint32_t *index, uint64_t *result)
{ {
if(*index >= (uint32_t)library->length) { if(*index >= (uint32_t)library->length) {
return 0; return 0;
} }
uint32_t value = 0; uint64_t value = 0;
unsigned int i = 0; unsigned int i = 0;
#if defined(_MSC_VER) #if defined(_MSC_VER)
__try { __try {
@ -111,7 +111,7 @@ static unsigned int ZDDecodeVariableLengthUnsigned(const ZoneDetect *library, ui
unsigned int shift = 0; unsigned int shift = 0;
while(1) { while(1) {
value |= (uint32_t)((buffer[i] & UINT8_C(0x7F)) << shift); value |= ((((uint64_t)buffer[i]) & UINT8_C(0x7F)) << shift);
shift += 7u; shift += 7u;
if(!(buffer[i] & UINT8_C(0x80))) { if(!(buffer[i] & UINT8_C(0x80))) {
@ -138,17 +138,21 @@ static unsigned int ZDDecodeVariableLengthUnsigned(const ZoneDetect *library, ui
return i; return i;
} }
/* Reverse the signed-to-unsigned packing used in the data file:
 * bit 0 of `value` is the sign flag (1 = negative), the remaining bits
 * hold the magnitude. */
static int64_t ZDDecodeUnsignedToSigned(uint64_t value){
    const int64_t magnitude = (int64_t)(value >> 1);

    if(value & 1) {
        return -magnitude;
    }
    return magnitude;
}
static unsigned int ZDDecodeVariableLengthSigned(const ZoneDetect *library, uint32_t *index, int32_t *result) static unsigned int ZDDecodeVariableLengthSigned(const ZoneDetect *library, uint32_t *index, int32_t *result)
{ {
uint32_t value = 0; uint64_t value = 0;
const unsigned int retVal = ZDDecodeVariableLengthUnsigned(library, index, &value); const unsigned int retVal = ZDDecodeVariableLengthUnsigned(library, index, &value);
*result = (value & 1) ? -(int32_t)(value / 2) : (int32_t)(value / 2); *result = (int32_t)ZDDecodeUnsignedToSigned(value);
return retVal; return retVal;
} }
static char *ZDParseString(const ZoneDetect *library, uint32_t *index) static char *ZDParseString(const ZoneDetect *library, uint32_t *index)
{ {
uint32_t strLength; uint64_t strLength;
if(!ZDDecodeVariableLengthUnsigned(library, index, &strLength)) { if(!ZDDecodeVariableLengthUnsigned(library, index, &strLength)) {
return NULL; return NULL;
} }
@ -156,7 +160,7 @@ static char *ZDParseString(const ZoneDetect *library, uint32_t *index)
uint32_t strOffset = *index; uint32_t strOffset = *index;
unsigned int remoteStr = 0; unsigned int remoteStr = 0;
if(strLength >= 256) { if(strLength >= 256) {
strOffset = library->metadataOffset + strLength - 256; strOffset = library->metadataOffset + (uint32_t)strLength - 256;
remoteStr = 1; remoteStr = 1;
if(!ZDDecodeVariableLengthUnsigned(library, &strOffset, &strLength)) { if(!ZDDecodeVariableLengthUnsigned(library, &strOffset, &strLength)) {
@ -189,7 +193,7 @@ static char *ZDParseString(const ZoneDetect *library, uint32_t *index)
} }
if(!remoteStr) { if(!remoteStr) {
*index += strLength; *index += (uint32_t)strLength;
} }
return str; return str;
@ -221,7 +225,7 @@ static int ZDParseHeader(ZoneDetect *library)
} }
#endif #endif
if(library->version != 0) { if(library->version != 1) {
return -1; return -1;
} }
@ -237,15 +241,15 @@ static int ZDParseHeader(ZoneDetect *library)
return -1; return -1;
} }
uint32_t tmp; uint64_t tmp;
/* Read section sizes */ /* Read section sizes */
/* By memset: library->bboxOffset = 0 */ /* By memset: library->bboxOffset = 0 */
if(!ZDDecodeVariableLengthUnsigned(library, &index, &tmp)) return -1; if(!ZDDecodeVariableLengthUnsigned(library, &index, &tmp)) return -1;
library->metadataOffset = tmp + library->bboxOffset; library->metadataOffset = (uint32_t)tmp + library->bboxOffset;
if(!ZDDecodeVariableLengthUnsigned(library, &index, &tmp))return -1; if(!ZDDecodeVariableLengthUnsigned(library, &index, &tmp))return -1;
library->dataOffset = tmp + library->metadataOffset; library->dataOffset = (uint32_t)tmp + library->metadataOffset;
if(!ZDDecodeVariableLengthUnsigned(library, &index, &tmp)) return -1; if(!ZDDecodeVariableLengthUnsigned(library, &index, &tmp)) return -1;
@ -273,25 +277,55 @@ static int ZDPointInBox(int32_t xl, int32_t x, int32_t xr, int32_t yl, int32_t y
return 0; return 0;
} }
/* Split a packed 64-bit point into its two signed coordinates.
 * The even bits of `point` (0, 2, ..., 62) are gathered into the unsigned
 * latitude value and the odd bits (1, 3, ..., 63) into the unsigned
 * longitude value; each is then converted with ZDDecodeUnsignedToSigned
 * and narrowed to 32 bits before being stored through `lat` / `lon`. */
static void ZDDecodePoint(uint64_t point, int32_t* lat, int32_t* lon){
    uint64_t latBits = 0;
    uint64_t lonBits = 0;

    /* Source bit 2*bit (resp. 2*bit+1) lands at position `bit` of the result */
    for(unsigned int bit = 0; bit < 32; bit++){
        latBits |= ((point >> (2 * bit)) & 1) << bit;
        lonBits |= ((point >> (2 * bit + 1)) & 1) << bit;
    }

    *lat = (int32_t)ZDDecodeUnsignedToSigned(latBits);
    *lon = (int32_t)ZDDecodeUnsignedToSigned(lonBits);
}
static ZDLookupResult ZDPointInPolygon(const ZoneDetect *library, uint32_t polygonIndex, int32_t latFixedPoint, int32_t lonFixedPoint, uint64_t *distanceSqrMin) static ZDLookupResult ZDPointInPolygon(const ZoneDetect *library, uint32_t polygonIndex, int32_t latFixedPoint, int32_t lonFixedPoint, uint64_t *distanceSqrMin)
{ {
uint32_t numVertices;
int32_t pointLat = 0, pointLon = 0, diffLat = 0, diffLon = 0, firstLat = 0, firstLon = 0, prevLat = 0, prevLon = 0; int32_t pointLat = 0, pointLon = 0, diffLat = 0, diffLon = 0, firstLat = 0, firstLon = 0, prevLat = 0, prevLon = 0;
lonFixedPoint -= 3; lonFixedPoint -= 3;
/* Read number of vertices */
if(!ZDDecodeVariableLengthUnsigned(library, &polygonIndex, &numVertices)) return ZD_LOOKUP_PARSE_ERROR;
if(numVertices > 1000000) return ZD_LOOKUP_PARSE_ERROR;
int prevQuadrant = 0, winding = 0; int prevQuadrant = 0, winding = 0;
uint8_t done = 0, first = 1;
for(size_t i = 0; i <= (size_t)numVertices; i++) { do{
if(i < (size_t)numVertices) { uint64_t point;
if(!ZDDecodeVariableLengthSigned(library, &polygonIndex, &diffLat)) return ZD_LOOKUP_PARSE_ERROR; if(!ZDDecodeVariableLengthUnsigned(library, &polygonIndex, &point)) return ZD_LOOKUP_PARSE_ERROR;
if(!ZDDecodeVariableLengthSigned(library, &polygonIndex, &diffLon)) return ZD_LOOKUP_PARSE_ERROR;
if(!point){
/* This is a special marker */
uint64_t value;
if(!ZDDecodeVariableLengthUnsigned(library, &polygonIndex, &value)) return ZD_LOOKUP_PARSE_ERROR;
if(value == 0){
done = 1;
}
}else{
ZDDecodePoint(point, &diffLat, &diffLon);
}
if(!done){
pointLat += diffLat; pointLat += diffLat;
pointLon += diffLon; pointLon += diffLon;
if(i == 0) { if(first) {
firstLat = pointLat; firstLat = pointLat;
firstLon = pointLon; firstLon = pointLon;
} }
@ -323,7 +357,7 @@ static ZDLookupResult ZDPointInPolygon(const ZoneDetect *library, uint32_t polyg
} }
} }
if(i > 0) { if(!first) {
int windingNeedCompare = 0, lineIsStraight = 0; int windingNeedCompare = 0, lineIsStraight = 0;
float a = 0, b = 0; float a = 0, b = 0;
@ -413,7 +447,11 @@ static ZDLookupResult ZDPointInPolygon(const ZoneDetect *library, uint32_t polyg
prevQuadrant = quadrant; prevQuadrant = quadrant;
prevLat = pointLat; prevLat = pointLat;
prevLon = pointLon; prevLon = pointLon;
}
if(first){
first = 0;
}
}while(!done);
if(winding == -4) { if(winding == -4) {
return ZD_LOOKUP_IN_ZONE; return ZD_LOOKUP_IN_ZONE;
@ -542,7 +580,7 @@ ZoneDetectResult *ZDLookup(const ZoneDetect *library, float lat, float lon, floa
while(bboxIndex < library->metadataOffset) { while(bboxIndex < library->metadataOffset) {
int32_t minLat, minLon, maxLat, maxLon, metadataIndexDelta; int32_t minLat, minLon, maxLat, maxLon, metadataIndexDelta;
uint32_t polygonIndexDelta; uint64_t polygonIndexDelta;
if(!ZDDecodeVariableLengthSigned(library, &bboxIndex, &minLat)) break; if(!ZDDecodeVariableLengthSigned(library, &bboxIndex, &minLat)) break;
if(!ZDDecodeVariableLengthSigned(library, &bboxIndex, &minLon)) break; if(!ZDDecodeVariableLengthSigned(library, &bboxIndex, &minLon)) break;
if(!ZDDecodeVariableLengthSigned(library, &bboxIndex, &maxLat)) break; if(!ZDDecodeVariableLengthSigned(library, &bboxIndex, &maxLat)) break;
@ -551,7 +589,7 @@ ZoneDetectResult *ZDLookup(const ZoneDetect *library, float lat, float lon, floa
if(!ZDDecodeVariableLengthUnsigned(library, &bboxIndex, &polygonIndexDelta)) break; if(!ZDDecodeVariableLengthUnsigned(library, &bboxIndex, &polygonIndexDelta)) break;
metadataIndex += (uint32_t)metadataIndexDelta; metadataIndex += (uint32_t)metadataIndexDelta;
polygonIndex += polygonIndexDelta; polygonIndex += (uint32_t)polygonIndexDelta;
if(latFixedPoint >= minLat) { if(latFixedPoint >= minLat) {
if(latFixedPoint <= maxLat && if(latFixedPoint <= maxLat &&