mirror of
https://github.com/BertoldVdb/ZoneDetect.git
synced 2026-03-25 08:45:56 +00:00
Reduce filesize further by not encoding points multiple times (not well tested, do not use)
This commit is contained in:
parent
4269644520
commit
71a5187802
3 changed files with 348 additions and 94 deletions
|
|
@ -84,26 +84,62 @@ int encodeVariableLength(std::vector<uint8_t>& output, int64_t valueIn, bool han
|
|||
return bytesUsed;
|
||||
}
|
||||
|
||||
uint64_t encodePointTo64(int64_t lat, int64_t lon){
|
||||
assert(lat || lon, "Tried to encode 0,0. This is not allowed");
|
||||
|
||||
uint64_t latu=encodeSignedToUnsigned(lat);
|
||||
uint64_t lonu=encodeSignedToUnsigned(lon);
|
||||
|
||||
assert(latu < (uint64_t)1<<32, "Unsigned lat overflow");
|
||||
assert(lonu < (uint64_t)1<<32, "Unsigned lat overflow");
|
||||
|
||||
uint64_t point = 0;
|
||||
for(uint8_t i=31; i<=31; i--){
|
||||
point <<= 2;
|
||||
if(latu & (1<<i)){
|
||||
point |= 1;
|
||||
}
|
||||
if(lonu & (1<<i)){
|
||||
point |= 2;
|
||||
}
|
||||
}
|
||||
|
||||
return point;
|
||||
}
|
||||
|
||||
|
||||
int64_t doubleToFixedPoint(double input, double scale, unsigned int precision = 32)
|
||||
{
|
||||
if(input == Inf){
|
||||
return INT64_MAX;
|
||||
}
|
||||
if(input == -Inf){
|
||||
return INT64_MIN;
|
||||
}
|
||||
|
||||
double inputScaled = input / scale;
|
||||
return inputScaled * pow(2, precision-1);
|
||||
|
||||
}
|
||||
|
||||
struct Point;
|
||||
struct PolygonData;
|
||||
|
||||
std::unordered_map<uint64_t, Point*> pointMap_;
|
||||
|
||||
struct Point {
|
||||
Point(double lat = 0, double lon = 0, unsigned int precision = 32)
|
||||
static Point* GetPoint(double dlat = 0, double dlon = 0, unsigned int precision = 32){
|
||||
int64_t lat = doubleToFixedPoint(dlat, 90, precision);
|
||||
int64_t lon = doubleToFixedPoint(dlon, 180, precision);
|
||||
|
||||
uint64_t key = encodePointTo64(lat, lon);
|
||||
if(pointMap_.count(key)){
|
||||
return pointMap_[key];
|
||||
}
|
||||
|
||||
Point* p = new Point(lat, lon);
|
||||
p->key_ = key;
|
||||
pointMap_[key] = p;
|
||||
return p;
|
||||
}
|
||||
|
||||
Point(int64_t lat = 0, int64_t lon = 0)
|
||||
{
|
||||
lat_ = doubleToFixedPoint(lat, 90, precision);
|
||||
lon_ = doubleToFixedPoint(lon, 180, precision);
|
||||
lat_ = lat;
|
||||
lon_ = lon;
|
||||
}
|
||||
|
||||
std::tuple<int64_t, int64_t> value()
|
||||
|
|
@ -121,125 +157,267 @@ struct Point {
|
|||
|
||||
int64_t lat_;
|
||||
int64_t lon_;
|
||||
uint64_t key_;
|
||||
PolygonData* parent_ = nullptr;
|
||||
int index_ = 0;
|
||||
bool encoded_ = false;
|
||||
uint64_t encodedOffset_ = 0;
|
||||
};
|
||||
|
||||
struct PolygonData {
|
||||
Point boundingMin;
|
||||
Point boundingMax;
|
||||
std::vector<Point> points_;
|
||||
std::vector<Point*> points_;
|
||||
unsigned long fileIndex_ = 0;
|
||||
unsigned long metadataId_;
|
||||
Point* lastPoint_ = nullptr;
|
||||
|
||||
void processPoint(const Point& p)
|
||||
void processPoint(Point* p)
|
||||
{
|
||||
if(p.lat_ < boundingMin.lat_) {
|
||||
boundingMin.lat_ = p.lat_;
|
||||
if(p->lat_ < boundingMin.lat_) {
|
||||
boundingMin.lat_ = p->lat_;
|
||||
}
|
||||
if(p.lon_ < boundingMin.lon_) {
|
||||
boundingMin.lon_ = p.lon_;
|
||||
if(p->lon_ < boundingMin.lon_) {
|
||||
boundingMin.lon_ = p->lon_;
|
||||
}
|
||||
if(p.lat_ > boundingMax.lat_) {
|
||||
boundingMax.lat_ = p.lat_;
|
||||
if(p->lat_ > boundingMax.lat_) {
|
||||
boundingMax.lat_ = p->lat_;
|
||||
}
|
||||
if(p.lon_ > boundingMax.lon_) {
|
||||
boundingMax.lon_ = p.lon_;
|
||||
if(p->lon_ > boundingMax.lon_) {
|
||||
boundingMax.lon_ = p->lon_;
|
||||
}
|
||||
|
||||
/* Don't encode duplicate points */
|
||||
if(lastPoint_ == p){
|
||||
return;
|
||||
}
|
||||
lastPoint_ = p;
|
||||
|
||||
points_.push_back(p);
|
||||
}
|
||||
|
||||
PolygonData(unsigned long id):
|
||||
boundingMin(Inf, Inf),
|
||||
boundingMax(-Inf, -Inf),
|
||||
boundingMin(INT64_MAX, INT64_MAX),
|
||||
boundingMax(INT64_MIN, INT64_MIN),
|
||||
metadataId_(id)
|
||||
{
|
||||
}
|
||||
|
||||
uint64_t encodePointTo64(int64_t lat, int64_t lon){
|
||||
assert(lat || lon, "Tried to encode 0,0. This is not allowed");
|
||||
|
||||
uint64_t latu=encodeSignedToUnsigned(lat);
|
||||
uint64_t lonu=encodeSignedToUnsigned(lon);
|
||||
|
||||
assert(latu < (uint64_t)1<<32, "Unsigned lat overflow");
|
||||
assert(lonu < (uint64_t)1<<32, "Unsigned lat overflow");
|
||||
|
||||
uint64_t point = 0;
|
||||
for(uint8_t i=31; i<=31; i--){
|
||||
point <<= 2;
|
||||
if(latu & (1<<i)){
|
||||
point |= 1;
|
||||
}
|
||||
if(lonu & (1<<i)){
|
||||
point |= 2;
|
||||
}
|
||||
}
|
||||
|
||||
return point;
|
||||
}
|
||||
struct LineSegment {
|
||||
std::vector<Point*> points_;
|
||||
Point* prevPoint_;
|
||||
PolygonData* parent_;
|
||||
|
||||
bool sameDirection(int64_t x1, int64_t y1, int64_t x2, int64_t y2){
|
||||
if((x1 > 0 && x2 < 0) || (x1 < 0 && x2 > 0)){
|
||||
return false;
|
||||
}
|
||||
if((y1 > 0 && y2 < 0) || (y1 < 0 && y2 > 0)){
|
||||
return false;
|
||||
/* Decide whether the vector (x1, y1) points in exactly the same direction as
 * the vector (x2, y2).
 *
 * Returns false when (x2, y2) is the zero vector, when the x components or the
 * y components have strictly opposite signs, or when the vectors are not
 * parallel. When x1 is zero the answer is simply whether x2 is zero as well.
 *
 * Parallelism is tested by cross-multiplication (x1*y2 == y1*x2). A truncating
 * division (y1*x2/x1) would round the quotient and accept vectors that are only
 * nearly parallel, e.g. (3,1) and (1,0).
 *
 * NOTE(review): the products are computed in int64_t; this assumes the
 * components are small enough for the product not to overflow - confirm
 * against the callers. */
bool sameDirection(int64_t x1, int64_t y1, int64_t x2, int64_t y2){
    /* A zero reference vector has no direction */
    if(!x2 && !y2){
        return false;
    }

    /* Components pointing to opposite sides can never match */
    if((x1 > 0 && x2 < 0) || (x1 < 0 && x2 > 0)){
        return false;
    }
    if((y1 > 0 && y2 < 0) || (y1 < 0 && y2 > 0)){
        return false;
    }

    if(x1 == 0){
        return x2 == 0;
    }

    /* Exact parallel test, no rounding involved */
    return x1*y2 == y1*x2;
}
|
||||
|
||||
if(x1 == 0){
|
||||
return x2 == 0;
|
||||
}
|
||||
|
||||
void encodeDelta(std::vector<uint8_t>& output, PolygonData* mark = nullptr, int start = 0, int end = -1){
|
||||
if(end < 0){
|
||||
end = points_.size()-1;
|
||||
}
|
||||
|
||||
return y2 == (y1*x2/x1);
|
||||
}
|
||||
int64_t accDiffLat = 0, accDiffLon = 0;
|
||||
int64_t prevDiffLat = 0, prevDiffLon = 0;
|
||||
|
||||
int64_t prevLat, prevLon;
|
||||
|
||||
Point* prevPoint = prevPoint_;
|
||||
if(start > 0){
|
||||
prevPoint = points_[start-1];
|
||||
}
|
||||
|
||||
std::tie(prevLat, prevLon) = prevPoint->value();
|
||||
|
||||
auto encodePoint = [&](){
|
||||
/* Encode accumulator.
|
||||
* After this the position is equal to that of the previous point */
|
||||
if(accDiffLat || accDiffLon){
|
||||
encodeVariableLength(output, encodePointTo64(accDiffLat, accDiffLon), false);
|
||||
}
|
||||
/* Mark points as encoded if we mark and we are the parent */
|
||||
if(mark && prevPoint->parent_ == mark){
|
||||
prevPoint->encoded_ = true;
|
||||
prevPoint->encodedOffset_ = output.size();
|
||||
}
|
||||
|
||||
/* Reset accumulator */
|
||||
accDiffLat = 0;
|
||||
accDiffLon = 0;
|
||||
};
|
||||
|
||||
for(int i = start; i<=end; i++){
|
||||
Point* point = points_[i];
|
||||
|
||||
int64_t lat, lon;
|
||||
std::tie(lat, lon) = point->value();
|
||||
|
||||
/* Calculate difference */
|
||||
int64_t diffLat = lat - prevLat;
|
||||
int64_t diffLon = lon - prevLon;
|
||||
|
||||
/* Encode delta */
|
||||
if(!sameDirection(diffLat, diffLon, prevDiffLat, prevDiffLon)){
|
||||
encodePoint();
|
||||
}
|
||||
|
||||
accDiffLat += diffLat;
|
||||
accDiffLon += diffLon;
|
||||
|
||||
/* Store previous values */
|
||||
prevDiffLat = diffLat;
|
||||
prevDiffLon = diffLon;
|
||||
prevLat = lat;
|
||||
prevLon = lon;
|
||||
prevPoint = point;
|
||||
}
|
||||
|
||||
/* Encode remainder if needed */
|
||||
encodePoint();
|
||||
}
|
||||
|
||||
bool encodeReference(std::vector<uint8_t>& output){
|
||||
/* Search for first marked point */
|
||||
int end = -1, start = -1;
|
||||
for(int i=0; i<points_.size(); i++){
|
||||
if(points_[i]->encoded_){
|
||||
start = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for(int i=points_.size()-1; i>=0; i--){
|
||||
if(points_[i]->encoded_){
|
||||
end = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(end < 0 || start < 0){
|
||||
/* Only unencoded points, then we can only delta encode it ourself */
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Encode delta until where we can refer */
|
||||
encodeDelta(output, nullptr, 0, start);
|
||||
|
||||
|
||||
/* Add reference marker if it is still needed */
|
||||
if(start != end){
|
||||
uint64_t startRef = points_[start]->encodedOffset_;
|
||||
uint64_t endRef = points_[end]->encodedOffset_;
|
||||
|
||||
output.push_back(0);
|
||||
output.push_back(1);
|
||||
encodeVariableLength(output, startRef, false);
|
||||
encodeVariableLength(output, endRef - startRef, true);
|
||||
}
|
||||
|
||||
/* Encode delta till the end of the segment */
|
||||
encodeDelta(output, nullptr, end+1);
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
long encodeBinaryData(std::vector<uint8_t>& output)
|
||||
{
|
||||
bool first = true;
|
||||
int64_t latFixedPoint = 0, lonFixedPoint = 0;
|
||||
int64_t latFixedPointPrev, lonFixedPointPrev;
|
||||
std::vector<LineSegment*> lines_;
|
||||
PolygonData* currentParent = nullptr;
|
||||
LineSegment* segment = nullptr;
|
||||
|
||||
int64_t diffLatAcc = 0, diffLonAcc = 0, diffLatPrev = 0, diffLonPrev = 0;
|
||||
/* Step 1: Encode first point */
|
||||
Point* prevPoint = points_[0];
|
||||
encodeVariableLength(output, prevPoint->key_, false);
|
||||
|
||||
for(Point point: points_){
|
||||
/* The points should first be rounded, and then the integer value is differentiated */
|
||||
latFixedPointPrev = latFixedPoint;
|
||||
lonFixedPointPrev = lonFixedPoint;
|
||||
std::tie(latFixedPoint, lonFixedPoint) = point.value();
|
||||
int direction = 0;
|
||||
/* Step 2: Go through the list of points and check which ones already exist.
|
||||
* We skip the first and last one since the first one is already encoded
|
||||
* and the last one is identical to the first */
|
||||
for(int i=1; i<points_.size()-1; i++){
|
||||
Point* point = points_[i];
|
||||
|
||||
int64_t diffLat = latFixedPoint - latFixedPointPrev;
|
||||
int64_t diffLon = lonFixedPoint - lonFixedPointPrev;
|
||||
if(!point->parent_){
|
||||
point->parent_ = this;
|
||||
point->index_ = i;
|
||||
}
|
||||
|
||||
bool newSegment = false;
|
||||
|
||||
if(first) {
|
||||
/* First point is always encoded */
|
||||
encodeVariableLength(output, encodePointTo64(latFixedPoint, lonFixedPoint), false);
|
||||
|
||||
first = false;
|
||||
} else {
|
||||
if(!sameDirection(diffLat, diffLon, diffLatPrev, diffLonPrev)) {
|
||||
/* Encode accumulator */
|
||||
if(diffLatAcc || diffLonAcc){
|
||||
encodeVariableLength(output, encodePointTo64(diffLatAcc, diffLonAcc), false);
|
||||
|
||||
diffLatAcc = 0;
|
||||
diffLonAcc = 0;
|
||||
if(point->parent_ == currentParent){
|
||||
if(direction == 0){
|
||||
direction = point->index_ - prevPoint->index_;
|
||||
if(direction > 1 || direction < -1){
|
||||
newSegment = true;
|
||||
}
|
||||
}else{
|
||||
if(point->index_ != prevPoint->index_ + direction){
|
||||
newSegment = true;
|
||||
}
|
||||
}
|
||||
|
||||
diffLatAcc += diffLat;
|
||||
diffLonAcc += diffLon;
|
||||
}
|
||||
|
||||
diffLatPrev = diffLat;
|
||||
diffLonPrev = diffLon;
|
||||
if(point->parent_ != currentParent || newSegment){
|
||||
if(segment){
|
||||
lines_.push_back(segment);
|
||||
}
|
||||
|
||||
currentParent = point->parent_;
|
||||
|
||||
segment = new LineSegment();
|
||||
segment->prevPoint_ = prevPoint;
|
||||
segment->parent_ = currentParent;
|
||||
direction = 0;
|
||||
}
|
||||
|
||||
segment->points_.push_back(point);
|
||||
|
||||
prevPoint = point;
|
||||
}
|
||||
|
||||
/* Encode final point if needed */
|
||||
if(diffLonAcc || diffLatAcc) {
|
||||
encodeVariableLength(output, encodePointTo64(diffLatAcc, diffLonAcc), false);
|
||||
if(segment){
|
||||
lines_.push_back(segment);
|
||||
}
|
||||
|
||||
/* Encode stop marker */
|
||||
/* Step 3: Encode segments */
|
||||
for(LineSegment* segment: lines_){
|
||||
if(segment->parent_ == this){
|
||||
/* If we are the parent of the segment we must encode and mark it */
|
||||
segment->encodeDelta(output, this);
|
||||
}else{
|
||||
/* We are not the parent, we can encode it or refer to it, depending on
|
||||
* which takes less bytes. In any case we should not mark it. */
|
||||
std::vector<uint8_t> delta;
|
||||
segment->encodeDelta(delta);
|
||||
|
||||
std::vector<uint8_t> reference;
|
||||
bool possible = segment->encodeReference(reference);
|
||||
|
||||
if(!possible || delta.size() <= reference.size()){
|
||||
output.insert(std::end(output), std::begin(delta), std::end(delta));
|
||||
}else{
|
||||
output.insert(std::end(output), std::begin(reference), std::end(reference));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 4: Write end marker */
|
||||
output.push_back(0);
|
||||
output.push_back(0);
|
||||
|
||||
|
|
@ -520,7 +698,7 @@ int main(int argc, char ** argv )
|
|||
}
|
||||
}
|
||||
|
||||
Point p(shapeObject->padfY[j], shapeObject->padfX[j], precision);
|
||||
Point* p = Point::GetPoint(shapeObject->padfY[j], shapeObject->padfX[j], precision);
|
||||
polygonData->processPoint(p);
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,14 +8,14 @@ mkdir -p naturalearth; cd naturalearth
|
|||
#wget https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries_lakes.zip
|
||||
#unzip ne_10m_admin_0_countries_lakes.zip
|
||||
cd ..
|
||||
#./builder C naturalearth/ne_10m_admin_0_countries_lakes ./out/country16.bin 16 "Made with Natural Earth, placed in the Public Domain."
|
||||
#./builder C naturalearth/ne_10m_admin_0_countries_lakes ./out/country21.bin 21 "Made with Natural Earth, placed in the Public Domain."
|
||||
./builder C naturalearth/ne_10m_admin_0_countries_lakes ./out/country16.bin 16 "Made with Natural Earth, placed in the Public Domain."
|
||||
./builder C naturalearth/ne_10m_admin_0_countries_lakes ./out/country21.bin 21 "Made with Natural Earth, placed in the Public Domain."
|
||||
|
||||
mkdir timezone; cd timezone
|
||||
#wget https://github.com/evansiroky/timezone-boundary-builder/releases/download/2018i/timezones.shapefile.zip
|
||||
#unzip timezones.shapefile.zip
|
||||
cd ..
|
||||
#./builder T timezone/dist/combined-shapefile ./out/timezone16.bin 16 "Contains data from Natural Earth, placed in the Public Domain. Contains information from https://github.com/evansiroky/timezone-boundary-builder, which is made available here under the Open Database License (ODbL)."
|
||||
./builder T timezone/dist/combined-shapefile ./out/timezone16.bin 16 "Contains data from Natural Earth, placed in the Public Domain. Contains information from https://github.com/evansiroky/timezone-boundary-builder, which is made available here under the Open Database License (ODbL)."
|
||||
./builder T timezone/dist/combined-shapefile ./out/timezone21.bin 21 "Contains data from Natural Earth, placed in the Public Domain. Contains information from https://github.com/evansiroky/timezone-boundary-builder, which is made available here under the Open Database License (ODbL)."
|
||||
#rm -rf naturalearth
|
||||
#zip db.zip out/*
|
||||
|
|
|
|||
|
|
@ -138,6 +138,34 @@ static unsigned int ZDDecodeVariableLengthUnsigned(const ZoneDetect *library, ui
|
|||
return i;
|
||||
}
|
||||
|
||||
static unsigned int ZDDecodeVariableLengthUnsignedReverse(const ZoneDetect *library, uint32_t *index, uint64_t *result){
|
||||
uint32_t i = *index;
|
||||
|
||||
if(library->mapping[i] & UINT8_C(0x80)){
|
||||
printf("BUG, reverse mapping final byte is not the end of stream\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(!i){
|
||||
return 0;
|
||||
}
|
||||
i--;
|
||||
|
||||
while(library->mapping[i] & UINT8_C(0x80)){
|
||||
if(!i){
|
||||
return 0;
|
||||
}
|
||||
i--;
|
||||
}
|
||||
|
||||
*index = i;
|
||||
|
||||
i++;
|
||||
|
||||
uint32_t i2 = i;
|
||||
return ZDDecodeVariableLengthUnsigned(library, &i2, result);
|
||||
}
|
||||
|
||||
/* Map an unsigned value back to a signed one: the magnitude is value/2 and the
 * lowest bit selects the sign (set means negative). */
static int64_t ZDDecodeUnsignedToSigned(uint64_t value){
    const int64_t magnitude = (int64_t)(value >> 1);

    if(value & 1){
        return -magnitude;
    }
    return magnitude;
}
|
||||
|
|
@ -306,20 +334,64 @@ static ZDLookupResult ZDPointInPolygon(const ZoneDetect *library, uint32_t polyg
|
|||
int prevQuadrant = 0, winding = 0;
|
||||
uint8_t done = 0, first = 1;
|
||||
|
||||
uint32_t referenceStart=0, referenceEnd=0;
|
||||
int32_t referenceDirection = 0;
|
||||
|
||||
do{
|
||||
uint64_t point;
|
||||
if(!ZDDecodeVariableLengthUnsigned(library, &polygonIndex, &point)) return ZD_LOOKUP_PARSE_ERROR;
|
||||
uint8_t referenceDone = 0;
|
||||
if(!referenceDirection){
|
||||
if(!ZDDecodeVariableLengthUnsigned(library, &polygonIndex, &point)) return ZD_LOOKUP_PARSE_ERROR;
|
||||
}else{
|
||||
if(referenceDirection > 0){
|
||||
/* Read reference forward */
|
||||
if(!ZDDecodeVariableLengthUnsigned(library, &referenceStart, &point)) return ZD_LOOKUP_PARSE_ERROR;
|
||||
if(referenceStart >= referenceEnd){
|
||||
referenceDone = 1;
|
||||
}
|
||||
}else if(referenceDirection < 0){
|
||||
/* Read reference backwards */
|
||||
//TODO: This code is wrong (doh)
|
||||
if(!ZDDecodeVariableLengthUnsignedReverse(library, &referenceStart, &point)) return ZD_LOOKUP_PARSE_ERROR;
|
||||
if(referenceStart <= referenceEnd){
|
||||
referenceDone = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//TODO: special marker during reference mode is an error
|
||||
if(!point){
|
||||
/* This is a special marker */
|
||||
if(referenceDirection){
|
||||
printf("BUG, marker in reference mode?\n");
|
||||
exit(10);
|
||||
}
|
||||
|
||||
uint64_t value;
|
||||
if(!ZDDecodeVariableLengthUnsigned(library, &polygonIndex, &value)) return ZD_LOOKUP_PARSE_ERROR;
|
||||
|
||||
if(value == 0){
|
||||
done = 1;
|
||||
}else if(value == 1){
|
||||
int32_t diff;
|
||||
int64_t start;
|
||||
if(!ZDDecodeVariableLengthUnsigned(library, &polygonIndex, (uint64_t*)&start)) return ZD_LOOKUP_PARSE_ERROR;
|
||||
if(!ZDDecodeVariableLengthSigned(library, &polygonIndex, &diff)) return ZD_LOOKUP_PARSE_ERROR;
|
||||
|
||||
referenceStart = library->dataOffset+(uint32_t)start;
|
||||
referenceEnd = library->dataOffset+(uint32_t)(start + diff);
|
||||
referenceDirection = diff;
|
||||
if(diff < 0){
|
||||
referenceStart--;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}else{
|
||||
ZDDecodePoint(point, &diffLat, &diffLon);
|
||||
if(referenceDirection < 0){
|
||||
diffLat = -diffLat;
|
||||
diffLon = -diffLon;
|
||||
}
|
||||
}
|
||||
|
||||
if(!done){
|
||||
|
|
@ -330,7 +402,7 @@ static ZDLookupResult ZDPointInPolygon(const ZoneDetect *library, uint32_t polyg
|
|||
firstLon = pointLon;
|
||||
}
|
||||
} else {
|
||||
/* The polygons should be closed, but just in case */
|
||||
/* Close the polygon (the closing point is not encoded) */
|
||||
pointLat = firstLat;
|
||||
pointLon = firstLon;
|
||||
}
|
||||
|
|
@ -451,6 +523,10 @@ static ZDLookupResult ZDPointInPolygon(const ZoneDetect *library, uint32_t polyg
|
|||
if(first){
|
||||
first = 0;
|
||||
}
|
||||
|
||||
if(referenceDone){
|
||||
referenceDirection = 0;
|
||||
}
|
||||
}while(!done);
|
||||
|
||||
if(winding == -4) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue