From 5578f18e10c255779ea6d8e80970202c9157b9d8 Mon Sep 17 00:00:00 2001
From: mattiZed <mattized@users.noreply.github.com>
Date: Sun, 28 Apr 2024 20:08:48 +0200
Subject: [PATCH 1/5] refactor: encode uses LUT, also work on string buffer
 reference directly refactor: decode uses LUT, improve delta computation

---
 src/lib.rs | 101 ++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 72 insertions(+), 29 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 96cfedc..c712393 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -46,23 +46,26 @@ where
     }
 }
 
-fn encode(current: f64, previous: f64, factor: i32) -> Result<String, String> {
-    let current = scale(current, factor);
-    let previous = scale(previous, factor);
-    let mut coordinate = (current - previous) << 1;
-    if (current - previous) < 0 {
-        coordinate = !coordinate;
+fn encode(delta: i64, encoded: &mut String) {
+    let mut value = delta << 1;
+    if value < 0 {
+        value = !value;
     }
-    let mut output: String = "".to_string();
-    while coordinate >= 0x20 {
-        let from_char = char::from_u32(((0x20 | (coordinate & 0x1f)) + 63) as u32)
-            .ok_or("Couldn't convert character")?;
-        output.push(from_char);
-        coordinate >>= 5;
+    _encode(value as u64, encoded);
+}
+
+fn _encode(mut value: u64, result: &mut String) {
+    const ENCODING_TABLE: &str =
+        "?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";
+
+    while value >= 0x20 {
+        let pos = (value & 0x1F) | 0x20;
+        let from_char = ENCODING_TABLE.as_bytes()[pos as usize] as char;
+        result.push(from_char);
+        value >>= 5;
     }
-    let from_char = char::from_u32((coordinate + 63) as u32).ok_or("Couldn't convert character")?;
-    output.push(from_char);
-    Ok(output)
+    let from_char = ENCODING_TABLE.as_bytes()[value as usize] as char;
+    result.push(from_char);
 }
 
 /// Encodes a Google Encoded Polyline.
@@ -82,20 +85,23 @@ where
 {
     let base: i32 = 10;
     let factor: i32 = base.pow(precision);
+    let mut encoded: String = "".to_string();
 
-    let mut output = "".to_string();
-    let mut b = Coord { x: 0.0, y: 0.0 };
+    let mut current: Coord<i64> = Coord { x: 0, y: 0 };
+    let mut previous: Coord<i64> = Coord { x: 0, y: 0 };
 
     for (i, a) in coordinates.into_iter().enumerate() {
+        current.x = scale(a.x, factor);
+        current.y = scale(a.y, factor);
         check(a.y, (MIN_LATITUDE, MAX_LATITUDE))
             .map_err(|e| format!("Latitude error at position {0}: {1}", i, e))?;
         check(a.x, (MIN_LONGITUDE, MAX_LONGITUDE))
             .map_err(|e| format!("Longitude error at position {0}: {1}", i, e))?;
-        output = output + &encode(a.y, b.y, factor)?;
-        output = output + &encode(a.x, b.x, factor)?;
-        b = a;
+        encode(current.y - previous.y, &mut encoded);
+        encode(current.x - previous.x, &mut encoded);
+        previous = current;
     }
-    Ok(output)
+    Ok(encoded)
 }
 
 /// Decodes a Google Encoded Polyline.
@@ -135,16 +141,46 @@ pub fn decode_polyline(polyline: &str, precision: u32) -> Result<LineString<f64>
 }
 
 fn trans(chars: &[u8], mut index: usize) -> Result<(i64, usize), String> {
+    #[rustfmt::skip]
+    const DECODING_TABLE: &[i8] = &[
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1,  0,  1,  2,  3,  4,  5,  6,
+         7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+        17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
+        27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
+        37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
+        47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
+        57, 58, 59, 60, 61, 62, 63, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+        -1, -1, -1, -1, -1, -1,
+    ];
+
     let mut shift = 0;
     let mut result = 0;
     let mut byte;
     loop {
-        byte = chars[index] as u64;
-        if byte < 63 || (shift > 64 - 5) {
+        byte = DECODING_TABLE[chars[index] as usize];
+        if byte < 1 {
             return Err(format!("Cannot decode character at index {}", index));
         }
-        byte -= 63;
-        result |= (byte & 0x1f) << shift;
+        result |= (byte as u64 & 0x1f) << shift;
         index += 1;
         shift += 5;
         if byte < 0x20 {
@@ -237,15 +273,22 @@ mod tests {
     // TODO: handle this case in the future?
     fn broken_string() {
         let s = "_p~iF~ps|U_u🗑lLnnqC_mqNvxq`@";
-        let res = vec![[-120.2, 38.5], [-120.95, 2306360.53104], [-126.453, 2306363.08304]].into();
-        assert_eq!(decode_polyline(s, 5).unwrap(), res);
+        let expected: Result<_, _> = Err("Cannot decode character at index 12".to_string());
+        assert_eq!(decode_polyline(s, 5), expected);
     }
 
     #[test]
-    #[should_panic]
     fn invalid_string() {
         let s = "invalid_polyline_that_should_be_handled_gracefully";
-        decode_polyline(s, 6).unwrap();
+        let expected: Result<_, _> = Err("Cannot decode character at index 12".to_string());
+        assert_eq!(decode_polyline(s, 5), expected);
+    }
+
+    #[test]
+    fn another_invalid_string() {
+        let s = "ugh_ugh";
+        let expected: Result<_, _> = Err("Cannot decode character at index 12".to_string());
+        assert_eq!(decode_polyline(s, 5), expected);
     }
 
     #[test]

From 10ef75507c91f32044dbedfcca30f6422a3fbae3 Mon Sep 17 00:00:00 2001
From: mattiZed <mattiZed@users.noreply.github.com>
Date: Mon, 29 Apr 2024 10:27:15 +0200
Subject: [PATCH 2/5] fix: differentiate character and overflow errors feat:
 provide naive and LUT-based algorithms for both encode and decode

---
 src/lib.rs | 122 ++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 93 insertions(+), 29 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index c712393..5da49f9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -46,26 +46,14 @@ where
     }
 }
 
-fn encode(delta: i64, encoded: &mut String) {
-    let mut value = delta << 1;
-    if value < 0 {
-        value = !value;
+// Compute the maximum shift allowed/needed for representing longitude/latitude deltas,
+// based on precision
+fn max_shift(factor: i64, lat: bool) -> u8 {
+    if lat {
+        ((180 * factor).ilog2() + 1) as u8
+    } else {
+        ((360 * factor).ilog2() + 1) as u8
     }
-    _encode(value as u64, encoded);
-}
-
-fn _encode(mut value: u64, result: &mut String) {
-    const ENCODING_TABLE: &str =
-        "?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";
-
-    while value >= 0x20 {
-        let pos = (value & 0x1F) | 0x20;
-        let from_char = ENCODING_TABLE.as_bytes()[pos as usize] as char;
-        result.push(from_char);
-        value >>= 5;
-    }
-    let from_char = ENCODING_TABLE.as_bytes()[value as usize] as char;
-    result.push(from_char);
 }
 
 /// Encodes a Google Encoded Polyline.
@@ -104,6 +92,44 @@ where
     Ok(encoded)
 }
 
+// Polyline-encoder entrypoint, expects a scaled (and valid) longitude/latitude delta.
+fn encode(delta: i64, encoded: &mut String) {
+    let mut value = delta << 1;
+    if value < 0 {
+        value = !value;
+    }
+    // value can be encoded using lookup table (_encode() vs _encode_lut).
+    // At least on my machine, plain _encode() is slightly faster (-20%).
+    _encode(value as u64, encoded);
+}
+
+fn _encode(mut value: u64, encoded: &mut String) {
+    while value >= 0x20 {
+        // apply 5bit mask to value and set 6th bit to indicate there is another chunk
+        // needed to encode "value"
+        let byte = ((value & 0b11111) | 0b100000) as u8;
+        encoded.push((byte + 63) as char);
+        value >>= 5;
+    }
+    // encode the remainder
+    encoded.push((value as u8 + 63) as char);
+}
+
+// Approach for optimizing _encode(), utilizing a lookup table.
+fn _encode_lut(mut value: u64, encoded: &mut String) {
+    const ENCODING_TABLE: &str =
+        "?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";
+
+    while value >= 0b100000 {
+        let pos = (value & 0b11111) | 0b100000;
+        let from_char = ENCODING_TABLE.as_bytes()[pos as usize] as char;
+        encoded.push(from_char);
+        value >>= 5;
+    }
+    let from_char = ENCODING_TABLE.as_bytes()[value as usize] as char;
+    encoded.push(from_char);
+}
+
 /// Decodes a Google Encoded Polyline.
 ///
 /// # Examples
@@ -123,12 +149,15 @@ pub fn decode_polyline(polyline: &str, precision: u32) -> Result<LineString<f64>
 
     let chars = polyline.as_bytes();
 
+    // polyline can be decoded using lookup table (_decode() vs _decode_lut).
+    // At least on my machine, _decode_lut() is slightly faster (~20-25%).
     while index < chars.len() {
-        let (latitude_change, new_index) = trans(chars, index)?;
+        let (latitude_change, new_index) = _decode_lut(chars, index, max_shift(factor, true))?;
         if new_index >= chars.len() {
             break;
         }
-        let (longitude_change, new_index) = trans(chars, new_index)?;
+        let (longitude_change, new_index) =
+            _decode_lut(chars, new_index, max_shift(factor, false))?;
         index = new_index;
 
         lat += latitude_change;
@@ -140,7 +169,40 @@ pub fn decode_polyline(polyline: &str, precision: u32) -> Result<LineString<f64>
     Ok(coordinates.into())
 }
 
-fn trans(chars: &[u8], mut index: usize) -> Result<(i64, usize), String> {
+fn _decode(chars: &[u8], mut index: usize, max_shift: u8) -> Result<(i64, usize), String> {
+    let mut shift = 0;
+    let mut value = 0;
+    let mut byte;
+
+    loop {
+        byte = chars[index] as u64;
+        if byte < 63 || byte > 127 {
+            return Err(format!("Cannot decode character at index {index}"));
+        }
+        byte -= 63;
+        // apply 5bit mask to byte and reconstruct value
+        value |= (byte & 0b11111) << shift;
+        shift += 5;
+        index += 1;
+        // if the 6th bit is not set, we are done
+        if byte < 0b100000 {
+            break;
+        } else if shift > max_shift {
+            // break-criterion didn't hit and next iteration would provoke
+            // an out-of-bounds longitude/latitude.
+            return Err(format!("Overflow due to character at index {}", index - 1));
+        }
+    }
+
+    let delta = if (value & 1) > 0 {
+        !(value >> 1)
+    } else {
+        value >> 1
+    } as i64;
+    Ok((delta, index))
+}
+
+fn _decode_lut(chars: &[u8], mut index: usize, max_shift: u8) -> Result<(i64, usize), String> {
     #[rustfmt::skip]
     const DECODING_TABLE: &[i8] = &[
         -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
@@ -177,14 +239,18 @@ fn trans(chars: &[u8], mut index: usize) -> Result<(i64, usize), String> {
     let mut byte;
     loop {
         byte = DECODING_TABLE[chars[index] as usize];
-        if byte < 1 {
+        if byte < 0 {
             return Err(format!("Cannot decode character at index {}", index));
         }
         result |= (byte as u64 & 0x1f) << shift;
-        index += 1;
         shift += 5;
-        if byte < 0x20 {
+        index += 1;
+        if byte < 0b100000 {
             break;
+        } else if shift > max_shift {
+            // break-criterion didn't hit and next iteration would provoke
+            // an out-of-bounds longitude/latitude.
+            return Err(format!("Overflow due to character at index {}", index - 1));
         }
     }
 
@@ -269,8 +335,6 @@ mod tests {
     }
 
     #[test]
-    // emoji is decodable but messes up data
-    // TODO: handle this case in the future?
     fn broken_string() {
         let s = "_p~iF~ps|U_u🗑lLnnqC_mqNvxq`@";
         let expected: Result<_, _> = Err("Cannot decode character at index 12".to_string());
@@ -280,14 +344,14 @@ mod tests {
     #[test]
     fn invalid_string() {
         let s = "invalid_polyline_that_should_be_handled_gracefully";
-        let expected: Result<_, _> = Err("Cannot decode character at index 12".to_string());
+        let expected: Result<_, _> = Err("Overflow due to character at index 5".to_string());
         assert_eq!(decode_polyline(s, 5), expected);
     }
 
     #[test]
     fn another_invalid_string() {
         let s = "ugh_ugh";
-        let expected: Result<_, _> = Err("Cannot decode character at index 12".to_string());
+        let expected: Result<_, _> = Err("Overflow due to character at index 5".to_string());
         assert_eq!(decode_polyline(s, 5), expected);
     }
 

From 66c8af59e63679ba833b00e339fc2eb5a9da4cc4 Mon Sep 17 00:00:00 2001
From: mattiZed <mattiZed@users.noreply.github.com>
Date: Mon, 29 Apr 2024 10:36:41 +0200
Subject: [PATCH 3/5] chore: update changelog

---
 CHANGELOG.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 02f5aba..a4d4c5a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,11 @@
 
 ## Unreleased
 
+* inspired by HERE's [flexpolyline](https://github.com/heremaps/flexible-polyline):
+  * provide alt. implementation of both encode() and decode() based on LUT
+  * rework encoder delta-computation (performance improvement)
+* correctly differentiate character and overflow errors when decoding
+
 ## 0.10.2
 
 * fix decoder crashing with out-of-bounds error (https://github.com/georust/polyline/pull/37):

From 094810848227997aa934253df5a90dc454396829 Mon Sep 17 00:00:00 2001
From: mattiZed <mattized@users.noreply.github.com>
Date: Mon, 29 Apr 2024 20:07:05 +0200
Subject: [PATCH 4/5] style: also use binary literal in decode_lut

---
 src/lib.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lib.rs b/src/lib.rs
index 5da49f9..b29b55f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -242,7 +242,7 @@ fn _decode_lut(chars: &[u8], mut index: usize, max_shift: u8) -> Result<(i64, us
         if byte < 0 {
             return Err(format!("Cannot decode character at index {}", index));
         }
-        result |= (byte as u64 & 0x1f) << shift;
+        result |= (byte as u64 & 0b11111) << shift;
         shift += 5;
         index += 1;
         if byte < 0b100000 {

From e11dd90861c7ae419adce8e645ae85db32c38d82 Mon Sep 17 00:00:00 2001
From: mattiZed <mattiZed@users.noreply.github.com>
Date: Thu, 2 May 2024 07:21:29 +0200
Subject: [PATCH 5/5] revert: remove string allocation/concatenation
 improvement for better traceability of performance gains

---
 CHANGELOG.md |  2 +-
 src/lib.rs   | 16 ++++++++++------
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a4d4c5a..7a418b8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,7 @@
 
 * inspired by HERE's [flexpolyline](https://github.com/heremaps/flexible-polyline):
   * provide alt. implementation of both encode() and decode() based on LUT
-  * rework encoder delta-computation (performance improvement)
+  * rework encoder delta-computation
 * correctly differentiate character and overflow errors when decoding
 
 ## 0.10.2
diff --git a/src/lib.rs b/src/lib.rs
index b29b55f..c7745cd 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -85,25 +85,26 @@ where
             .map_err(|e| format!("Latitude error at position {0}: {1}", i, e))?;
         check(a.x, (MIN_LONGITUDE, MAX_LONGITUDE))
             .map_err(|e| format!("Longitude error at position {0}: {1}", i, e))?;
-        encode(current.y - previous.y, &mut encoded);
-        encode(current.x - previous.x, &mut encoded);
+        encoded = encoded + &encode(current.y - previous.y);
+        encoded = encoded + &encode(current.x - previous.x);
         previous = current;
     }
     Ok(encoded)
 }
 
 // Polyline-encoder entrypoint, expects a scaled (and valid) longitude/latitude delta.
-fn encode(delta: i64, encoded: &mut String) {
+fn encode(delta: i64) -> String {
     let mut value = delta << 1;
     if value < 0 {
         value = !value;
     }
     // value can be encoded using lookup table (_encode() vs _encode_lut).
     // At least on my machine, plain _encode() is slightly faster (-20%).
-    _encode(value as u64, encoded);
+    return _encode(value as u64);
 }
 
-fn _encode(mut value: u64, encoded: &mut String) {
+fn _encode(mut value: u64) -> String {
+    let mut encoded: String = String::new();
     while value >= 0x20 {
         // apply 5bit mask to value and set 6th bit to indicate there is another chunk
         // needed to encode "value"
@@ -113,10 +114,12 @@ fn _encode(mut value: u64, encoded: &mut String) {
     }
     // encode the remainder
     encoded.push((value as u8 + 63) as char);
+    return encoded
 }
 
 // Approach for optimizing _encode(), utilizing a lookup table.
-fn _encode_lut(mut value: u64, encoded: &mut String) {
+fn _encode_lut(mut value: u64) -> String {
+    let mut encoded: String = String::new();
     const ENCODING_TABLE: &str =
         "?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";
 
@@ -128,6 +131,7 @@ fn _encode_lut(mut value: u64, encoded: &mut String) {
     }
     let from_char = ENCODING_TABLE.as_bytes()[value as usize] as char;
     encoded.push(from_char);
+    return encoded
 }
 
 /// Decodes a Google Encoded Polyline.