Update parser documentation

cppalliance · Jan 29, 2025 · ec15a38 · ec15a38
1 parent 7f348db
commit ec15a38
Show file tree

Hide file tree

Showing 5 changed files with 451 additions and 216 deletions.
diff --git a/doc/modules/ROOT/pages/design_requirements/parser.adoc b/doc/modules/ROOT/pages/design_requirements/parser.adoc
@@ -49,6 +49,31 @@ significant computational resources. For example:
   chunk directly after the current without having to perform memory movement
   due to the existence of a chunk header.
 
+== Two-Phase Parsing
+
+The parser must return immediately after parsing the header and must not process
+the body until the next `parse()` call. For bodiless messages and HEAD
+responses, it must transition directly to the `complete_in_place` state after
+parsing the header, making further `parse()` calls unnecessary (but still
+valid).
+
+This two-phase parsing brings a few benefits with almost no complications on the
+usage side of the API:
+
+- It introduces an optimization opportunity for users who want to attach a body
+  immediately after the header is parsed (which seems to be the case most of the
+  time), as there is no need to dedicate an internal buffer for the message
+  body. This means all the extra space can be utilized for the input buffer.
+- Because parsing the body might result in an error, returning after the header
+  is parsed lets users access the header first; the error is then reported on
+  the next call to `parse()`.
+- Setting the body limit in the middle of parsing the body or after it doesn't
+  make much sense, so returning right after the header is parsed provides a
+  window for setting such limits.
+- If users attach a body immediately after the header is parsed, we avoid the
+  need for an extra buffer copy operation (in case the user wants to attach an
+  elastic buffer).
+
 == Use Cases and Interfaces
 
 To keep things simple, we will use the following synchronous free functions to
@@ -57,38 +82,55 @@ demonstrate the flow of the parse operation in each example:
 [source,cpp]
 ----
 void
-read_some(stream& s, parser& pr)
+read_some(stream& s, parser& pr, error_code& ec)
 {
-    system::error_code ec;
-    if(pr.need_data())
+    pr.parse(ec);
+    if(ec != condition::need_more_input)
+        return;
+
+    auto n = s.read_some(pr.prepare(), ec);
+    pr.commit(n);
+    if(ec == asio::error::eof)
     {
-        auto n = s.read_some(pr.prepare(), ec);
-        pr.commit(n);
-        if(ec == asio::error::eof)
-        {
-            pr.commit_eof();
-            ec = {};
-        }
-        if(ec.failed())
-            throw system::system_error{ec};
+        pr.commit_eof();
+        ec = {};
     }
+    else if(ec.failed())
+    {
+        return;
+    }
+
     pr.parse(ec);
-    if(ec.failed() && ec != condition::need_more_input)
-        throw system::system_error{ec};
 }
 
 void
 read_header(stream& s, parser& pr)
 {
-    while(!pr.got_header())
-        read_some(s, pr);
+    do
+    {
+        error_code ec;
+        read_some(s, pr, ec);
+        if(ec == condition::need_more_input)
+            continue;
+        if(ec.failed())
+            throw system::system_error(ec);
+    }
+    while(! pr.got_header());
 }
 
 void
 read(stream& s, parser& pr)
-{
-    while(!pr.is_complete())
-        read_some(s, pr);
+{      
+    do
+    {
+        error_code ec;
+        read_some(s, pr, ec);
+        if(ec == condition::need_more_input)
+            continue;
+        if(ec.failed())
+            throw system::system_error(ec);
+    }
+    while(! pr.is_complete());
 }
 ----
 

diff --git a/include/boost/http_proto/header_limits.hpp b/include/boost/http_proto/header_limits.hpp
@@ -43,7 +43,9 @@ struct header_limits
             @li <a href="https://datatracker.ietf.org/doc/html/rfc9112#section-2.1"
                 >2.1.  Message Format (rfc9112)</a>
             @li <a href="https://datatracker.ietf.org/doc/html/rfc9112#section-5"
-                >5.  Field Syntax (rfc9112)</a>
+                >5.  Field Syntax (rfc9112)</a>
+            @li <a href="https://stackoverflow.com/questions/686217/maximum-on-http-header-values"
+                >Maximum on HTTP header values (Stack Overflow)</a>
     */
     std::size_t max_size = 8 * 1024;