From e5699fa8c157d0fbf22d2373fbde7f83254edecc Mon Sep 17 00:00:00 2001 From: Xavier Dufour Date: Sun, 19 Jan 2025 16:16:53 -0500 Subject: [PATCH 1/7] Added optional buffer in get_ch() for line continuation consumption --- pnut.c | 29 ++++++++++ tests/_exe/line_continuation.c | 89 +++++++++++++++++++++++++++++ tests/_exe/line_continuation.golden | 1 + tests/_sh/line_continuation.c | 19 ++++++ tests/_sh/line_continuation.golden | 1 + 5 files changed, 139 insertions(+) create mode 100644 tests/_exe/line_continuation.c create mode 100644 tests/_exe/line_continuation.golden create mode 100644 tests/_sh/line_continuation.c create mode 100644 tests/_sh/line_continuation.golden diff --git a/pnut.c b/pnut.c index 2ebe14cc..3b4a561c 100644 --- a/pnut.c +++ b/pnut.c @@ -44,6 +44,8 @@ // Use positional parameter directly for function parameters that are constants #define OPTIMIZE_CONSTANT_PARAM_not #define SUPPORT_ADDRESS_OF_OP_not +// Make get_ch() use a length-1 character buffer to lookahead and skip line continuations +#define SUPPORT_LINE_CONTINUATION // Shell backend codegen options #ifndef SH_AVOID_PRINTF_USE_NOT @@ -242,7 +244,11 @@ void syntax_error(char *msg) { // tokenizer +#define CHBUF_SIZE 1 int ch; +int chbuf[CHBUF_SIZE]; +int chbuf_head = -1; +int chbuf_tail = 0; #ifdef DEBUG_EXPAND_INCLUDES int prev_ch = EOF; #endif @@ -607,7 +613,30 @@ void output_declaration_c_code(bool no_header) { #endif void get_ch() { +#ifdef SUPPORT_LINE_CONTINUATION + if(chbuf_head > -1) { + ch = chbuf[chbuf_head++]; + if(chbuf_head == chbuf_tail) + chbuf_head = -1; + } else { + ch = fgetc(fp); + + if(ch == '\\') { + ch = fgetc(fp); + + if(ch != '\n'){ // The character isn't a newline, so this is an escape sequence: + chbuf[0] = ch; // Put the character back in the character buffer for future consumption + chbuf_tail = 1; // Set the character buffer size + chbuf_head = 0; // Set the pointer to the character buffer + ch = '\\'; // Restore the character that was read on this call + } else 
{ // The character is a newline, so this is a line continuation which we want to bypass + ch = fgetc(fp); // Consume yet another character, the next one for logical parsing + } + } + } +#else ch = fgetc(fp); +#endif if (ch == EOF) { // If it's not the last file on the stack, EOF means that we need to switch to the next file if (include_stack->next != 0) { diff --git a/tests/_exe/line_continuation.c b/tests/_exe/line_continuation.c new file mode 100644 index 00000000..cb80916f --- /dev/null +++ b/tests/_exe/line_continuation.c @@ -0,0 +1,89 @@ +#include + +#ifdef PNUT_CC + +typedef char *va_list; + +#define va_start(ap,last) ap = ((char *)&(last)) + ((sizeof last+3)&~3) +#define va_arg(ap,type) (ap += (sizeof(type)+3)&~3, *(type *)(ap - ((sizeof(type)+3)&~3))) +#define va_end(ap) + +#else + +#include + +#endif + +void putint_aux(int n, int base) { + int d = n % base; + int top = n / base; + if (n == 0) return; + putint_aux(top, base); + putchar("0123456789abcdef"[d & 15]); +} + +void putint(int n, int base) { + if (n < 0) { + putchar('-'); + putint_aux(-n, base); + } else { + putint_aux(n, base); + } +} + +void putstr(char *s) { + while (*s) { + putchar(*s); + s++; + } +} + +// A simple version of printf that supports %d, %c, %x, %s +void simple_printf(char *fmt, ...) 
{ + va_list ap; + va_start(ap, fmt); + while (*fmt) { + if (*fmt == '%') { + fmt++; + switch (*fmt) { + case 'd': + putint(va_arg(ap, int), 10); + break; + case 'c': + putchar(va_arg(ap, int)); + break; + case 'x': + putint(va_arg(ap, int), 16); + break; + case 's': + putstr(va_arg(ap, char *)); + break; + default: + putchar(*fmt); + break; + } + } else { + putchar(*fmt); + } + fmt++; + } + va_end(ap); +} + +void main() { + +/**/ +int foo = 0; + +/\ +* +*/ fo\ +o +\ += 0\ +x\ +10\ +200; + + simple_printf("%d", foo); + return 0; +} diff --git a/tests/_exe/line_continuation.golden b/tests/_exe/line_continuation.golden new file mode 100644 index 00000000..ff66ef74 --- /dev/null +++ b/tests/_exe/line_continuation.golden @@ -0,0 +1 @@ +66048 \ No newline at end of file diff --git a/tests/_sh/line_continuation.c b/tests/_sh/line_continuation.c new file mode 100644 index 00000000..a3918fa9 --- /dev/null +++ b/tests/_sh/line_continuation.c @@ -0,0 +1,19 @@ +#include + +void main() { + +/**/ +int foo = 0; + +/\ +* +*/ fo\ +o +\ += 0\ +x\ +10\ +200; + + printf("%d", foo); + return 0; +} diff --git a/tests/_sh/line_continuation.golden b/tests/_sh/line_continuation.golden new file mode 100644 index 00000000..ff66ef74 --- /dev/null +++ b/tests/_sh/line_continuation.golden @@ -0,0 +1 @@ +66048 \ No newline at end of file From 97d8b2f7d7104b6a79c3915b3c4a2ac37d567d1c Mon Sep 17 00:00:00 2001 From: Xavier Dufour Date: Sun, 19 Jan 2025 19:57:23 -0500 Subject: [PATCH 2/7] Changes following PR comments --- pnut.c | 7 +- tests/_all/line_continuation.c | 36 ++++++++ tests/{_exe => _all}/line_continuation.golden | 0 tests/_exe/line_continuation.c | 89 ------------------- tests/_sh/line_continuation.c | 19 ---- tests/_sh/line_continuation.golden | 1 - 6 files changed, 42 insertions(+), 110 deletions(-) create mode 100644 tests/_all/line_continuation.c rename tests/{_exe => _all}/line_continuation.golden (100%) delete mode 100644 tests/_exe/line_continuation.c delete mode 100644 
tests/_sh/line_continuation.c delete mode 100644 tests/_sh/line_continuation.golden diff --git a/pnut.c b/pnut.c index 3b4a561c..8493f284 100644 --- a/pnut.c +++ b/pnut.c @@ -616,8 +616,9 @@ void get_ch() { #ifdef SUPPORT_LINE_CONTINUATION if(chbuf_head > -1) { ch = chbuf[chbuf_head++]; - if(chbuf_head == chbuf_tail) + if(chbuf_head == chbuf_tail) { chbuf_head = -1; + } } else { ch = fgetc(fp); @@ -631,6 +632,10 @@ void get_ch() { ch = '\\'; // Restore the character that was read on this call } else { // The character is a newline, so this is a line continuation which we want to bypass ch = fgetc(fp); // Consume yet another character, the next one for logical parsing +#ifdef INCLUDE_LINE_NUMBER_ON_ERROR + line_number += 1; + column_number = 0; +#endif } } } diff --git a/tests/_all/line_continuation.c b/tests/_all/line_continuation.c new file mode 100644 index 00000000..2aef9343 --- /dev/null +++ b/tests/_all/line_continuation.c @@ -0,0 +1,36 @@ +#include + +void putint_aux(int n, int base) { + int d = n % base; + int top = n / base; + if (n == 0) return; + putint_aux(top, base); + putchar("0123456789abcdef"[d & 15]); +} + +void putint(int n, int base) { + if (n < 0) { + putchar('-'); + putint_aux(-n, base); + } else { + putint_aux(n, base); + } +} + +void main() { + +/**/ +int foo = 0; + +/\ +* +*/ fo\ +o +\ += 0\ +x\ +10\ +200; + + putint(foo, 10); + return 0; +} \ No newline at end of file diff --git a/tests/_exe/line_continuation.golden b/tests/_all/line_continuation.golden similarity index 100% rename from tests/_exe/line_continuation.golden rename to tests/_all/line_continuation.golden diff --git a/tests/_exe/line_continuation.c b/tests/_exe/line_continuation.c deleted file mode 100644 index cb80916f..00000000 --- a/tests/_exe/line_continuation.c +++ /dev/null @@ -1,89 +0,0 @@ -#include - -#ifdef PNUT_CC - -typedef char *va_list; - -#define va_start(ap,last) ap = ((char *)&(last)) + ((sizeof last+3)&~3) -#define va_arg(ap,type) (ap += (sizeof(type)+3)&~3, 
*(type *)(ap - ((sizeof(type)+3)&~3))) -#define va_end(ap) - -#else - -#include - -#endif - -void putint_aux(int n, int base) { - int d = n % base; - int top = n / base; - if (n == 0) return; - putint_aux(top, base); - putchar("0123456789abcdef"[d & 15]); -} - -void putint(int n, int base) { - if (n < 0) { - putchar('-'); - putint_aux(-n, base); - } else { - putint_aux(n, base); - } -} - -void putstr(char *s) { - while (*s) { - putchar(*s); - s++; - } -} - -// A simple version of printf that supports %d, %c, %x, %s -void simple_printf(char *fmt, ...) { - va_list ap; - va_start(ap, fmt); - while (*fmt) { - if (*fmt == '%') { - fmt++; - switch (*fmt) { - case 'd': - putint(va_arg(ap, int), 10); - break; - case 'c': - putchar(va_arg(ap, int)); - break; - case 'x': - putint(va_arg(ap, int), 16); - break; - case 's': - putstr(va_arg(ap, char *)); - break; - default: - putchar(*fmt); - break; - } - } else { - putchar(*fmt); - } - fmt++; - } - va_end(ap); -} - -void main() { - -/**/ -int foo = 0; - -/\ -* -*/ fo\ -o +\ -= 0\ -x\ -10\ -200; - - simple_printf("%d", foo); - return 0; -} diff --git a/tests/_sh/line_continuation.c b/tests/_sh/line_continuation.c deleted file mode 100644 index a3918fa9..00000000 --- a/tests/_sh/line_continuation.c +++ /dev/null @@ -1,19 +0,0 @@ -#include - -void main() { - -/**/ -int foo = 0; - -/\ -* -*/ fo\ -o +\ -= 0\ -x\ -10\ -200; - - printf("%d", foo); - return 0; -} diff --git a/tests/_sh/line_continuation.golden b/tests/_sh/line_continuation.golden deleted file mode 100644 index ff66ef74..00000000 --- a/tests/_sh/line_continuation.golden +++ /dev/null @@ -1 +0,0 @@ -66048 \ No newline at end of file From 23b6ec2640dd7501167f852c457857c637b368cb Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Wed, 19 Feb 2025 18:43:36 -0500 Subject: [PATCH 3/7] Simplify get_ch by splitting it in 2 --- pnut.c | 63 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/pnut.c 
b/pnut.c index dfea6691..0ef6c1d7 100644 --- a/pnut.c +++ b/pnut.c @@ -49,8 +49,9 @@ // Use positional parameter directly for function parameters that are constants #define OPTIMIZE_CONSTANT_PARAM_not #define SUPPORT_ADDRESS_OF_OP_not + // Make get_ch() use a length-1 character buffer to lookahead and skip line continuations -#define SUPPORT_LINE_CONTINUATION +#define SUPPORT_LINE_CONTINUATION_not // Shell backend codegen options #ifndef SH_AVOID_PRINTF_USE_NOT @@ -249,11 +250,7 @@ void syntax_error(char *msg) { // tokenizer -#define CHBUF_SIZE 1 int ch; -int chbuf[CHBUF_SIZE]; -int chbuf_head = -1; -int chbuf_tail = 0; #ifdef DEBUG_EXPAND_INCLUDES int prev_ch = EOF; #endif @@ -762,36 +759,44 @@ void output_declaration_c_code(bool no_header) { } #endif -void get_ch() { #ifdef SUPPORT_LINE_CONTINUATION - if(chbuf_head > -1) { - ch = chbuf[chbuf_head++]; - if(chbuf_head == chbuf_tail) { - chbuf_head = -1; - } - } else { - ch = fgetc(fp); - - if(ch == '\\') { - ch = fgetc(fp); - - if(ch != '\n'){ // The character isn't a newline, so this is an escape sequence: - chbuf[0] = ch; // Put the character back in the character buffer for future consumption - chbuf_tail = 1; // Set the character buffer size - chbuf_head = 0; // Set the pointer to the character buffer - ch = '\\'; // Restore the character that was read on this call - } else { // The character is a newline, so this is a line continuation which we want to bypass - ch = fgetc(fp); // Consume yet another character, the next one for logical parsing -#ifdef INCLUDE_LINE_NUMBER_ON_ERROR - line_number += 1; - column_number = 0; -#endif +// get_ch_ is responsible for reading the next character from the input file, +// switching to the next file if necessary and updating the line number. +// get_ch is then responsible for skipping line continuations. 
+void get_ch_(); + +int line_continutation_prev_char = -2; // -1 is EOF, -2 is uninitialized +void get_ch() { + if (line_continutation_prev_char == -2) { + while (1) { // Loop as long as we're reading line continuations + get_ch_(); // Read the next character + if (ch == '\\') { + get_ch_(); // Skip backslash + if (ch == '\n') { + continue; // Loop again to read the next character + } else { + // '\' is not followed by newline, so we save the current character + // and make '\' the current character + line_continutation_prev_char = ch; + ch = '\\'; + break; + } + } else { + break; } } + } else { + ch = line_continutation_prev_char; + line_continutation_prev_char = -2; } +} + +void get_ch_() { #else - ch = fgetc(fp); +void get_ch() { #endif + ch = fgetc(fp); + if (ch == EOF) { // If it's not the last file on the stack, EOF means that we need to switch to the next file if (include_stack->next != 0) { From 391e186d173de7bafde7d46498ec3e58a4e7811d Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Wed, 19 Feb 2025 18:44:01 -0500 Subject: [PATCH 4/7] Add consecutive line continuations to test file --- tests/_all/line_continuation.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/_all/line_continuation.c b/tests/_all/line_continuation.c index 2aef9343..5f5ce00c 100644 --- a/tests/_all/line_continuation.c +++ b/tests/_all/line_continuation.c @@ -1,3 +1,4 @@ +// comp_pnut_opt: -DSUPPORT_LINE_CONTINUATION #include void putint_aux(int n, int base) { @@ -17,7 +18,7 @@ void putint(int n, int base) { } } -void main() { +int main() { /**/ int foo = 0; @@ -25,6 +26,8 @@ int foo = 0; /\ * */ fo\ +\ +\ o +\ = 0\ x\ @@ -33,4 +36,4 @@ x\ putint(foo, 10); return 0; -} \ No newline at end of file +} From 8421f1567435fad15e09c1e6b9775d18e9a13f4b Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Thu, 20 Feb 2025 09:10:58 -0500 Subject: [PATCH 5/7] Rewrite putint for zsh compatibility --- tests/_all/line_continuation.c | 17 +++++++---------- 1 file 
changed, 7 insertions(+), 10 deletions(-) diff --git a/tests/_all/line_continuation.c b/tests/_all/line_continuation.c index 5f5ce00c..efddfca2 100644 --- a/tests/_all/line_continuation.c +++ b/tests/_all/line_continuation.c @@ -1,20 +1,17 @@ // comp_pnut_opt: -DSUPPORT_LINE_CONTINUATION #include -void putint_aux(int n, int base) { - int d = n % base; - int top = n / base; - if (n == 0) return; - putint_aux(top, base); - putchar("0123456789abcdef"[d & 15]); +void putint_aux(int n) { + if (n >= 10) putint_aux(n / 10); + putchar('0' + (n % 10)); } -void putint(int n, int base) { +void putint(int n) { if (n < 0) { putchar('-'); - putint_aux(-n, base); + putint_aux(-n); } else { - putint_aux(n, base); + putint_aux(n); } } @@ -34,6 +31,6 @@ x\ 10\ 200; - putint(foo, 10); + putint(foo); return 0; } From b7d57f48044aebbe0fd23a769dc0c5b1a6d0d173 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Thu, 20 Feb 2025 11:07:57 -0500 Subject: [PATCH 6/7] Change constant in test for zsh compatibility zsh doesn't handle octal and hex literals properly, so use a decimal literal instead. 
--- tests/_all/line_continuation.c | 4 ++-- tests/_all/line_continuation.golden | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/_all/line_continuation.c b/tests/_all/line_continuation.c index efddfca2..ad8dff4f 100644 --- a/tests/_all/line_continuation.c +++ b/tests/_all/line_continuation.c @@ -26,8 +26,8 @@ int foo = 0; \ \ o +\ -= 0\ -x\ += 1\ +\ 10\ 200; diff --git a/tests/_all/line_continuation.golden b/tests/_all/line_continuation.golden index ff66ef74..65c7afb4 100644 --- a/tests/_all/line_continuation.golden +++ b/tests/_all/line_continuation.golden @@ -1 +1 @@ -66048 \ No newline at end of file +110200 \ No newline at end of file From c4656ee0ced96ea2c42ae3af2f7ffa2c5ce9699e Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Thu, 20 Feb 2025 11:08:36 -0500 Subject: [PATCH 7/7] pnut-sh: add leading 0 to octal literal --- sh.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sh.c b/sh.c index 01476a25..62d8393a 100644 --- a/sh.c +++ b/sh.c @@ -281,6 +281,7 @@ void print_escaped_text(text t, bool for_printf) { putchar('0'); putchar('x'); puthex_unsigned(TEXT_TO_INT(text_pool[t + 1])); } else if (text_pool[t] == TEXT_FROM_INT(TEXT_INTEGER_OCT)) { + putchar('0'); // Note: This is not supported by zsh by default putoct_unsigned(TEXT_TO_INT(text_pool[t + 1])); } else if (text_pool[t] == TEXT_FROM_INT(TEXT_STRING)) { print_escaped_string((char*) text_pool[t + 1], (char*) text_pool[t + 2], for_printf); @@ -315,6 +316,7 @@ void print_text(text t) { putchar('0'); putchar('x'); puthex_unsigned(TEXT_TO_INT(text_pool[t + 1])); } else if (text_pool[t] == TEXT_FROM_INT(TEXT_INTEGER_OCT)) { + putchar('0'); // Note: This is not supported by zsh by default putoct_unsigned(TEXT_TO_INT(text_pool[t + 1])); } else if (text_pool[t] == TEXT_FROM_INT(TEXT_STRING)) { if (TEXT_TO_INT(text_pool[t + 2]) == 0) { // null-terminated string