split heredoc handling in the scanner

2024-09-01 19:56:22 +00:00 · 2024-09-01 19:56:22 +00:00 · 7e1e51e90b
commit 7e1e51e90b
parent 8be7417a61
30 changed files with 663 additions and 416 deletions
--- a/Filelist.sh.mk
+++ b/Filelist.sh.mk
@ -14,21 +14,26 @@ src/hashmap/env/env_utils                                                     \
 src/vec/ast/ast                                                               \
 src/vec/ast/ast_functions2                                                    \
 src/vec/ast/ast_functions3                                                    \
+src/vec/ast/ast_functions4                                                    \
 src/vec/ast/ast_sort                                                          \
 src/vec/estr/estr                                                             \
 src/vec/estr/estr_functions2                                                  \
 src/vec/estr/estr_functions3                                                  \
+src/vec/estr/estr_functions4                                                  \
 src/vec/estr/estr_sort                                                        \
 src/vec/heredoc/heredoc                                                       \
 src/vec/heredoc/heredoc_functions2                                            \
 src/vec/heredoc/heredoc_functions3                                            \
+src/vec/heredoc/heredoc_functions4                                            \
 src/vec/heredoc/heredoc_sort                                                  \
 src/vec/pid/pid                                                               \
 src/vec/pid/pid_functions2                                                    \
 src/vec/pid/pid_functions3                                                    \
+src/vec/pid/pid_functions4                                                    \
 src/vec/pid/pid_sort                                                          \
 src/vec/str/str                                                               \
 src/vec/str/str_functions2                                                    \
 src/vec/str/str_functions3                                                    \
+src/vec/str/str_functions4                                                    \
 src/vec/str/str_sort                                                          \

--- a/input.toml
+++ b/input.toml
@ -5,6 +5,7 @@ sources = [
 	"stdme/generic_sources/src/vec/C__PREFIX___sort.c__TEMPLATE__",
 	"stdme/generic_sources/src/vec/C__PREFIX___functions2.c__TEMPLATE__",
 	"stdme/generic_sources/src/vec/C__PREFIX___functions3.c__TEMPLATE__",
+	"stdme/generic_sources/src/vec/C__PREFIX___functions4.c__TEMPLATE__",
 ]
 replace.C__TYPENAME__ = "type"
 replace.C__TYPEHEADER__ = "header_include"
--- a/output/include/me/vec/vec_ast.h
+++ b/output/include/me/vec/vec_ast.h
@ -120,4 +120,14 @@ void	vec_ast_sort(t_vec_ast		  *vec, t_vec_ast_sort_fn is_sorted);
 /// @return true if the operation failed, false otherwise
 t_error vec_ast_back(t_vec_ast *vec, t_ast_node **out);

+/// @brief Get a pointer to the i'th element, or NULL otherwise
+/// @param vec The vec_ast to get the element from
+/// @return A pointer to the element or NULL
+t_ast_node *vec_ast_get(t_vec_ast *vec, t_usize i);
+
+/// @brief Get a pointer to the last element, or NULL otherwise
+/// @param vec The vec_ast to get the element from
+/// @return A pointer to the last element or NULL
+t_ast_node *vec_ast_last(t_vec_ast *vec);
+
 #endif
--- a/output/include/me/vec/vec_estr.h
+++ b/output/include/me/vec/vec_estr.h
@ -120,4 +120,14 @@ void	vec_estr_sort(t_vec_estr		  *vec, t_vec_estr_sort_fn is_sorted);
 /// @return true if the operation failed, false otherwise
 t_error vec_estr_back(t_vec_estr *vec, t_expandable_str **out);

+/// @brief Get a pointer to the i'th element, or NULL otherwise
+/// @param vec The vec_estr to get the element from
+/// @return A pointer to the element or NULL
+t_expandable_str *vec_estr_get(t_vec_estr *vec, t_usize i);
+
+/// @brief Get a pointer to the last element, or NULL otherwise
+/// @param vec The vec_estr to get the element from
+/// @return A pointer to the last element or NULL
+t_expandable_str *vec_estr_last(t_vec_estr *vec);
+
 #endif
--- a/output/include/me/vec/vec_heredoc.h
+++ b/output/include/me/vec/vec_heredoc.h
@ -120,4 +120,14 @@ void	vec_heredoc_sort(t_vec_heredoc		  *vec, t_vec_heredoc_sort_fn is_sorted);
 /// @return true if the operation failed, false otherwise
 t_error vec_heredoc_back(t_vec_heredoc *vec, t_heredoc **out);

+/// @brief Get a pointer to the i'th element, or NULL otherwise
+/// @param vec The vec_heredoc to get the element from
+/// @return A pointer to the element or NULL
+t_heredoc *vec_heredoc_get(t_vec_heredoc *vec, t_usize i);
+
+/// @brief Get a pointer to the last element, or NULL otherwise
+/// @param vec The vec_heredoc to get the element from
+/// @return A pointer to the last element or NULL
+t_heredoc *vec_heredoc_last(t_vec_heredoc *vec);
+
 #endif
--- a/output/include/me/vec/vec_pid.h
+++ b/output/include/me/vec/vec_pid.h
@ -120,4 +120,14 @@ void	vec_pid_sort(t_vec_pid		  *vec, t_vec_pid_sort_fn is_sorted);
 /// @return true if the operation failed, false otherwise
 t_error vec_pid_back(t_vec_pid *vec, t_pid **out);

+/// @brief Get a pointer to the i'th element, or NULL otherwise
+/// @param vec The vec_pid to get the element from
+/// @return A pointer to the element or NULL
+t_pid *vec_pid_get(t_vec_pid *vec, t_usize i);
+
+/// @brief Get a pointer to the last element, or NULL otherwise
+/// @param vec The vec_pid to get the element from
+/// @return A pointer to the last element or NULL
+t_pid *vec_pid_last(t_vec_pid *vec);
+
 #endif
--- a/output/include/me/vec/vec_str.h
+++ b/output/include/me/vec/vec_str.h
@ -120,4 +120,14 @@ void	vec_str_sort(t_vec_str		  *vec, t_vec_str_sort_fn is_sorted);
 /// @return true if the operation failed, false otherwise
 t_error vec_str_back(t_vec_str *vec, t_str **out);

+/// @brief Get a pointer to the i'th element, or NULL otherwise
+/// @param vec The vec_str to get the element from
+/// @return A pointer to the element or NULL
+t_str *vec_str_get(t_vec_str *vec, t_usize i);
+
+/// @brief Get a pointer to the last element, or NULL otherwise
+/// @param vec The vec_str to get the element from
+/// @return A pointer to the last element or NULL
+t_str *vec_str_last(t_vec_str *vec);
+
 #endif
--- a/output/src/vec/ast/ast_functions3.c
+++ b/output/src/vec/ast/ast_functions3.c
@ -10,8 +10,6 @@
 /*                                                                            */
 /* ************************************************************************** */

-#include "me/mem/mem.h"
-#include "me/mem/mem.h"
 #include "me/mem/mem.h"
 #include "me/types.h"
 #include "me/vec/vec_ast.h"
--- a/output/src/vec/ast/ast_functions4.c
+++ b/output/src/vec/ast/ast_functions4.c
@ -0,0 +1,30 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   vec_ast.c                                  :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: maiboyer <maiboyer@student.42.fr>          +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2023/12/30 17:59:28 by maiboyer          #+#    #+#             */
+/*   Updated: 2023/12/30 17:59:28 by maiboyer         ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "me/mem/mem.h"
+#include "me/types.h"
+#include "me/vec/vec_ast.h"
+#include <stdlib.h>
+
+t_ast_node *vec_ast_get(t_vec_ast *vec, t_usize i)
+{
+	if (vec->len >= i)
+		return (NULL);
+	return (&vec->buffer[i]);
+}
+
+t_ast_node *vec_ast_last(t_vec_ast *vec)
+{
+	if (vec->len == 0)
+		return (NULL);
+	return (&vec->buffer[vec->len - 1]);
+}
--- a/output/src/vec/estr/estr_functions3.c
+++ b/output/src/vec/estr/estr_functions3.c
@ -10,8 +10,6 @@
 /*                                                                            */
 /* ************************************************************************** */

-#include "me/mem/mem.h"
-#include "me/mem/mem.h"
 #include "me/mem/mem.h"
 #include "me/types.h"
 #include "me/vec/vec_estr.h"
--- a/output/src/vec/estr/estr_functions4.c
+++ b/output/src/vec/estr/estr_functions4.c
@ -0,0 +1,30 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   vec_estr.c                                  :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: maiboyer <maiboyer@student.42.fr>          +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2023/12/30 17:59:28 by maiboyer          #+#    #+#             */
+/*   Updated: 2023/12/30 17:59:28 by maiboyer         ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "me/mem/mem.h"
+#include "me/types.h"
+#include "me/vec/vec_estr.h"
+#include <stdlib.h>
+
+t_expandable_str *vec_estr_get(t_vec_estr *vec, t_usize i)
+{
+	if (vec->len >= i)
+		return (NULL);
+	return (&vec->buffer[i]);
+}
+
+t_expandable_str *vec_estr_last(t_vec_estr *vec)
+{
+	if (vec->len == 0)
+		return (NULL);
+	return (&vec->buffer[vec->len - 1]);
+}
--- a/output/src/vec/heredoc/heredoc_functions3.c
+++ b/output/src/vec/heredoc/heredoc_functions3.c
@ -10,8 +10,6 @@
 /*                                                                            */
 /* ************************************************************************** */

-#include "me/mem/mem.h"
-#include "me/mem/mem.h"
 #include "me/mem/mem.h"
 #include "me/types.h"
 #include "me/vec/vec_heredoc.h"
--- a/output/src/vec/heredoc/heredoc_functions4.c
+++ b/output/src/vec/heredoc/heredoc_functions4.c
@ -0,0 +1,30 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   vec_heredoc.c                                  :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: maiboyer <maiboyer@student.42.fr>          +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2023/12/30 17:59:28 by maiboyer          #+#    #+#             */
+/*   Updated: 2023/12/30 17:59:28 by maiboyer         ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "me/mem/mem.h"
+#include "me/types.h"
+#include "me/vec/vec_heredoc.h"
+#include <stdlib.h>
+
+t_heredoc *vec_heredoc_get(t_vec_heredoc *vec, t_usize i)
+{
+	if (vec->len >= i)
+		return (NULL);
+	return (&vec->buffer[i]);
+}
+
+t_heredoc *vec_heredoc_last(t_vec_heredoc *vec)
+{
+	if (vec->len == 0)
+		return (NULL);
+	return (&vec->buffer[vec->len - 1]);
+}
--- a/output/src/vec/pid/pid_functions3.c
+++ b/output/src/vec/pid/pid_functions3.c
@ -10,8 +10,6 @@
 /*                                                                            */
 /* ************************************************************************** */

-#include "me/mem/mem.h"
-#include "me/mem/mem.h"
 #include "me/mem/mem.h"
 #include "me/types.h"
 #include "me/vec/vec_pid.h"
--- a/output/src/vec/pid/pid_functions4.c
+++ b/output/src/vec/pid/pid_functions4.c
@ -0,0 +1,30 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   vec_pid.c                                  :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: maiboyer <maiboyer@student.42.fr>          +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2023/12/30 17:59:28 by maiboyer          #+#    #+#             */
+/*   Updated: 2023/12/30 17:59:28 by maiboyer         ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "me/mem/mem.h"
+#include "me/types.h"
+#include "me/vec/vec_pid.h"
+#include <stdlib.h>
+
+t_pid *vec_pid_get(t_vec_pid *vec, t_usize i)
+{
+	if (vec->len >= i)
+		return (NULL);
+	return (&vec->buffer[i]);
+}
+
+t_pid *vec_pid_last(t_vec_pid *vec)
+{
+	if (vec->len == 0)
+		return (NULL);
+	return (&vec->buffer[vec->len - 1]);
+}
--- a/output/src/vec/str/str_functions3.c
+++ b/output/src/vec/str/str_functions3.c
@ -10,8 +10,6 @@
 /*                                                                            */
 /* ************************************************************************** */

-#include "me/mem/mem.h"
-#include "me/mem/mem.h"
 #include "me/mem/mem.h"
 #include "me/types.h"
 #include "me/vec/vec_str.h"
--- a/output/src/vec/str/str_functions4.c
+++ b/output/src/vec/str/str_functions4.c
@ -0,0 +1,30 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   vec_str.c                                  :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: maiboyer <maiboyer@student.42.fr>          +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2023/12/30 17:59:28 by maiboyer          #+#    #+#             */
+/*   Updated: 2023/12/30 17:59:28 by maiboyer         ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "me/mem/mem.h"
+#include "me/types.h"
+#include "me/vec/vec_str.h"
+#include <stdlib.h>
+
+t_str *vec_str_get(t_vec_str *vec, t_usize i)
+{
+	if (vec->len >= i)
+		return (NULL);
+	return (&vec->buffer[i]);
+}
+
+t_str *vec_str_last(t_vec_str *vec)
+{
+	if (vec->len == 0)
+		return (NULL);
+	return (&vec->buffer[vec->len - 1]);
+}
--- a/parser/Filelist.parser.mk
+++ b/parser/Filelist.parser.mk
@ -33,7 +33,10 @@ parser                                                                        \
 point/point_funcs1                                                            \
 point/point_funcs2                                                            \
 scanner                                                                       \
+scanner/advance_words                                                         \
 scanner/deserialize                                                           \
+scanner/heredoc                                                               \
+scanner/heredoc_functions                                                     \
 scanner/serialize                                                             \
 stack/stack_add_link                                                          \
 stack/stack_funcs1                                                            \
--- a/parser/include/parser/inner/heredoc.h
+++ b/parser/include/parser/inner/heredoc.h
@ -6,7 +6,7 @@
 /*   By: maiboyer <maiboyer@student.42.fr>          +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/09/01 15:06:56 by maiboyer          #+#    #+#             */
-/*   Updated: 2024/09/01 15:08:47 by maiboyer         ###   ########.fr       */
+/*   Updated: 2024/09/01 19:01:16 by maiboyer         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */

@ -46,4 +46,10 @@ static inline void reset_heredoc(t_heredoc *heredoc)
 	string_clear(&heredoc->delimiter);
 }

+static inline void heredoc_free(t_heredoc heredoc)
+{
+	string_free(heredoc.delimiter);
+	string_free(heredoc.current_leading_word);
+}
+
 #endif /* HEREDOC_TYPE_H */
--- a/parser/include/parser/inner/scanner.h
+++ b/parser/include/parser/inner/scanner.h
@ -2,9 +2,8 @@
 #define SCANNER_H

 #include "me/types.h"
-#include "parser/inner/heredoc.h"
 #include "me/vec/vec_heredoc.h"
-#include "parser/array.h"
+#include "parser/parser.h"

 typedef struct s_scanner t_scanner;

@ -16,4 +15,41 @@ struct s_scanner
 	t_vec_heredoc heredocs;
 };

+enum e_token_type
+{
+	HEREDOC_START,
+	SIMPLE_HEREDOC_BODY,
+	HEREDOC_BODY_BEGINNING,
+	HEREDOC_CONTENT,
+	HEREDOC_END,
+	FILE_DESCRIPTOR,
+	EMPTY_VALUE,
+	CONCAT,
+	VARIABLE_NAME,
+	REGEX,
+	EXPANSION_WORD,
+	EXTGLOB_PATTERN,
+	BARE_DOLLAR,
+	IMMEDIATE_DOUBLE_HASH,
+	HEREDOC_ARROW,
+	HEREDOC_ARROW_DASH,
+	NEWLINE,
+	OPENING_PAREN,
+	ESAC,
+	ERROR_RECOVERY,
+};
+
+struct s_heredoc_scan_state
+{
+	t_scanner		 *scanner;
+	TSLexer			 *lexer;
+	enum e_token_type middle_type;
+	enum e_token_type end_type;
+	bool			  did_advance;
+	t_heredoc		 *heredoc;
+	bool			  return_value;
+};
+
+bool advance_word(TSLexer *lexer, t_string *unquoted_word);
+
 #endif
--- a/parser/src/scanner.c
+++ b/parser/src/scanner.c
@ -6,112 +6,42 @@
 /*   By: maiboyer <maiboyer@student.42.fr>          +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/09/01 14:17:17 by maiboyer          #+#    #+#             */
-/*   Updated: 2024/09/01 18:50:23 by maiboyer         ###   ########.fr       */
+/*   Updated: 2024/09/01 19:55:43 by maiboyer         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */

+#include "parser/inner/scanner.h"
 #include "me/char/char.h"
 #include "me/str/str.h"
 #include "me/string/string.h"
-#include "parser/inner/heredoc.h"
-#include "parser/inner/scanner.h"
 #include "me/types.h"
+#include "me/vec/vec_heredoc.h"
 #include "parser/array.h"
+#include "parser/inner/heredoc.h"
 #include "parser/parser.h"
 #include <assert.h>

-enum e_token_type
-{
-	HEREDOC_START,
-	SIMPLE_HEREDOC_BODY,
-	HEREDOC_BODY_BEGINNING,
-	HEREDOC_CONTENT,
-	HEREDOC_END,
-	FILE_DESCRIPTOR,
-	EMPTY_VALUE,
-	CONCAT,
-	VARIABLE_NAME,
-	REGEX,
-	EXPANSION_WORD,
-	EXTGLOB_PATTERN,
-	BARE_DOLLAR,
-	IMMEDIATE_DOUBLE_HASH,
-	HEREDOC_ARROW,
-	HEREDOC_ARROW_DASH,
-	NEWLINE,
-	OPENING_PAREN,
-	ESAC,
-	ERROR_RECOVERY,
-};
-
-void advance(TSLexer *lexer)
-{
-	lexer->advance(lexer, false);
-}
-
-void skip(TSLexer *lexer)
-{
-	lexer->advance(lexer, true);
-}
-
-bool in_error_recovery(const bool *valid_symbols)
-{
-	return (valid_symbols[ERROR_RECOVERY]);
-}
+bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum e_token_type middle_type, enum e_token_type end_type);
+bool scan_heredoc_start(t_heredoc *heredoc, TSLexer *lexer);
+bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer);
+bool advance_word(TSLexer *lexer, t_string *unquoted_word);

 void reset(t_scanner *scanner)
 {
 	t_usize i;

 	i = 0;
-	while (i < scanner->heredocs.size)
-		reset_heredoc(array_get(&scanner->heredocs, i++));
-}
-
-/**
- * Consume a "word" in POSIX parlance, and returns it unquoted.
- *
- * This is an approximate implementation that doesn't deal with any
- * POSIX-mandated substitution, and assumes the default value for
- * IFS.
- */
-bool advance_word(TSLexer *lexer, t_string *unquoted_word)
-{
-	bool  empty;
-	t_i32 quote;
-
-	empty = true;
-	quote = 0;
-	if (lexer->lookahead == '\'' || lexer->lookahead == '"')
-	{
-		quote = lexer->lookahead;
-		advance(lexer);
-	}
-	while (lexer->lookahead && !((quote && (lexer->lookahead == quote || lexer->lookahead == '\r' || lexer->lookahead == '\n')) ||
-								 (!quote && (me_isspace(lexer->lookahead)))))
-	{
-		if (lexer->lookahead == '\\')
-		{
-			advance(lexer);
-			if (!lexer->lookahead)
-				return (false);
-		}
-		empty = false;
-		string_push_char(unquoted_word, lexer->lookahead);
-		advance(lexer);
-	}
-	if (quote && lexer->lookahead == quote)
-		advance(lexer);
-	return (!empty);
+	while (i < scanner->heredocs.len)
+		reset_heredoc(&scanner->heredocs.buffer[i++]);
 }

 bool scan_bare_dollar(TSLexer *lexer)
 {
 	while (me_isspace(lexer->lookahead) && lexer->lookahead != '\n' && !lexer->eof(lexer))
-		skip(lexer);
+		lexer->advance(lexer, true);
 	if (lexer->lookahead == '$')
 	{
-		advance(lexer);
+		lexer->advance(lexer, false);
 		lexer->result_symbol = BARE_DOLLAR;
 		lexer->mark_end(lexer);
 		return (me_isspace(lexer->lookahead) || lexer->eof(lexer) || lexer->lookahead == '\"');
@ -119,207 +49,50 @@ bool scan_bare_dollar(TSLexer *lexer)
 	return (false);
 }

-bool scan_heredoc_start(t_heredoc *heredoc, TSLexer *lexer)
-{
-	bool found_delimiter;
-
-	while (me_isspace(lexer->lookahead))
-		skip(lexer);
-	lexer->result_symbol = HEREDOC_START;
-	heredoc->is_raw = lexer->lookahead == '\'' || lexer->lookahead == '"' || lexer->lookahead == '\\';
-	found_delimiter = advance_word(lexer, &heredoc->delimiter);
-	if (!found_delimiter)
-		return (string_clear(&heredoc->delimiter), false);
-	return (found_delimiter);
-}
-
-bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer)
-{
-	t_i32 size;
-
-	size = 0;
-	string_clear(&heredoc->current_leading_word);
-	if (heredoc->delimiter.len > 0)
-	{
-		while (lexer->lookahead != '\0' && lexer->lookahead != '\n' && (t_i32)heredoc->delimiter.buf[size] == lexer->lookahead &&
-			   heredoc->current_leading_word.len < heredoc->delimiter.len)
-		{
-			string_push_char(&heredoc->current_leading_word, lexer->lookahead);
-			advance(lexer);
-			size++;
-		}
-	}
-	return heredoc->delimiter.len == 0 ? false : str_compare(heredoc->current_leading_word.buf, heredoc->delimiter.buf);
-}
-
-bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum e_token_type middle_type, enum e_token_type end_type)
-{
-	bool	   did_advance = false;
-	t_heredoc *heredoc = array_back(&scanner->heredocs);
-
-	while (true)
-	{
-		if (lexer->lookahead == '\0')
-		{
-			if (lexer->eof(lexer) && did_advance)
-			{
-				reset_heredoc(heredoc);
-				lexer->result_symbol = end_type;
-				return (true);
-			}
-			return (false);
-		}
-		else if (lexer->lookahead == '\\')
-		{
-			did_advance = true;
-			advance(lexer);
-			advance(lexer);
-		}
-		else if (lexer->lookahead == '$')
-		{
-			if (heredoc->is_raw)
-			{
-				did_advance = true;
-				advance(lexer);
-			}
-			if (did_advance)
-			{
-				lexer->mark_end(lexer);
-				lexer->result_symbol = middle_type;
-				heredoc->started = true;
-				advance(lexer);
-				if (me_isalpha(lexer->lookahead) || lexer->lookahead == '{' || lexer->lookahead == '(')
-					return true;
-			}
-			if (middle_type == HEREDOC_BODY_BEGINNING && lexer->get_column(lexer) == 0)
-			{
-				lexer->result_symbol = middle_type;
-				heredoc->started = true;
-				return true;
-			}
-			return false;
-		}
-		else if (lexer->lookahead == '\n')
-		{
-			if (!did_advance)
-			{
-				skip(lexer);
-			}
-			else
-			{
-				advance(lexer);
-			}
-			did_advance = true;
-			if (heredoc->allows_indent)
-			{
-				while (me_isspace(lexer->lookahead))
-					advance(lexer);
-			}
-			lexer->result_symbol = end_type;
-			if (heredoc->started)
-				lexer->result_symbol = middle_type;
-			lexer->mark_end(lexer);
-			if (scan_heredoc_end_identifier(heredoc, lexer))
-			{
-				if (lexer->result_symbol == HEREDOC_END)
-					(void)array_pop(&scanner->heredocs);
-				return (true);
-			}
-		}
-		else
-		{
-			if (lexer->get_column(lexer) == 0)
-			{
-				while (me_isspace(lexer->lookahead))
-				{
-					if (did_advance)
-						advance(lexer);
-					else
-						skip(lexer);
-				}
-				if (end_type != SIMPLE_HEREDOC_BODY)
-				{
-					lexer->result_symbol = middle_type;
-					if (scan_heredoc_end_identifier(heredoc, lexer))
-						return true;
-				}
-				if (end_type == SIMPLE_HEREDOC_BODY)
-				{
-					lexer->result_symbol = end_type;
-					lexer->mark_end(lexer);
-					if (scan_heredoc_end_identifier(heredoc, lexer))
-						return true;
-				}
-			}
-			did_advance = true;
-			advance(lexer);
-		}
-	}
-	return (false);
-}
-
 bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
 {
-	if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols))
+	if (valid_symbols[CONCAT] && !valid_symbols[ERROR_RECOVERY])
 	{
 		if (!(lexer->lookahead == 0 || me_isspace(lexer->lookahead) || lexer->lookahead == '>' || lexer->lookahead == '<' ||
 			  lexer->lookahead == ')' || lexer->lookahead == '(' || lexer->lookahead == ';' || lexer->lookahead == '&' ||
 			  lexer->lookahead == '|' || lexer->lookahead == '{' || lexer->lookahead == '}'))
 		{
 			lexer->result_symbol = CONCAT;
-			// So for a`b`, we want to return a concat. We check if the
-			// 2nd backtick has whitespace after it, and if it does we
-			// return concat.
 			if (lexer->lookahead == '`')
 			{
 				lexer->mark_end(lexer);
-				advance(lexer);
+				lexer->advance(lexer, false);
 				while (lexer->lookahead != '`' && !lexer->eof(lexer))
-				{
-					advance(lexer);
-				}
+					lexer->advance(lexer, false);
 				if (lexer->eof(lexer))
-				{
 					return false;
-				}
 				if (lexer->lookahead == '`')
-				{
-					advance(lexer);
-				}
+					lexer->advance(lexer, false);
 				return me_isspace(lexer->lookahead) || lexer->eof(lexer);
 			}
-			// strings w/ expansions that contains escaped quotes or
-			// backslashes need this to return a concat
 			if (lexer->lookahead == '\\')
 			{
 				lexer->mark_end(lexer);
-				advance(lexer);
+				lexer->advance(lexer, false);
 				if (lexer->lookahead == '"' || lexer->lookahead == '\'' || lexer->lookahead == '\\')
-				{
 					return true;
-				}
 				if (lexer->eof(lexer))
-				{
 					return false;
 			}
-			}
 			else
-			{
 				return true;
 		}
 	}
-	}

-	if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !in_error_recovery(valid_symbols))
+	if (valid_symbols[IMMEDIATE_DOUBLE_HASH] && !valid_symbols[ERROR_RECOVERY])
 	{
-		// advance two # and ensure not } after
 		if (lexer->lookahead == '#')
 		{
 			lexer->mark_end(lexer);
-			advance(lexer);
+			lexer->advance(lexer, false);
 			if (lexer->lookahead == '#')
 			{
-				advance(lexer);
+				lexer->advance(lexer, false);
 				if (lexer->lookahead != '}')
 				{
 					lexer->result_symbol = IMMEDIATE_DOUBLE_HASH;
@ -339,50 +112,43 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
 		}
 	}

-	if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.size > 0 &&
-		!array_back(&scanner->heredocs)->started && !in_error_recovery(valid_symbols))
-	{
+	if ((valid_symbols[HEREDOC_BODY_BEGINNING] || valid_symbols[SIMPLE_HEREDOC_BODY]) && scanner->heredocs.len > 0 &&
+		!vec_heredoc_last(&scanner->heredocs)->started && !valid_symbols[ERROR_RECOVERY])
 		return scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING, SIMPLE_HEREDOC_BODY);
-	}

-	if (valid_symbols[HEREDOC_END] && scanner->heredocs.size > 0)
+	if (valid_symbols[HEREDOC_END] && scanner->heredocs.len > 0)
 	{
-		t_heredoc *heredoc = array_back(&scanner->heredocs);
+		t_heredoc *heredoc = vec_heredoc_last(&scanner->heredocs);
 		if (scan_heredoc_end_identifier(heredoc, lexer))
 		{
-			array_delete(&heredoc->current_leading_word);
-			array_delete(&heredoc->delimiter);
-			(void)array_pop(&scanner->heredocs);
+			string_free(heredoc->current_leading_word);
+			string_free(heredoc->delimiter);
+			(void)vec_heredoc_pop(&scanner->heredocs, NULL);
 			lexer->result_symbol = HEREDOC_END;
 			return true;
 		}
 	}

-	if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.size > 0 && array_back(&scanner->heredocs)->started &&
-		!in_error_recovery(valid_symbols))
-	{
+	if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.len > 0 && vec_heredoc_last(&scanner->heredocs)->started &&
+		!valid_symbols[ERROR_RECOVERY])
 		return scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT, HEREDOC_END);
-	}

-	if (valid_symbols[HEREDOC_START] && !in_error_recovery(valid_symbols) && scanner->heredocs.size > 0)
-	{
-		return scan_heredoc_start(array_back(&scanner->heredocs), lexer);
-	}
+	if (valid_symbols[HEREDOC_START] && !valid_symbols[ERROR_RECOVERY] && scanner->heredocs.len > 0)
+		return scan_heredoc_start(vec_heredoc_last(&scanner->heredocs), lexer);

-	if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) &&
-		!in_error_recovery(valid_symbols))
+	if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] || valid_symbols[HEREDOC_ARROW]) && !valid_symbols[ERROR_RECOVERY])
 	{
-		for (;;)
+		while (true)
 		{
 			if ((lexer->lookahead == ' ' || lexer->lookahead == '\t' || lexer->lookahead == '\r' ||
 				 (lexer->lookahead == '\n' && !valid_symbols[NEWLINE])) &&
 				!valid_symbols[EXPANSION_WORD])
 			{
-				skip(lexer);
+				lexer->advance(lexer, true);
 			}
 			else if (lexer->lookahead == '\\')
 			{
-				skip(lexer);
+				lexer->advance(lexer, true);

 				if (lexer->eof(lexer))
 				{
@ -392,39 +158,29 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
 				}

 				if (lexer->lookahead == '\r')
-				{
-					skip(lexer);
-				}
+					lexer->advance(lexer, true);
 				if (lexer->lookahead == '\n')
-				{
-					skip(lexer);
-				}
+					lexer->advance(lexer, true);
 				else
 				{
 					if (lexer->lookahead == '\\' && valid_symbols[EXPANSION_WORD])
-					{
 						goto expansion_word;
-					}
 					return false;
 				}
 			}
 			else
-			{
 				break;
 		}
-		}

 		// no '*', '@', '?', '-', '$', '0', '_'
 		if (!valid_symbols[EXPANSION_WORD] && (lexer->lookahead == '*' || lexer->lookahead == '@' || lexer->lookahead == '?' ||
 											   lexer->lookahead == '-' || lexer->lookahead == '0' || lexer->lookahead == '_'))
 		{
 			lexer->mark_end(lexer);
-			advance(lexer);
+			lexer->advance(lexer, false);
 			if (lexer->lookahead == '=' || lexer->lookahead == '[' || lexer->lookahead == ':' || lexer->lookahead == '-' ||
 				lexer->lookahead == '%' || lexer->lookahead == '#' || lexer->lookahead == '/')
-			{
 				return false;
-			}
 			if (valid_symbols[EXTGLOB_PATTERN] && me_isspace(lexer->lookahead))
 			{
 				lexer->mark_end(lexer);
@ -435,26 +191,22 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)

 		if (valid_symbols[HEREDOC_ARROW] && lexer->lookahead == '<')
 		{
-			advance(lexer);
+			lexer->advance(lexer, false);
 			if (lexer->lookahead == '<')
 			{
-				advance(lexer);
+				lexer->advance(lexer, false);
 				if (lexer->lookahead == '-')
 				{
-					advance(lexer);
+					lexer->advance(lexer, false);
 					t_heredoc heredoc = heredoc_new();
 					heredoc.allows_indent = true;
-					array_push(&scanner->heredocs, heredoc);
+					vec_heredoc_push(&scanner->heredocs, heredoc);
 					lexer->result_symbol = HEREDOC_ARROW_DASH;
 				}
-				// else if (lexer->lookahead == '<' || lexer->lookahead == '=')
-				// {
-				// 	return false;
-				// }
 				else
 				{
 					t_heredoc heredoc = heredoc_new();
-					array_push(&scanner->heredocs, heredoc);
+					vec_heredoc_push(&scanner->heredocs, heredoc);
 					lexer->result_symbol = HEREDOC_ARROW;
 				}
 				return true;
@ -464,45 +216,35 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)

 		bool is_number = true;
 		if (me_isdigit(lexer->lookahead))
-			advance(lexer);
+			lexer->advance(lexer, false);
 		else if (me_isalpha(lexer->lookahead) || lexer->lookahead == '_')
 		{
 			is_number = false;
-			advance(lexer);
+			lexer->advance(lexer, false);
 		}
 		else
 		{
 			if (lexer->lookahead == '{')
-			{
 				goto brace_start;
-			}
 			if (valid_symbols[EXPANSION_WORD])
-			{
 				goto expansion_word;
-			}
 			if (valid_symbols[EXTGLOB_PATTERN])
-			{
 				goto extglob_pattern;
-			}
 			return false;
 		}

-		for (;;)
+		while (true)
 		{
 			if (me_isdigit(lexer->lookahead))
-			{
-				advance(lexer);
-			}
+				lexer->advance(lexer, false);
 			else if (me_isalpha(lexer->lookahead) || lexer->lookahead == '_')
 			{
 				is_number = false;
-				advance(lexer);
+				lexer->advance(lexer, false);
 			}
 			else
-			{
 				break;
 		}
-		}

 		if (is_number && valid_symbols[FILE_DESCRIPTOR] && (lexer->lookahead == '>' || lexer->lookahead == '<'))
 		{
@ -515,7 +257,7 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
 			if (lexer->lookahead == '+')
 			{
 				lexer->mark_end(lexer);
-				advance(lexer);
+				lexer->advance(lexer, false);
 				if (lexer->lookahead == '=' || lexer->lookahead == ':')
 				{
 					lexer->result_symbol = VARIABLE_NAME;
@ -527,12 +269,8 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
 			{
 				return false;
 			}
-			if (lexer->lookahead == '=' || lexer->lookahead == '[' ||
-				(lexer->lookahead == ':' &&
-				 !valid_symbols[OPENING_PAREN]) || // TODO(amaanq): more cases for regular word chars but not variable
-												   // names for function words, only handling : for now? #235
-				lexer->lookahead == '%' ||
-				(lexer->lookahead == '#' && !is_number) || lexer->lookahead == '@' || (lexer->lookahead == '-'))
+			if (lexer->lookahead == '=' || lexer->lookahead == '[' || (lexer->lookahead == ':' && !valid_symbols[OPENING_PAREN]) ||
+				lexer->lookahead == '%' || (lexer->lookahead == '#' && !is_number) || lexer->lookahead == '@' || (lexer->lookahead == '-'))
 			{
 				lexer->mark_end(lexer);
 				lexer->result_symbol = VARIABLE_NAME;
@ -542,7 +280,7 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
 			if (lexer->lookahead == '?')
 			{
 				lexer->mark_end(lexer);
-				advance(lexer);
+				lexer->advance(lexer, false);
 				lexer->result_symbol = VARIABLE_NAME;
 				return me_isalpha(lexer->lookahead);
 			}
@ -551,20 +289,16 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
 		return false;
 	}

-	if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) && scan_bare_dollar(lexer))
+	if (valid_symbols[BARE_DOLLAR] && !valid_symbols[ERROR_RECOVERY] && scan_bare_dollar(lexer))
 	{
 		return true;
 	}

-	if ((valid_symbols[REGEX]) && !in_error_recovery(valid_symbols))
+	if ((valid_symbols[REGEX]) && !valid_symbols[ERROR_RECOVERY])
 	{
 		if (valid_symbols[REGEX])
-		{
 			while (me_isspace(lexer->lookahead))
-			{
-				skip(lexer);
-			}
-		}
+				lexer->advance(lexer, true);

 		if ((lexer->lookahead != '"' && lexer->lookahead != '\'') || ((lexer->lookahead == '$' || lexer->lookahead == '\'')) ||
 			(lexer->lookahead == '\''))
@ -584,12 +318,10 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
 			if (lexer->lookahead == '$')
 			{
 				lexer->mark_end(lexer);
-				advance(lexer);
+				lexer->advance(lexer, false);
 				if (lexer->lookahead == '(')
-				{
 					return false;
 			}
-			}

 			lexer->mark_end(lexer);

@ -601,7 +333,7 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
 					if (lexer->lookahead == '\'')
 					{
 						state.in_single_quote = false;
-						advance(lexer);
+						lexer->advance(lexer, false);
 						lexer->mark_end(lexer);
 					}
 				}
@ -646,7 +378,7 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
 				case '\'':
 					// Enter or exit a single-quoted string.
 					state.in_single_quote = !state.in_single_quote;
-					advance(lexer);
+					lexer->advance(lexer, false);
 					state.advanced_once = true;
 					state.last_was_escape = false;
 					continue;
@ -660,7 +392,7 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
 					if (valid_symbols[REGEX])
 					{
 						bool was_space = !state.in_single_quote && me_isspace(lexer->lookahead);
-						advance(lexer);
+						lexer->advance(lexer, false);
 						state.advanced_once = true;
 						if (!was_space || state.paren_depth > 0)
 						{
@ -671,20 +403,16 @@ bool scan(t_scanner *scanner, TSLexer *lexer, const bool *valid_symbols)
 			}

 			lexer->result_symbol = REGEX;
-			if (valid_symbols[REGEX] && !state.advanced_once)
-			{
-				return false;
-			}
-			return true;
+			return (!(valid_symbols[REGEX] && !state.advanced_once));
 		}
 	}

 extglob_pattern:
-	if (valid_symbols[EXTGLOB_PATTERN] && !in_error_recovery(valid_symbols))
+	if (valid_symbols[EXTGLOB_PATTERN] && !valid_symbols[ERROR_RECOVERY])
 	{
 		// first skip ws, then check for ? * + @ !
 		while (me_isspace(lexer->lookahead))
-			skip(lexer);
+			lexer->advance(lexer, true);

 		if (lexer->lookahead == '?' || lexer->lookahead == '*' || lexer->lookahead == '+' || lexer->lookahead == '@' ||
 			lexer->lookahead == '!' || lexer->lookahead == '-' || lexer->lookahead == ')' || lexer->lookahead == '\\' ||
@ -692,9 +420,9 @@ extglob_pattern:
 		{
 			if (lexer->lookahead == '\\')
 			{
-				advance(lexer);
+				lexer->advance(lexer, false);
 				if ((me_isspace(lexer->lookahead) || lexer->lookahead == '"') && lexer->lookahead != '\r' && lexer->lookahead != '\n')
-					advance(lexer);
+					lexer->advance(lexer, false);
 				else
 					return false;
 			}
@ -702,13 +430,11 @@ extglob_pattern:
 			if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0)
 			{
 				lexer->mark_end(lexer);
-				advance(lexer);
+				lexer->advance(lexer, false);

 				if (me_isspace(lexer->lookahead))
-				{
 					return false;
 			}
-			}

 			lexer->mark_end(lexer);
 			bool was_non_alpha = !me_isalpha(lexer->lookahead);
@ -718,16 +444,16 @@ extglob_pattern:
 				if (lexer->lookahead == 'e')
 				{
 					lexer->mark_end(lexer);
-					advance(lexer);
+					lexer->advance(lexer, false);
 					if (lexer->lookahead == 's')
 					{
-						advance(lexer);
+						lexer->advance(lexer, false);
 						if (lexer->lookahead == 'a')
 						{
-							advance(lexer);
+							lexer->advance(lexer, false);
 							if (lexer->lookahead == 'c')
 							{
-								advance(lexer);
+								lexer->advance(lexer, false);
 								if (me_isspace(lexer->lookahead))
 									return false;
 							}
@ -735,16 +461,16 @@ extglob_pattern:
 					}
 				}
 				else
-					advance(lexer);
+					lexer->advance(lexer, false);
 			}

 			// -\w is just a word, find something else special
 			if (lexer->lookahead == '-')
 			{
 				lexer->mark_end(lexer);
-				advance(lexer);
+				lexer->advance(lexer, false);
 				while (me_isalnum(lexer->lookahead))
-					advance(lexer);
+					lexer->advance(lexer, false);

 				if (lexer->lookahead == ')' || lexer->lookahead == '\\' || lexer->lookahead == '.')
 					return false;
@ -755,7 +481,7 @@ extglob_pattern:
 			if (lexer->lookahead == ')' && scanner->last_glob_paren_depth == 0)
 			{
 				lexer->mark_end(lexer);
-				advance(lexer);
+				lexer->advance(lexer, false);
 				if (me_isspace(lexer->lookahead))
 				{
 					lexer->result_symbol = EXTGLOB_PATTERN;
@ -774,7 +500,7 @@ extglob_pattern:
 			if (lexer->lookahead == '$')
 			{
 				lexer->mark_end(lexer);
-				advance(lexer);
+				lexer->advance(lexer, false);
 				if (lexer->lookahead == '{' || lexer->lookahead == '(')
 				{
 					lexer->result_symbol = EXTGLOB_PATTERN;
@ -785,7 +511,7 @@ extglob_pattern:
 			if (lexer->lookahead == '|')
 			{
 				lexer->mark_end(lexer);
-				advance(lexer);
+				lexer->advance(lexer, false);
 				lexer->result_symbol = EXTGLOB_PATTERN;
 				return true;
 			}
@ -846,7 +572,7 @@ extglob_pattern:
 				if (lexer->lookahead == '|')
 				{
 					lexer->mark_end(lexer);
-					advance(lexer);
+					lexer->advance(lexer, false);
 					if (state.paren_depth == 0 && state.bracket_depth == 0 && state.brace_depth == 0)
 					{
 						lexer->result_symbol = EXTGLOB_PATTERN;
@ -862,7 +588,7 @@ extglob_pattern:
 						lexer->mark_end(lexer);
 						if (!me_isalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
 							state.saw_non_alphadot = true;
-						advance(lexer);
+						lexer->advance(lexer, false);
 						if (lexer->lookahead == '(' || lexer->lookahead == '{')
 						{
 							lexer->result_symbol = EXTGLOB_PATTERN;
@ -888,15 +614,15 @@ extglob_pattern:
 					{
 						if (!me_isalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
 							state.saw_non_alphadot = true;
-						advance(lexer);
+						lexer->advance(lexer, false);
 						if (me_isspace(lexer->lookahead) || lexer->lookahead == '"')
-							advance(lexer);
+							lexer->advance(lexer, false);
 					}
 					else
 					{
 						if (!me_isalpha(lexer->lookahead) && lexer->lookahead != '.' && lexer->lookahead != '\\')
 							state.saw_non_alphadot = true;
-						advance(lexer);
+						lexer->advance(lexer, false);
 					}
 					if (!was_space)
 						lexer->mark_end(lexer);
@ -917,14 +643,14 @@ expansion_word:
 	{
 		bool advanced_once = false;
 		bool advance_once_space = false;
-		for (;;)
+		while (true)
 		{
 			if (lexer->lookahead == '\"')
 				return false;
 			if (lexer->lookahead == '$')
 			{
 				lexer->mark_end(lexer);
-				advance(lexer);
+				lexer->advance(lexer, false);
 				if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || me_isalnum(lexer->lookahead))
 				{
 					lexer->result_symbol = EXPANSION_WORD;
@ -943,17 +669,13 @@ expansion_word:
 			if (lexer->lookahead == '(' && !(advanced_once || advance_once_space))
 			{
 				lexer->mark_end(lexer);
-				advance(lexer);
+				lexer->advance(lexer, false);
 				while (lexer->lookahead != ')' && !lexer->eof(lexer))
 				{
-					// if we find a $( or ${ assume this is valid and is
-					// a garbage concatenation of some weird word + an
-					// expansion
-					// I wonder where this can fail
 					if (lexer->lookahead == '$')
 					{
 						lexer->mark_end(lexer);
-						advance(lexer);
+						lexer->advance(lexer, false);
 						if (lexer->lookahead == '{' || lexer->lookahead == '(' || lexer->lookahead == '\'' || me_isalnum(lexer->lookahead))
 						{
 							lexer->result_symbol = EXPANSION_WORD;
@ -965,14 +687,14 @@ expansion_word:
 					{
 						advanced_once = advanced_once || !me_isspace(lexer->lookahead);
 						advance_once_space = advance_once_space || me_isspace(lexer->lookahead);
-						advance(lexer);
+						lexer->advance(lexer, false);
 					}
 				}
 				lexer->mark_end(lexer);
 				if (lexer->lookahead == ')')
 				{
 					advanced_once = true;
-					advance(lexer);
+					lexer->advance(lexer, false);
 					lexer->mark_end(lexer);
 					if (lexer->lookahead == '}')
 						return false;
@ -987,7 +709,7 @@ expansion_word:
 				return false;
 			advanced_once = advanced_once || !me_isspace(lexer->lookahead);
 			advance_once_space = advance_once_space || me_isspace(lexer->lookahead);
-			advance(lexer);
+			lexer->advance(lexer, false);
 		}
 	}

@ -1000,7 +722,7 @@ void *tree_sitter_sh_external_scanner_create()
 	t_scanner *scanner;

 	scanner = mem_alloc(sizeof(*scanner));
-	array_init(&scanner->heredocs);
+	scanner->heredocs = vec_heredoc_new(16, heredoc_free);
 	return (scanner);
 }

@ -1014,18 +736,6 @@ bool tree_sitter_sh_external_scanner_scan(void *payload, TSLexer *lexer, const b

 void tree_sitter_sh_external_scanner_destroy(void *payload)
 {
-	t_scanner *scanner;
-	t_heredoc *heredoc;
-	t_usize	   i;
-
-	scanner = (t_scanner *)payload;
-	i = 0;
-	while (i < scanner->heredocs.size)
-	{
-		heredoc = array_get(&scanner->heredocs, i++);
-		string_free(heredoc->current_leading_word);
-		string_free(heredoc->delimiter);
-	}
-	array_delete(&scanner->heredocs);
-	mem_free(scanner);
+	vec_heredoc_free(((t_scanner *)payload)->heredocs);
+	mem_free((t_scanner *)payload);
 }
--- a/parser/src/scanner/advance_words.c
+++ b/parser/src/scanner/advance_words.c
@ -0,0 +1,46 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   advance_words.c                                    :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: maiboyer <maiboyer@student.42.fr>          +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2024/09/01 19:28:19 by maiboyer          #+#    #+#             */
+/*   Updated: 2024/09/01 19:30:20 by maiboyer         ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "me/char/char.h"
+#include "me/string/string.h"
+#include "me/types.h"
+#include "parser/parser.h"
+
+bool advance_word(TSLexer *lexer, t_string *unquoted_word)
+{
+	bool  empty;
+	t_i32 quote;
+
+	empty = true;
+	quote = 0;
+	if (lexer->lookahead == '\'' || lexer->lookahead == '"')
+	{
+		quote = lexer->lookahead;
+		lexer->advance(lexer, false);
+	}
+	while (lexer->lookahead && !((quote && (lexer->lookahead == quote || lexer->lookahead == '\r' || lexer->lookahead == '\n')) ||
+								 (!quote && (me_isspace(lexer->lookahead)))))
+	{
+		if (lexer->lookahead == '\\')
+		{
+			lexer->advance(lexer, false);
+			if (!lexer->lookahead)
+				return (false);
+		}
+		empty = false;
+		string_push_char(unquoted_word, lexer->lookahead);
+		lexer->advance(lexer, false);
+	}
+	if (quote && lexer->lookahead == quote)
+		lexer->advance(lexer, false);
+	return (!empty);
+}
--- a/parser/src/scanner/deserialize.c
+++ b/parser/src/scanner/deserialize.c
@ -6,15 +6,15 @@
 /*   By: maiboyer <maiboyer@student.42.fr>          +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/09/01 15:06:56 by maiboyer          #+#    #+#             */
-/*   Updated: 2024/09/01 15:08:47 by maiboyer         ###   ########.fr       */
+/*   Updated: 2024/09/01 19:40:35 by maiboyer         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */

+#include "me/mem/mem.h"
 #include "me/types.h"
+#include "me/vec/vec_heredoc.h"
 #include "parser/inner/heredoc.h"
 #include "parser/inner/scanner.h"
-#include "parser/array.h"
-#include "parser/parser.h"

 void reset(t_scanner *);

@ -39,12 +39,12 @@ void tree_sitter_sh_external_scanner_deserialize(t_scanner *scanner, const t_u8
 		while (i < heredoc_count)
 		{
 			heredoc = NULL;
-			if (i < scanner->heredocs.size)
-				heredoc = array_get(&scanner->heredocs, i);
+			if (i < scanner->heredocs.len)
+				heredoc = vec_heredoc_get(&scanner->heredocs, i);
 			else
 			{
-				array_push(&scanner->heredocs, heredoc_new());
-				heredoc = array_back(&scanner->heredocs);
+				vec_heredoc_push(&scanner->heredocs, heredoc_new());
+				heredoc = vec_heredoc_last(&scanner->heredocs);
 			}

 			heredoc->is_raw = buffer[size++];
--- a/parser/src/scanner/heredoc.c
+++ b/parser/src/scanner/heredoc.c
@ -0,0 +1,91 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   heredoc.c                                          :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: maiboyer <maiboyer@student.42.fr>          +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2024/09/01 19:33:04 by maiboyer          #+#    #+#             */
+/*   Updated: 2024/09/01 19:55:50 by maiboyer         ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "parser/inner/heredoc.h"
+#include "me/char/char.h"
+#include "me/str/str.h"
+#include "me/types.h"
+#include "me/vec/vec_heredoc.h"
+#include "parser/inner/scanner.h"
+#include "parser/parser.h"
+
+bool scan_heredoc_start(t_heredoc *heredoc, TSLexer *lexer)
+{
+	bool found_delimiter;
+
+	while (me_isspace(lexer->lookahead))
+		lexer->advance(lexer, true);
+	lexer->result_symbol = HEREDOC_START;
+	heredoc->is_raw = lexer->lookahead == '\'' || lexer->lookahead == '"' || lexer->lookahead == '\\';
+	found_delimiter = advance_word(lexer, &heredoc->delimiter);
+	if (!found_delimiter)
+		return (string_clear(&heredoc->delimiter), false);
+	return (found_delimiter);
+}
+
+bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer)
+{
+	t_i32 size;
+
+	size = 0;
+	string_clear(&heredoc->current_leading_word);
+	if (heredoc->delimiter.len > 0)
+	{
+		while (lexer->lookahead != '\0' && lexer->lookahead != '\n' && (t_i32)heredoc->delimiter.buf[size] == lexer->lookahead &&
+			   heredoc->current_leading_word.len < heredoc->delimiter.len)
+		{
+			string_push_char(&heredoc->current_leading_word, lexer->lookahead);
+			lexer->advance(lexer, false);
+			size++;
+		}
+	}
+	if (heredoc->delimiter.len == 0)
+		return (false);
+	return (str_compare(heredoc->current_leading_word.buf, heredoc->delimiter.buf));
+}
+
+bool scan_heredoc_content_nullbyte(struct s_heredoc_scan_state *state);
+bool scan_heredoc_content_backslash(struct s_heredoc_scan_state *state);
+bool scan_heredoc_content_dollar(struct s_heredoc_scan_state *state);
+bool scan_heredoc_content_newline(struct s_heredoc_scan_state *state);
+bool scan_heredoc_content_other(struct s_heredoc_scan_state *state);
+
+bool scan_heredoc_content(t_scanner *scanner, TSLexer *lexer, enum e_token_type middle_type, enum e_token_type end_type)
+{
+	struct s_heredoc_scan_state state;
+	bool (*func)(struct s_heredoc_scan_state *state);
+
+	state.did_advance = false;
+	state.lexer = lexer;
+	state.heredoc = vec_heredoc_last(&scanner->heredocs);
+	state.scanner = scanner;
+	state.middle_type = middle_type;
+	state.end_type = end_type;
+	state.return_value = false;
+
+	while (true)
+	{
+		if (lexer->lookahead == '\0')
+			func = scan_heredoc_content_nullbyte;
+		else if (lexer->lookahead == '\\')
+			func = scan_heredoc_content_backslash;
+		else if (lexer->lookahead == '$')
+			func = scan_heredoc_content_dollar;
+		else if (lexer->lookahead == '\n')
+			func = scan_heredoc_content_newline;
+		else
+			func = scan_heredoc_content_other;
+		if (func(&state))
+			return (state.return_value);
+	}
+	return (false);
+}
--- a/parser/src/scanner/heredoc_functions.c
+++ b/parser/src/scanner/heredoc_functions.c
@ -0,0 +1,120 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   heredoc_functions.c                                :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: maiboyer <maiboyer@student.42.fr>          +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2024/09/01 19:36:53 by maiboyer          #+#    #+#             */
+/*   Updated: 2024/09/01 19:54:13 by maiboyer         ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "me/char/char.h"
+#include "me/str/str.h"
+#include "me/types.h"
+#include "me/vec/vec_heredoc.h"
+#include "parser/inner/heredoc.h"
+#include "parser/inner/scanner.h"
+#include "parser/parser.h"
+
+bool scan_heredoc_end_identifier(t_heredoc *heredoc, TSLexer *lexer);
+
+bool scan_heredoc_content_nullbyte(struct s_heredoc_scan_state *state)
+{
+	if (state->lexer->eof(state->lexer) && state->did_advance)
+	{
+		reset_heredoc(state->heredoc);
+		state->lexer->result_symbol = state->end_type;
+		return (state->return_value = true, true);
+	}
+	return (state->return_value = false, true);
+}
+
+bool scan_heredoc_content_backslash(struct s_heredoc_scan_state *state)
+{
+	state->did_advance = true;
+	state->lexer->advance(state->lexer, false);
+	state->lexer->advance(state->lexer, false);
+	return (false);
+}
+
+bool scan_heredoc_content_dollar(struct s_heredoc_scan_state *state)
+{
+	if (state->heredoc->is_raw)
+	{
+		state->did_advance = true;
+		state->lexer->advance(state->lexer, false);
+	}
+	if (state->did_advance)
+	{
+		state->lexer->mark_end(state->lexer);
+		state->lexer->result_symbol = state->middle_type;
+		state->heredoc->started = true;
+		state->lexer->advance(state->lexer, false);
+		if (me_isalpha(state->lexer->lookahead) || state->lexer->lookahead == '{' || state->lexer->lookahead == '(')
+			return (state->return_value = true, true);
+	}
+	if (state->middle_type == HEREDOC_BODY_BEGINNING && state->lexer->get_column(state->lexer) == 0)
+	{
+		state->lexer->result_symbol = state->middle_type;
+		state->heredoc->started = true;
+		return (state->return_value = true, true);
+	}
+	return (state->return_value = false, true);
+}
+
+bool scan_heredoc_content_newline(struct s_heredoc_scan_state *state)
+{
+	if (!state->did_advance)
+		state->lexer->advance(state->lexer, true);
+	else
+		state->lexer->advance(state->lexer, false);
+	state->did_advance = true;
+	if (state->heredoc->allows_indent)
+	{
+		while (me_isspace(state->lexer->lookahead))
+			state->lexer->advance(state->lexer, false);
+	}
+	state->lexer->result_symbol = state->end_type;
+	if (state->heredoc->started)
+		state->lexer->result_symbol = state->middle_type;
+	state->lexer->mark_end(state->lexer);
+	if (scan_heredoc_end_identifier(state->heredoc, state->lexer))
+	{
+		if (state->lexer->result_symbol == HEREDOC_END)
+			vec_heredoc_pop(&state->scanner->heredocs, NULL);
+		return (state->return_value = true, true);
+	}
+	return (false);
+}
+
+bool scan_heredoc_content_other(struct s_heredoc_scan_state *state)
+{
+	if (state->lexer->get_column(state->lexer) == 0)
+	{
+		while (me_isspace(state->lexer->lookahead))
+		{
+			if (state->did_advance)
+				state->lexer->advance(state->lexer, false);
+			else
+				state->lexer->advance(state->lexer, true);
+		}
+		if (state->end_type != SIMPLE_HEREDOC_BODY)
+		{
+			state->lexer->result_symbol = state->middle_type;
+			if (scan_heredoc_end_identifier(state->heredoc, state->lexer))
+				return (state->return_value = true, true);
+		}
+		if (state->end_type == SIMPLE_HEREDOC_BODY)
+		{
+			state->lexer->result_symbol = state->end_type;
+			state->lexer->mark_end(state->lexer);
+			if (scan_heredoc_end_identifier(state->heredoc, state->lexer))
+				return (state->return_value = true, true);
+		}
+	}
+	state->did_advance = true;
+	state->lexer->advance(state->lexer, false);
+	return (false);
+}
--- a/parser/src/scanner/serialize.c
+++ b/parser/src/scanner/serialize.c
@ -6,21 +6,21 @@
 /*   By: maiboyer <maiboyer@student.42.fr>          +#+  +:+       +#+        */
 /*                                                +#+#+#+#+#+   +#+           */
 /*   Created: 2024/09/01 15:06:56 by maiboyer          #+#    #+#             */
-/*   Updated: 2024/09/01 15:08:47 by maiboyer         ###   ########.fr       */
+/*   Updated: 2024/09/01 19:28:24 by maiboyer         ###   ########.fr       */
 /*                                                                            */
 /* ************************************************************************** */

-#include "parser/inner/scanner.h"
-#include "parser/inner/heredoc.h"
 #include "me/types.h"
 #include "parser/array.h"
+#include "parser/inner/heredoc.h"
+#include "parser/inner/scanner.h"
 #include "parser/parser.h"

 t_error serialize_heredocs(t_scanner *scanner, t_u8 *buffer, t_u32 *size, t_usize i)
 {
 	t_heredoc *heredoc;

-	heredoc = array_get(&scanner->heredocs, i);
+	heredoc = vec_heredoc_get(&scanner->heredocs, i);
 	if (heredoc->delimiter.len + 1 + sizeof(t_usize) + (*size) >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE)
 		return (ERROR);
 	buffer[(*size)++] = (char)heredoc->is_raw;
@ -47,9 +47,9 @@ t_u32 tree_sitter_sh_external_scanner_serialize(t_scanner *scanner, t_u8 *buffer
 	buffer[size++] = (char)scanner->last_glob_paren_depth;
 	buffer[size++] = (char)scanner->ext_was_in_double_quote;
 	buffer[size++] = (char)scanner->ext_saw_outside_quote;
-	buffer[size++] = (char)scanner->heredocs.size;
+	buffer[size++] = (char)scanner->heredocs.len;
 	i = 0;
-	while (i < scanner->heredocs.size)
+	while (i < scanner->heredocs.len)
 		if (serialize_heredocs(scanner, buffer, &size, i++))
 			return (0);
 	return (size);
--- a/stdme/generic_sources/header/vec_CPREFIX.hTEMPLATE
+++ b/stdme/generic_sources/header/vec_CPREFIX.hTEMPLATE
@ -120,4 +120,14 @@ void	vec_C__PREFIX___sort(t_vec_C__PREFIX__		  *vec, t_vec_C__PREFIX___sort_fn i
 /// @return true if the operation failed, false otherwise
 t_error vec_C__PREFIX___back(t_vec_C__PREFIX__ *vec, C__TYPENAME__ **out);

+/// @brief Get a pointer to the i'th element, or NULL otherwise
+/// @param vec The vec_C__PREFIX__ to get the element from
+/// @return A pointer to the element or NULL
+C__TYPENAME__ *vec_C__PREFIX___get(t_vec_C__PREFIX__ *vec, t_usize i);
+
+/// @brief Get a pointer to the last element, or NULL otherwise
+/// @param vec The vec_C__PREFIX__ to get the element from
+/// @return A pointer to the last element or NULL
+C__TYPENAME__ *vec_C__PREFIX___last(t_vec_C__PREFIX__ *vec);
+
 #endif
--- a/stdme/generic_sources/src/vec/CPREFIX_functions3.cTEMPLATE
+++ b/stdme/generic_sources/src/vec/CPREFIX_functions3.cTEMPLATE
@ -10,8 +10,6 @@
 /*                                                                            */
 /* ************************************************************************** */

-#include "me/mem/mem.h"
-#include "me/mem/mem.h"
 #include "me/mem/mem.h"
 #include "me/types.h"
 #include "me/vec/vec_C__PREFIX__.h"
--- a/stdme/generic_sources/src/vec/CPREFIX_functions4.cTEMPLATE
+++ b/stdme/generic_sources/src/vec/CPREFIX_functions4.cTEMPLATE
@ -0,0 +1,30 @@
+/* ************************************************************************** */
+/*                                                                            */
+/*                                                        :::      ::::::::   */
+/*   vec_C__PREFIX__.c                                  :+:      :+:    :+:   */
+/*                                                    +:+ +:+         +:+     */
+/*   By: maiboyer <maiboyer@student.42.fr>          +#+  +:+       +#+        */
+/*                                                +#+#+#+#+#+   +#+           */
+/*   Created: 2023/12/30 17:59:28 by maiboyer          #+#    #+#             */
+/*   Updated: 2023/12/30 17:59:28 by maiboyer         ###   ########.fr       */
+/*                                                                            */
+/* ************************************************************************** */
+
+#include "me/mem/mem.h"
+#include "me/types.h"
+#include "me/vec/vec_C__PREFIX__.h"
+#include <stdlib.h>
+
+C__TYPENAME__ *vec_C__PREFIX___get(t_vec_C__PREFIX__ *vec, t_usize i)
+{
+	if (vec->len >= i)
+		return (NULL);
+	return (&vec->buffer[i]);
+}
+
+C__TYPENAME__ *vec_C__PREFIX___last(t_vec_C__PREFIX__ *vec)
+{
+	if (vec->len == 0)
+		return (NULL);
+	return (&vec->buffer[vec->len - 1]);
+}
--- a/stdme/input.toml
+++ b/stdme/input.toml
@ -5,6 +5,7 @@ sources = [
 	"generic_sources/src/vec/C__PREFIX___sort.c__TEMPLATE__",
 	"generic_sources/src/vec/C__PREFIX___functions2.c__TEMPLATE__",
 	"generic_sources/src/vec/C__PREFIX___functions3.c__TEMPLATE__",
+	"generic_sources/src/vec/C__PREFIX___functions4.c__TEMPLATE__",
 ]
 replace.C__TYPENAME__ = "type"
 replace.C__TYPEHEADER__ = "header_include"