make things work normally

This commit is contained in:
Maieul BOYER 2024-04-30 17:37:59 +02:00
parent eee1354b40
commit 91e2c52270
No known key found for this signature in database
43 changed files with 12576 additions and 6662 deletions

View file

@ -6,7 +6,7 @@
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */ /* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */ /* +#+#+#+#+#+ +#+ */
/* Created: 2024/04/28 18:35:22 by maiboyer #+# #+# */ /* Created: 2024/04/28 18:35:22 by maiboyer #+# #+# */
/* Updated: 2024/04/30 13:02:06 by maiboyer ### ########.fr */ /* Updated: 2024/04/30 16:41:44 by maiboyer ### ########.fr */
/* */ /* */
/* ************************************************************************** */ /* ************************************************************************** */

View file

@ -6,7 +6,7 @@
/* By: rparodi <rparodi@student.42.fr> +#+ +:+ +#+ */ /* By: rparodi <rparodi@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */ /* +#+#+#+#+#+ +#+ */
/* Created: 2024/03/28 14:41:15 by rparodi #+# #+# */ /* Created: 2024/03/28 14:41:15 by rparodi #+# #+# */
/* Updated: 2024/04/30 15:42:51 by rparodi ### ########.fr */ /* Updated: 2024/04/30 16:41:57 by maiboyer ### ########.fr */
/* */ /* */
/* ************************************************************************** */ /* ************************************************************************** */

View file

@ -4376,13 +4376,4 @@ static/unique_symbols_map/unique_symbols_map_2 \
static/lex_funcs/lex_normal/state_helper \ static/lex_funcs/lex_normal/state_helper \
static/lex_funcs/lex_normal/state_helper2 \ static/lex_funcs/lex_normal/state_helper2 \
static/lex_funcs/lex_keywords/state_0_bis \ static/lex_funcs/lex_keywords/state_0_bis \
static/lex_funcs/lex_keywords/state_4_bis \ static/lex_funcs/lex_keywords/state_4_bis
src/language \
src/lexer \
src/node \
src/parser \
src/scanner \
src/stack \
src/subtree \
src/tree \
src/tree_cursor \

View file

@ -6,7 +6,7 @@
# By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ # # By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ #
# +#+#+#+#+#+ +#+ # # +#+#+#+#+#+ +#+ #
# Created: 2023/11/03 13:20:01 by maiboyer #+# #+# # # Created: 2023/11/03 13:20:01 by maiboyer #+# #+# #
# Updated: 2024/04/30 13:35:56 by maiboyer ### ########.fr # # Updated: 2024/04/30 17:20:27 by maiboyer ### ########.fr #
# # # #
# **************************************************************************** # # **************************************************************************** #
@ -22,6 +22,7 @@ CFLAGS = -Wall -Wextra -Werror -MMD -I./includes -I../includes -I../output/inc
include ./Filelist.mk include ./Filelist.mk
SRC_FILES += ./src/lib ./src/scanner
SRC = $(addsuffix .c,$(addprefix $(SRC_DIR)/,$(SRC_FILES))) SRC = $(addsuffix .c,$(addprefix $(SRC_DIR)/,$(SRC_FILES)))
OBJ = $(addsuffix .o,$(addprefix $(BUILD_DIR)/,$(SRC_FILES))) OBJ = $(addsuffix .o,$(addprefix $(BUILD_DIR)/,$(SRC_FILES)))
DEPS = $(addsuffix .d,$(addprefix $(BUILD_DIR)/,$(SRC_FILES))) DEPS = $(addsuffix .d,$(addprefix $(BUILD_DIR)/,$(SRC_FILES)))
@ -77,4 +78,4 @@ re:
generate_filelist: generate_filelist:
@/usr/bin/env zsh -c "tree -iFf --noreport $(SRC_DIR) | rg '^$(SRC_DIR)/(.*)\.c\$$' --replace '\$$1' | sort -u" > ./source_files.list @/usr/bin/env zsh -c "tree -iFf --noreport $(SRC_DIR) | rg '^$(SRC_DIR)/(.*)\.c\$$' --replace '\$$1' | sort -u" > ./source_files.list
-include $(DEPS) # -include $(DEPS)

View file

@ -6,11 +6,10 @@
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */ /* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */ /* +#+#+#+#+#+ +#+ */
/* Created: 2024/04/25 16:13:52 by maiboyer #+# #+# */ /* Created: 2024/04/25 16:13:52 by maiboyer #+# #+# */
/* Updated: 2024/04/28 17:15:16 by maiboyer ### ########.fr */ /* Updated: 2024/04/30 16:37:30 by maiboyer ### ########.fr */
/* */ /* */
/* ************************************************************************** */ /* ************************************************************************** */
#include "./includes/parser.h"
#include "./static/headers/constants.h" #include "./static/headers/constants.h"
#include "./static/headers/symbols.h" #include "./static/headers/symbols.h"
#include "./parse_types.h" #include "./parse_types.h"

View file

@ -14,7 +14,7 @@
#define LEXER_H #define LEXER_H
#include "me/types.h" #include "me/types.h"
#include "parser/api.h" #include "./api.h"
#include "parser/parser_length.h" #include "parser/parser_length.h"
#include "parser/types/types_lexer.h" #include "parser/types/types_lexer.h"

View file

@ -2,7 +2,7 @@
#define TREE_SITTER_LENGTH_H_ #define TREE_SITTER_LENGTH_H_
#include "parser/point.h" #include "parser/point.h"
#include "parser/api.h" #include "./api.h"
#include <stdbool.h> #include <stdbool.h>
#include <stdlib.h> #include <stdlib.h>

View file

@ -15,7 +15,7 @@
#include "me/types.h" #include "me/types.h"
#include "me/vec/vec_reduce_action.h" #include "me/vec/vec_reduce_action.h"
#include "parser/api.h" #include "./api.h"
#include "parser/types/types_reduce_action.h" #include "parser/types/types_reduce_action.h"
static inline void ts_reduce_action_set_add(t_vec_reduce_action *self, static inline void ts_reduce_action_set_add(t_vec_reduce_action *self,

48
parser/src/alloc.c Normal file
View file

@ -0,0 +1,48 @@
#include "alloc.h"
#include "./api.h"
#include <stdlib.h>
// Default `malloc` hook: forwards to the C library allocator.
//
// A NULL result for a non-zero request is treated as fatal: a diagnostic is
// written to stderr and the process is aborted. A NULL result for a zero-byte
// request is handed back to the caller untouched.
static void *ts_malloc_default(size_t size) {
  void *block = malloc(size);
  if (block != NULL || size == 0) {
    return block;
  }
  fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size);
  abort();
}
// Default `calloc` hook: forwards to the C library allocator and returns
// zero-initialized storage for `count` elements of `size` bytes each.
//
// A NULL result is fatal (diagnostic on stderr, then abort) only when the
// request was for a non-zero number of bytes. `calloc` is allowed to return
// NULL for a zero-byte request, so neither `count == 0` nor `size == 0` is
// treated as an allocation failure; this matches the `size > 0` check used by
// the default malloc/realloc hooks.
//
// Note: the byte count printed in the diagnostic is `count * size`, which
// wraps around if the product does not fit in `size_t`.
static void *ts_calloc_default(size_t count, size_t size) {
  void *result = calloc(count, size);
  if (count > 0 && size > 0 && !result) {
    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size);
    abort();
  }
  return result;
}
// Default `realloc` hook: forwards to the C library allocator.
//
// A NULL result for a non-zero `size` is fatal: a diagnostic is written to
// stderr and the process is aborted. A NULL result for `size == 0` is
// returned to the caller as-is.
static void *ts_realloc_default(void *buffer, size_t size) {
  void *resized = realloc(buffer, size);
  if (resized != NULL || size == 0) {
    return resized;
  }
  fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size);
  abort();
}
// Allow clients to override allocation functions dynamically
//
// These four function pointers are the allocation hooks. Each one starts out
// pointing at the default implementation defined in this file (or at the C
// library's `free`), and `ts_set_allocator` reassigns them at runtime.
TS_PUBLIC void *(*ts_current_malloc)(size_t) = ts_malloc_default;
TS_PUBLIC void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default;
TS_PUBLIC void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default;
TS_PUBLIC void (*ts_current_free)(void *) = free;
// Install a set of allocation callbacks.
//
// Each argument replaces the corresponding `ts_current_*` hook. Passing NULL
// for an argument resets that hook to its default (`ts_*_default`, or the C
// library's `free` for `new_free`).
void ts_set_allocator(
  void *(*new_malloc)(size_t size),
  void *(*new_calloc)(size_t count, size_t size),
  void *(*new_realloc)(void *ptr, size_t size),
  void (*new_free)(void *ptr)
) {
  if (new_malloc) {
    ts_current_malloc = new_malloc;
  } else {
    ts_current_malloc = ts_malloc_default;
  }

  if (new_calloc) {
    ts_current_calloc = new_calloc;
  } else {
    ts_current_calloc = ts_calloc_default;
  }

  if (new_realloc) {
    ts_current_realloc = new_realloc;
  } else {
    ts_current_realloc = ts_realloc_default;
  }

  if (new_free) {
    ts_current_free = new_free;
  } else {
    ts_current_free = free;
  }
}

41
parser/src/alloc.h Normal file
View file

@ -0,0 +1,41 @@
// Allocation hooks: declares the replaceable allocator function pointers and
// the `ts_malloc` / `ts_calloc` / `ts_realloc` / `ts_free` macros that route
// through them.
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
// TS_PUBLIC marks the hook pointers below. It expands to nothing when
// TREE_SITTER_HIDDEN_SYMBOLS or _WIN32 is defined, and to the
// `visibility("default")` attribute otherwise.
#if defined(TREE_SITTER_HIDDEN_SYMBOLS) || defined(_WIN32)
#define TS_PUBLIC
#else
#define TS_PUBLIC __attribute__((visibility("default")))
#endif
// Current allocation hooks (declared here, defined in another translation unit).
TS_PUBLIC extern void *(*ts_current_malloc)(size_t);
TS_PUBLIC extern void *(*ts_current_calloc)(size_t, size_t);
TS_PUBLIC extern void *(*ts_current_realloc)(void *, size_t);
TS_PUBLIC extern void (*ts_current_free)(void *);
// Allow clients to override allocation functions
//
// Each `ts_*` macro defaults to the matching `ts_current_*` hook, but only if
// it has not already been defined before this header is included.
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ALLOC_H_

1273
parser/src/api.h Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,11 @@
#ifndef TREE_SITTER_ARRAY_H_ #ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_ #define TREE_SITTER_ARRAY_H_
#include "me/types.h" #ifdef __cplusplus
extern "C" {
#endif
#include "./alloc.h"
#include <assert.h> #include <assert.h>
#include <stdbool.h> #include <stdbool.h>
@ -9,12 +13,18 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#ifdef _MSC_VER
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
#define Array(T) \ #define Array(T) \
struct \ struct { \
{ \
T *contents; \ T *contents; \
t_u32 size; \ uint32_t size; \
t_u32 capacity; \ uint32_t capacity; \
} }
/// Initialize an array. /// Initialize an array.
@ -23,13 +33,11 @@
/// Create an empty array. /// Create an empty array.
#define array_new() \ #define array_new() \
{ \ { NULL, 0, 0 }
NULL, 0, 0 \
}
/// Get a pointer to the element at a given `index` in the array. /// Get a pointer to the element at a given `index` in the array.
#define array_get(self, _index) \ #define array_get(self, _index) \
(assert((t_u32)(_index) < (self)->size), &(self)->contents[_index]) (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
/// Get a pointer to the first element in the array. /// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0) #define array_front(self) array_get(self, 0)
@ -58,13 +66,10 @@
/// Increase the array's size by `count` elements. /// Increase the array's size by `count` elements.
/// New elements are zero-initialized. /// New elements are zero-initialized.
#define array_grow_by(self, count) \ #define array_grow_by(self, count) \
do \ do { \
{ \ if ((count) == 0) break; \
if ((count) == 0) \
break; \
_array__grow((Array *)(self), count, array_elem_size(self)); \ _array__grow((Array *)(self), count, array_elem_size(self)); \
memset((self)->contents + (self)->size, 0, \ memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
(count) * array_elem_size(self)); \
(self)->size += (count); \ (self)->size += (count); \
} while (0) } while (0)
@ -72,23 +77,26 @@
#define array_push_all(self, other) \ #define array_push_all(self, other) \
array_extend((self), (other)->size, (other)->contents) array_extend((self), (other)->size, (other)->contents)
/// Append `count` elements to the end of the array, reading their values from /// Append `count` elements to the end of the array, reading their values from the
/// the `contents` pointer. /// `contents` pointer.
#define array_extend(self, count, contents) \ #define array_extend(self, count, contents) \
_array__splice((Array *)(self), array_elem_size(self), (self)->size, 0, \ _array__splice( \
count, contents) (Array *)(self), array_elem_size(self), (self)->size, \
0, count, contents \
)
/// Remove `old_count` elements from the array starting at the given `index`. At /// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from /// the same index, insert `new_count` new elements, reading their values from the
/// the `new_contents` pointer. /// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents) \ #define array_splice(self, _index, old_count, new_count, new_contents) \
_array__splice((Array *)(self), array_elem_size(self), _index, old_count, \ _array__splice( \
new_count, new_contents) (Array *)(self), array_elem_size(self), _index, \
old_count, new_count, new_contents \
)
/// Insert one `element` into the array at the given `index`. /// Insert one `element` into the array at the given `index`.
#define array_insert(self, _index, element) \ #define array_insert(self, _index, element) \
_array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, \ _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
&(element))
/// Remove one element from the array at the given `index`. /// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \ #define array_erase(self, _index) \
@ -99,11 +107,11 @@
/// Assign the contents of one array to another, reallocating if necessary. /// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \ #define array_assign(self, other) \
_array__assign((Array *)(self), (const Array *)(other), \ _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
array_elem_size(self))
/// Swap one array with another /// Swap one array with another
#define array_swap(self, other) _array__swap((Array *)(self), (Array *)(other)) #define array_swap(self, other) \
_array__swap((Array *)(self), (Array *)(other))
/// Get the size of the array contents /// Get the size of the array contents
#define array_elem_size(self) (sizeof *(self)->contents) #define array_elem_size(self) (sizeof *(self)->contents)
@ -120,8 +128,7 @@
_array__search_sorted(self, 0, compare, , needle, _index, _exists) _array__search_sorted(self, 0, compare, , needle, _index, _exists)
/// Search a sorted array for a given `needle` value, using integer comparisons /// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the /// of a given struct field (specified with a leading dot) to determine the order.
/// order.
/// ///
/// See also `array_search_sorted_with`. /// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \ #define array_search_sorted_by(self, field, needle, _index, _exists) \
@ -130,12 +137,10 @@
/// Insert a given `value` into a sorted array, using the given `compare` /// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order. /// callback to determine the order.
#define array_insert_sorted_with(self, compare, value) \ #define array_insert_sorted_with(self, compare, value) \
do \ do { \
{ \
unsigned _index, _exists; \ unsigned _index, _exists; \
array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
if (!_exists) \ if (!_exists) array_insert(self, _index, value); \
array_insert(self, _index, value); \
} while (0) } while (0)
/// Insert a given `value` into a sorted array, using integer comparisons of /// Insert a given `value` into a sorted array, using integer comparisons of
@ -143,12 +148,10 @@
/// ///
/// See also `array_search_sorted_by`. /// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \ #define array_insert_sorted_by(self, field, value) \
do \ do { \
{ \
unsigned _index, _exists; \ unsigned _index, _exists; \
array_search_sorted_by(self, field, (value)field, &_index, &_exists); \ array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
if (!_exists) \ if (!_exists) array_insert(self, _index, value); \
array_insert(self, _index, value); \
} while (0) } while (0)
// Private // Private
@ -156,11 +159,9 @@
typedef Array(void) Array; typedef Array(void) Array;
/// This is not what you're looking for, see `array_delete`. /// This is not what you're looking for, see `array_delete`.
static inline void _array__delete(Array *self) static inline void _array__delete(Array *self) {
{ if (self->contents) {
if (self->contents) ts_free(self->contents);
{
free(self->contents);
self->contents = NULL; self->contents = NULL;
self->size = 0; self->size = 0;
self->capacity = 0; self->capacity = 0;
@ -168,135 +169,122 @@ static inline void _array__delete(Array *self)
} }
/// This is not what you're looking for, see `array_erase`. /// This is not what you're looking for, see `array_erase`.
static inline void _array__erase(Array *self, size_t element_size, t_u32 index) static inline void _array__erase(Array *self, size_t element_size,
{ uint32_t index) {
assert(index < self->size); assert(index < self->size);
char *contents = (char *)self->contents; char *contents = (char *)self->contents;
memmove(contents + index * element_size, memmove(contents + index * element_size, contents + (index + 1) * element_size,
contents + (index + 1) * element_size,
(self->size - index - 1) * element_size); (self->size - index - 1) * element_size);
self->size--; self->size--;
} }
/// This is not what you're looking for, see `array_reserve`. /// This is not what you're looking for, see `array_reserve`.
static inline void _array__reserve(Array *self, size_t element_size, static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
t_u32 new_capacity) if (new_capacity > self->capacity) {
{ if (self->contents) {
if (new_capacity > self->capacity) self->contents = ts_realloc(self->contents, new_capacity * element_size);
{ } else {
if (self->contents) self->contents = ts_malloc(new_capacity * element_size);
{
self->contents =
realloc(self->contents, new_capacity * element_size);
}
else
{
self->contents = malloc(new_capacity * element_size);
} }
self->capacity = new_capacity; self->capacity = new_capacity;
} }
} }
/// This is not what you're looking for, see `array_assign`. /// This is not what you're looking for, see `array_assign`.
static inline void _array__assign(Array *self, const Array *other, static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
size_t element_size)
{
_array__reserve(self, element_size, other->size); _array__reserve(self, element_size, other->size);
self->size = other->size; self->size = other->size;
memcpy(self->contents, other->contents, self->size * element_size); memcpy(self->contents, other->contents, self->size * element_size);
} }
/// This is not what you're looking for, see `array_swap`. /// This is not what you're looking for, see `array_swap`.
static inline void _array__swap(Array *self, Array *other) static inline void _array__swap(Array *self, Array *other) {
{
Array swap = *other; Array swap = *other;
*other = *self; *other = *self;
*self = swap; *self = swap;
} }
/// This is not what you're looking for, see `array_push` or `array_grow_by`. /// This is not what you're looking for, see `array_push` or `array_grow_by`.
static inline void _array__grow(Array *self, t_u32 count, size_t element_size) static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
{ uint32_t new_size = self->size + count;
t_u32 new_size = self->size + count; if (new_size > self->capacity) {
if (new_size > self->capacity) uint32_t new_capacity = self->capacity * 2;
{ if (new_capacity < 8) new_capacity = 8;
t_u32 new_capacity = self->capacity * 2; if (new_capacity < new_size) new_capacity = new_size;
if (new_capacity < 8)
new_capacity = 8;
if (new_capacity < new_size)
new_capacity = new_size;
_array__reserve(self, element_size, new_capacity); _array__reserve(self, element_size, new_capacity);
} }
} }
/// This is not what you're looking for, see `array_splice`. /// This is not what you're looking for, see `array_splice`.
static inline void _array__splice(Array *self, size_t element_size, t_u32 index, static inline void _array__splice(Array *self, size_t element_size,
t_u32 old_count, t_u32 new_count, uint32_t index, uint32_t old_count,
const void *elements) uint32_t new_count, const void *elements) {
{ uint32_t new_size = self->size + new_count - old_count;
t_u32 new_size = self->size + new_count - old_count; uint32_t old_end = index + old_count;
t_u32 old_end = index + old_count; uint32_t new_end = index + new_count;
t_u32 new_end = index + new_count;
assert(old_end <= self->size); assert(old_end <= self->size);
_array__reserve(self, element_size, new_size); _array__reserve(self, element_size, new_size);
char *contents = (char *)self->contents; char *contents = (char *)self->contents;
if (self->size > old_end) if (self->size > old_end) {
{ memmove(
memmove(contents + new_end * element_size, contents + new_end * element_size,
contents + old_end * element_size, contents + old_end * element_size,
(self->size - old_end) * element_size); (self->size - old_end) * element_size
);
} }
if (new_count > 0) if (new_count > 0) {
{ if (elements) {
if (elements) memcpy(
{ (contents + index * element_size),
memcpy((contents + index * element_size), elements, elements,
new_count * element_size); new_count * element_size
} );
else } else {
{ memset(
memset((contents + index * element_size), 0, (contents + index * element_size),
new_count * element_size); 0,
new_count * element_size
);
} }
} }
self->size += new_count - old_count; self->size += new_count - old_count;
} }
/// A binary search routine, based on Rust's `std::slice::binary_search_by`. /// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or /// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
/// `array_search_sorted_by`. #define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
#define _array__search_sorted(self, start, compare, suffix, needle, _index, \ do { \
_exists) \
do \
{ \
*(_index) = start; \ *(_index) = start; \
*(_exists) = false; \ *(_exists) = false; \
t_u32 size = (self)->size - *(_index); \ uint32_t size = (self)->size - *(_index); \
if (size == 0) \ if (size == 0) break; \
break; \
int comparison; \ int comparison; \
while (size > 1) \ while (size > 1) { \
{ \ uint32_t half_size = size / 2; \
t_u32 half_size = size / 2; \ uint32_t mid_index = *(_index) + half_size; \
t_u32 mid_index = *(_index) + half_size; \ comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
comparison = \ if (comparison <= 0) *(_index) = mid_index; \
compare(&((self)->contents[mid_index] suffix), (needle)); \
if (comparison <= 0) \
*(_index) = mid_index; \
size -= half_size; \ size -= half_size; \
} \ } \
comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
if (comparison == 0) \ if (comparison == 0) *(_exists) = true; \
*(_exists) = true; \ else if (comparison < 0) *(_index) += 1; \
else if (comparison < 0) \
*(_index) += 1; \
} while (0) } while (0)
/// Helper macro for the `_sorted_by` routines below. This takes the left /// Helper macro for the `_sorted_by` routines below. This takes the left (existing)
/// (existing) parameter by reference in order to work with the generic sorting /// parameter by reference in order to work with the generic sorting function above.
/// function above.
#define _compare_int(a, b) ((int)*(a) - (int)(b)) #define _compare_int(a, b) ((int)*(a) - (int)(b))
#ifdef _MSC_VER
#pragma warning(default : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ARRAY_H_ #endif // TREE_SITTER_ARRAY_H_

68
parser/src/atomic.h Normal file
View file

@ -0,0 +1,68 @@
#ifndef TREE_SITTER_ATOMIC_H_
#define TREE_SITTER_ATOMIC_H_
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#ifdef __TINYC__
// TinyC variant: a plain (non-atomic) read of `*p`.
static inline size_t atomic_load(const volatile size_t *p) {
  const size_t observed = *p;
  return observed;
}
// TinyC variant: a plain (non-atomic) increment of `*p`.
// Returns the value read back from `*p` after the store.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
  *p = *p + 1;
  return *p;
}
// TinyC variant: a plain (non-atomic) decrement of `*p`.
// Returns the value read back from `*p` after the store.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
  *p = *p - 1;
  return *p;
}
#elif defined(_WIN32)
#include <windows.h>
// Win32 variant: reads `*p` with an ordinary volatile load.
static inline size_t atomic_load(const volatile size_t *p) {
  const size_t observed = *p;
  return observed;
}
// Win32 variant: increments `*p` through `InterlockedIncrement` and returns
// that call's result converted to uint32_t.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
  long volatile *counter = (long volatile *)p;
  return InterlockedIncrement(counter);
}
// Win32 variant: decrements `*p` through `InterlockedDecrement` and returns
// that call's result converted to uint32_t.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
  long volatile *counter = (long volatile *)p;
  return InterlockedDecrement(counter);
}
#else
// GCC/Clang variant: atomically reads `*p`.
// Uses the `__atomic` builtin with relaxed ordering when the compiler
// advertises it, and the legacy `__sync` builtin (add zero, return the
// previous value) otherwise.
static inline size_t atomic_load(const volatile size_t *p) {
#ifndef __ATOMIC_RELAXED
  return __sync_fetch_and_add((volatile size_t *)p, 0);
#else
  return __atomic_load_n(p, __ATOMIC_RELAXED);
#endif
}
// GCC/Clang variant: atomically adds one to `*p` and returns the new value.
// Prefers the `__atomic` builtin (sequentially consistent); falls back to the
// legacy `__sync` builtin when `__ATOMIC_RELAXED` is not defined.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
#ifndef __ATOMIC_RELAXED
  return __sync_add_and_fetch(p, 1U);
#else
  return __atomic_add_fetch(p, 1U, __ATOMIC_SEQ_CST);
#endif
}
// GCC/Clang variant: atomically subtracts one from `*p` and returns the new
// value. Prefers the `__atomic` builtin (sequentially consistent); falls back
// to the legacy `__sync` builtin when `__ATOMIC_RELAXED` is not defined.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
#ifndef __ATOMIC_RELAXED
  return __sync_sub_and_fetch(p, 1U);
#else
  return __atomic_sub_fetch(p, 1U, __ATOMIC_SEQ_CST);
#endif
}
#endif
#endif // TREE_SITTER_ATOMIC_H_

146
parser/src/clock.h Normal file
View file

@ -0,0 +1,146 @@
#ifndef TREE_SITTER_CLOCK_H_
#define TREE_SITTER_CLOCK_H_
#include <stdbool.h>
#include <stdint.h>
typedef uint64_t TSDuration;
#ifdef _WIN32
// Windows:
// * Represent a time as a performance counter value.
// * Represent a duration as a number of performance counter ticks.
#include <windows.h>
typedef uint64_t TSClock;
// Win32: convert a microsecond count into performance-counter ticks, using
// the frequency reported by QueryPerformanceFrequency.
static inline TSDuration duration_from_micros(uint64_t micros) {
  LARGE_INTEGER ticks_per_second;
  QueryPerformanceFrequency(&ticks_per_second);
  const uint64_t frequency = (uint64_t)ticks_per_second.QuadPart;
  return micros * frequency / 1000000;
}
// Win32: convert a duration in performance-counter ticks back into
// microseconds, using the frequency reported by QueryPerformanceFrequency.
static inline uint64_t duration_to_micros(TSDuration self) {
  LARGE_INTEGER ticks_per_second;
  QueryPerformanceFrequency(&ticks_per_second);
  const uint64_t frequency = (uint64_t)ticks_per_second.QuadPart;
  return self * 1000000 / frequency;
}
// Win32: the null clock is the counter value zero.
static inline TSClock clock_null(void) {
  const TSClock none = 0;
  return none;
}
// Win32: the current time is the value reported by QueryPerformanceCounter.
static inline TSClock clock_now(void) {
  LARGE_INTEGER counter;
  QueryPerformanceCounter(&counter);
  const uint64_t ticks = (uint64_t)counter.QuadPart;
  return ticks;
}
// Win32: the clock value `duration` ticks after `base`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  TSClock deadline = base;
  deadline += duration;
  return deadline;
}
// Win32: a clock is null when its counter value is zero.
static inline bool clock_is_null(TSClock self) {
  return self == 0;
}
// Win32: true when `self` is strictly later than `other`.
static inline bool clock_is_gt(TSClock self, TSClock other) {
  return other < self;
}
#elif defined(CLOCK_MONOTONIC) && !defined(__APPLE__)
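// POSIX with monotonic clock support (Linux)
// * Represent a time as a monotonic (seconds, nanoseconds) pair.
// * Represent a duration as a number of microseconds.
//
// On these platforms, parse timeouts will correspond accurately to
// real time, regardless of what other processes are running.
//
// NOTE: `CLOCK_MONOTONIC` is a macro provided by <time.h>, which this header
// only includes inside this branch. The branch is therefore selected only if
// <time.h> (or another header that defines `CLOCK_MONOTONIC`) has already been
// included before this header; otherwise the fallback branch below is used.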
#include <time.h>
typedef struct timespec TSClock;
// POSIX: durations are stored in microseconds, so no conversion is needed.
static inline TSDuration duration_from_micros(uint64_t micros) {
  const TSDuration duration = micros;
  return duration;
}
// POSIX: durations are stored in microseconds, so no conversion is needed.
static inline uint64_t duration_to_micros(TSDuration self) {
  const uint64_t micros = self;
  return micros;
}
// POSIX: read the current time from CLOCK_MONOTONIC.
// The return value of clock_gettime is not checked.
static inline TSClock clock_now(void) {
  TSClock now;
  clock_gettime(CLOCK_MONOTONIC, &now);
  return now;
}
// POSIX: the null clock has its first two members set to zero (and any
// remaining members zero-initialized).
static inline TSClock clock_null(void) {
  const TSClock none = {0, 0};
  return none;
}
// POSIX: the timestamp `duration` microseconds after `base`.
// The duration is split into whole seconds and leftover nanoseconds; at most
// one carry from `tv_nsec` into `tv_sec` is applied afterwards.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  const uint64_t whole_seconds = duration / 1000000;
  const uint64_t leftover_nanos = (duration % 1000000) * 1000;

  TSClock deadline = base;
  deadline.tv_sec += whole_seconds;
  deadline.tv_nsec += leftover_nanos;

  if (deadline.tv_nsec < 1000000000) {
    return deadline;
  }
  deadline.tv_nsec -= 1000000000;
  deadline.tv_sec += 1;
  return deadline;
}
// POSIX: true only for the value produced by `clock_null()`, i.e. when both
// the seconds and the nanoseconds components are zero.
//
// Checking `tv_sec` alone would misclassify any real timestamp that falls
// within the first second of the clock (tv_sec == 0, tv_nsec != 0) as null.
static inline bool clock_is_null(TSClock self) {
  return self.tv_sec == 0 && self.tv_nsec == 0;
}
// POSIX: true when `self` is strictly later than `other`.
// Seconds decide the order; nanoseconds break ties.
static inline bool clock_is_gt(TSClock self, TSClock other) {
  if (self.tv_sec != other.tv_sec) {
    return self.tv_sec > other.tv_sec;
  }
  return self.tv_nsec > other.tv_nsec;
}
#else
// macOS or POSIX without monotonic clock support
// * Represent a time as a process clock value.
// * Represent a duration as a number of process clock ticks.
//
// On these platforms, parse timeouts may be affected by other processes,
// which is not ideal, but is better than using a non-monotonic time API
// like `gettimeofday`.
#include <time.h>
typedef uint64_t TSClock;
// Fallback: convert a microsecond count into `clock()` ticks.
static inline TSDuration duration_from_micros(uint64_t micros) {
  const uint64_t ticks_per_second = (uint64_t)CLOCKS_PER_SEC;
  return micros * ticks_per_second / 1000000;
}
// Fallback: convert a duration in `clock()` ticks back into microseconds.
static inline uint64_t duration_to_micros(TSDuration self) {
  const uint64_t ticks_per_second = (uint64_t)CLOCKS_PER_SEC;
  return self * 1000000 / ticks_per_second;
}
// Fallback: the null clock is the tick value zero.
static inline TSClock clock_null(void) {
  const TSClock none = 0;
  return none;
}
// Fallback: the current time is the value returned by `clock()`.
static inline TSClock clock_now(void) {
  const uint64_t ticks = (uint64_t)clock();
  return ticks;
}
// Fallback: the clock value `duration` ticks after `base`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  TSClock deadline = base;
  deadline += duration;
  return deadline;
}
// Fallback: a clock is null when its tick value is zero.
static inline bool clock_is_null(TSClock self) {
  return self == 0;
}
// Fallback: true when `self` is strictly later than `other`.
static inline bool clock_is_gt(TSClock self, TSClock other) {
  return other < self;
}
#endif
#endif // TREE_SITTER_CLOCK_H_

11
parser/src/error_costs.h Normal file
View file

@ -0,0 +1,11 @@
// Named numeric constants related to error handling.
// NOTE(review): judging by the names, ERROR_STATE is a reserved state id and
// the ERROR_COST_PER_* values are relative penalties for different kinds of
// error recovery — confirm against the code that uses these macros.
#ifndef TREE_SITTER_ERROR_COSTS_H_
#define TREE_SITTER_ERROR_COSTS_H_
#define ERROR_STATE 0
#define ERROR_COST_PER_RECOVERY 500
#define ERROR_COST_PER_MISSING_TREE 110
#define ERROR_COST_PER_SKIPPED_TREE 100
#define ERROR_COST_PER_SKIPPED_LINE 30
#define ERROR_COST_PER_SKIPPED_CHAR 1
#endif // TREE_SITTER_ERROR_COSTS_H_

View file

@ -0,0 +1,501 @@
#include "./get_changed_ranges.h"
#include "./subtree.h"
#include "./language.h"
#include "./error_costs.h"
#include "./tree_cursor.h"
#include <assert.h>
// #define DEBUG_GET_CHANGED_RANGES
// Record the span [start, end) in `self`.
//
// If the array already has a last range and `start` begins at or before that
// range's end byte, the last range's end is overwritten with `end` instead of
// appending. Otherwise a new range is appended, but only when the span is
// non-empty (start.bytes < end.bytes).
static void ts_range_array_add(
  TSRangeArray *self,
  Length start,
  Length end
) {
  const bool has_previous = self->size > 0;
  if (has_previous) {
    TSRange *tail = array_back(self);
    if (tail->end_byte >= start.bytes) {
      tail->end_byte = end.bytes;
      tail->end_point = end.extent;
      return;
    }
  }

  if (end.bytes > start.bytes) {
    TSRange fresh = { start.extent, end.extent, start.bytes, end.bytes };
    array_push(self, fresh);
  }
}
// Report whether any range in `self`, starting the scan at `start_index`,
// overlaps the byte span [start_byte, end_byte).
//
// Only the first range whose end lies past `start_byte` is examined: the
// answer is whether that range starts before `end_byte`.
bool ts_range_array_intersects(
  const TSRangeArray *self,
  unsigned start_index,
  uint32_t start_byte,
  uint32_t end_byte
) {
  for (unsigned i = start_index; i < self->size; i++) {
    const TSRange *candidate = &self->contents[i];
    if (candidate->end_byte <= start_byte) {
      continue;
    }
    return candidate->start_byte < end_byte;
  }
  return false;
}
// Compare two lists of ranges and append to `differences` every span that is
// covered by exactly one of the two lists.
//
// Both lists are walked in parallel, one range boundary at a time. At each
// step the nearer of the next old boundary and the next new boundary is
// chosen (by byte offset); if the walk is currently inside one list's range
// but not the other's, the span from the previous boundary up to the chosen
// one is recorded. Crossing a boundary toggles the "inside a range" flag for
// the list(s) it belongs to, and leaving a range advances that list's index.
void ts_range_array_get_changed_ranges(
  const TSRange *old_ranges, unsigned old_range_count,
  const TSRange *new_ranges, unsigned new_range_count,
  TSRangeArray *differences
) {
  unsigned old_index = 0;
  unsigned new_index = 0;
  bool in_old_range = false;
  bool in_new_range = false;
  Length current_position = length_zero();

  while (old_index < old_range_count || new_index < new_range_count) {
    const TSRange *old_range = &old_ranges[old_index];
    const TSRange *new_range = &new_ranges[new_index];

    // Next boundary in the old list; stays at LENGTH_MAX once it is exhausted.
    Length next_old_position = LENGTH_MAX;
    if (in_old_range) {
      next_old_position = (Length) {old_range->end_byte, old_range->end_point};
    } else if (old_index < old_range_count) {
      next_old_position = (Length) {old_range->start_byte, old_range->start_point};
    }

    // Next boundary in the new list; stays at LENGTH_MAX once it is exhausted.
    Length next_new_position = LENGTH_MAX;
    if (in_new_range) {
      next_new_position = (Length) {new_range->end_byte, new_range->end_point};
    } else if (new_index < new_range_count) {
      next_new_position = (Length) {new_range->start_byte, new_range->start_point};
    }

    const bool old_comes_first = next_old_position.bytes < next_new_position.bytes;
    const bool new_comes_first = next_new_position.bytes < next_old_position.bytes;
    // On a tie the new list's position is the one that is used.
    const Length boundary = old_comes_first ? next_old_position : next_new_position;

    if (in_old_range != in_new_range) {
      ts_range_array_add(differences, current_position, boundary);
    }

    // Cross the old boundary unless the new one is strictly nearer.
    if (!new_comes_first) {
      if (in_old_range) old_index++;
      in_old_range = !in_old_range;
    }

    // Cross the new boundary unless the old one is strictly nearer.
    if (!old_comes_first) {
      if (in_new_range) new_index++;
      in_new_range = !in_new_range;
    }

    current_position = boundary;
  }
}
// Traversal state used by the iterator_* helpers in this file.
typedef struct {
// Cursor whose `stack` holds the path of entries from the root to the current subtree.
TreeCursor cursor;
// Language passed to `ts_language_alias_at` when looking up aliases.
const TSLanguage *language;
// Set to 1 by `iterator_new`; decremented by `iterator_ascend` when it leaves a visible tree.
unsigned visible_depth;
// When true, the start/end position helpers describe the current entry's padding rather than its content.
bool in_padding;
} Iterator;
// Build an iterator positioned at `tree`.
//
// The caller's cursor stack is emptied and seeded with a single entry for
// `tree` at position zero; the returned iterator holds a copy of the cursor
// taken after that entry has been pushed.
static Iterator iterator_new(
  TreeCursor *cursor,
  const Subtree *tree,
  const TSLanguage *language
) {
  TreeCursorEntry root_entry = {
    .subtree = tree,
    .position = length_zero(),
    .child_index = 0,
    .structural_child_index = 0,
  };

  array_clear(&cursor->stack);
  array_push(&cursor->stack, root_entry);

  Iterator fresh = {
    .cursor = *cursor,
    .language = language,
    .visible_depth = 1,
    .in_padding = false,
  };
  return fresh;
}
// The iterator is finished once its cursor stack has been emptied.
static bool iterator_done(Iterator *self) {
  const uint32_t depth = self->cursor.stack.size;
  return depth == 0;
}
// Start of the region the iterator currently points at: the entry's own
// position while in padding, otherwise the position just past its padding.
static Length iterator_start_position(Iterator *self) {
  const TreeCursorEntry *top = array_back(&self->cursor.stack);
  if (self->in_padding) {
    return top->position;
  }
  return length_add(top->position, ts_subtree_padding(*top->subtree));
}
// End of the region the iterator currently points at: the end of the entry's
// padding while in padding, otherwise the end of the entry's content.
static Length iterator_end_position(Iterator *self) {
  const TreeCursorEntry *top = array_back(&self->cursor.stack);
  const Length padding_end = length_add(top->position, ts_subtree_padding(*top->subtree));
  return self->in_padding
    ? padding_end
    : length_add(padding_end, ts_subtree_size(*top->subtree));
}
static bool iterator_tree_is_visible(const Iterator *self) {
TreeCursorEntry entry = *array_back(&self->cursor.stack);
if (ts_subtree_visible(*entry.subtree)) return true;
if (self->cursor.stack.size > 1) {
Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
return ts_language_alias_at(
self->language,
parent.ptr->production_id,
entry.structural_child_index
) != 0;
}
return false;
}
// Find the nearest visible (or aliased) subtree on the stack, searching from
// the current entry towards the root, and report it through the out
// parameters.
//
// While in padding the top entry is skipped; if it is the only entry the
// function returns without writing anything. For every inspected entry that
// has a parent, `*alias_symbol` is overwritten with the alias the parent's
// production assigns to it. `*tree` and `*start_byte` are written only when
// a match is found, so callers must initialise all three outputs.
static void iterator_get_visible_state(
  const Iterator *self,
  Subtree *tree,
  TSSymbol *alias_symbol,
  uint32_t *start_byte
) {
  // Number of stack entries (counted from the root) still to be examined.
  uint32_t depth = self->cursor.stack.size;
  if (self->in_padding) {
    if (depth == 1) return;
    depth--;
  }

  while (depth > 0) {
    uint32_t index = depth - 1;
    TreeCursorEntry entry = self->cursor.stack.contents[index];

    if (index > 0) {
      const Subtree *parent = self->cursor.stack.contents[index - 1].subtree;
      *alias_symbol = ts_language_alias_at(
        self->language,
        parent->ptr->production_id,
        entry.structural_child_index
      );
    }

    if (ts_subtree_visible(*entry.subtree) || *alias_symbol) {
      *tree = *entry.subtree;
      *start_byte = entry.position.bytes;
      return;
    }

    depth--;
  }
}
// Move the iterator up to the parent of the current subtree.
//
// Does nothing when the stack is already empty. Leaving a visible subtree
// that was actually entered (not merely its padding) lowers the visible
// depth. The padding flag is cleared unless the entry being left is a first
// child (child_index 0).
static void iterator_ascend(Iterator *self) {
  if (iterator_done(self)) return;

  if (iterator_tree_is_visible(self)) {
    if (!self->in_padding) self->visible_depth--;
  }

  uint32_t leaving_child_index = array_back(&self->cursor.stack)->child_index;
  if (leaving_child_index > 0) {
    self->in_padding = false;
  }

  self->cursor.stack.size--;
}
// Descend from the current subtree towards byte offset `goal_position`.
//
// At each level the first child whose end byte lies beyond `goal_position`
// is pushed onto the stack. If that child is visible the function stops and
// returns true; otherwise it keeps descending through the invisible child.
//
// Returns false when the iterator is in padding, or when no visible child
// was reached. In the latter case any invisible children pushed along the
// way remain on the stack.
static bool iterator_descend(Iterator *self, uint32_t goal_position) {
// Padding has no children to descend into.
if (self->in_padding) return false;
bool did_descend = false;
do {
did_descend = false;
TreeCursorEntry entry = *array_back(&self->cursor.stack);
// Running start position (including padding) of the child being examined.
Length position = entry.position;
// Index among the non-extra children; used for alias lookups.
uint32_t structural_child_index = 0;
for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) {
const Subtree *child = &ts_subtree_children(*entry.subtree)[i];
// child_left: where the child's content begins (after its padding).
// child_right: where the child's content ends.
Length child_left = length_add(position, ts_subtree_padding(*child));
Length child_right = length_add(child_left, ts_subtree_size(*child));
if (child_right.bytes > goal_position) {
array_push(&self->cursor.stack, ((TreeCursorEntry) {
.subtree = child,
.position = position,
.child_index = i,
.structural_child_index = structural_child_index,
}));
if (iterator_tree_is_visible(self)) {
// The goal lies before the child's content, i.e. inside its padding.
if (child_left.bytes > goal_position) {
self->in_padding = true;
} else {
self->visible_depth++;
}
return true;
}
// Invisible child: keep descending from it in the next outer iteration.
did_descend = true;
break;
}
position = child_right;
if (!ts_subtree_extra(*child)) structural_child_index++;
}
} while (did_descend);
return false;
}
// Advance the iterator to the next region of the tree.
//
// If the iterator is in the padding of the current subtree, it moves onto
// the subtree itself: a visible subtree raises the visible depth, an
// invisible one is descended into from byte 0.
//
// Otherwise entries are popped until one has a following sibling, and that
// sibling is pushed. The function returns without pushing anything once the
// stack becomes empty.
static void iterator_advance(Iterator *self) {
if (self->in_padding) {
self->in_padding = false;
if (iterator_tree_is_visible(self)) {
self->visible_depth++;
} else {
iterator_descend(self, 0);
}
return;
}
for (;;) {
// Leaving the current subtree; account for it if it was visible.
if (iterator_tree_is_visible(self)) self->visible_depth--;
TreeCursorEntry entry = array_pop(&self->cursor.stack);
if (iterator_done(self)) return;
const Subtree *parent = array_back(&self->cursor.stack)->subtree;
uint32_t child_index = entry.child_index + 1;
// Only continue at this level if the parent has another child after the
// one just popped; otherwise loop and pop the parent as well.
if (ts_subtree_child_count(*parent) > child_index) {
// The next sibling starts where the popped subtree (padding + size) ends.
Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree));
uint32_t structural_child_index = entry.structural_child_index;
if (!ts_subtree_extra(*entry.subtree)) structural_child_index++;
const Subtree *next_child = &ts_subtree_children(*parent)[child_index];
array_push(&self->cursor.stack, ((TreeCursorEntry) {
.subtree = next_child,
.position = position,
.child_index = child_index,
.structural_child_index = structural_child_index,
}));
if (iterator_tree_is_visible(self)) {
// A visible sibling with padding is first visited as padding.
if (ts_subtree_padding(*next_child).bytes > 0) {
self->in_padding = true;
} else {
self->visible_depth++;
}
} else {
iterator_descend(self, 0);
}
break;
}
}
}
// Result of comparing the subtrees that two iterators currently point at.
typedef enum {
// The subtrees are different.
IteratorDiffers,
// The subtrees have the same symbol and alias but might differ internally.
IteratorMayDiffer,
// The subtrees are treated as identical and can be skipped as a whole.
IteratorMatches,
} IteratorComparison;
// Compare the nearest visible subtrees of two iterators.
//
// Returns IteratorMatches when neither iterator has a visible subtree, or
// when both subtrees share alias and symbol and additionally start at the
// same byte, have equal byte sizes, carry no pending changes, are not error
// nodes, both have a parse state other than TS_TREE_STATE_NONE, and agree on
// whether that parse state is ERROR_STATE. Returns IteratorMayDiffer when
// only alias and symbol agree, and IteratorDiffers in every other case.
static IteratorComparison iterator_compare(
  const Iterator *old_iter,
  const Iterator *new_iter
) {
  Subtree old_tree = NULL_SUBTREE;
  TSSymbol old_alias_symbol = 0;
  uint32_t old_start = 0;
  iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);

  Subtree new_tree = NULL_SUBTREE;
  TSSymbol new_alias_symbol = 0;
  uint32_t new_start = 0;
  iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start);

  bool old_missing = !old_tree.ptr;
  bool new_missing = !new_tree.ptr;
  if (old_missing && new_missing) return IteratorMatches;
  if (old_missing || new_missing) return IteratorDiffers;

  if (old_alias_symbol != new_alias_symbol) return IteratorDiffers;
  if (ts_subtree_symbol(old_tree) != ts_subtree_symbol(new_tree)) return IteratorDiffers;

  // Same kind of node on both sides; check whether it can be skipped whole.
  bool certainly_unchanged =
    old_start == new_start &&
    !ts_subtree_has_changes(old_tree) &&
    ts_subtree_symbol(old_tree) != ts_builtin_sym_error &&
    ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes &&
    ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE &&
    ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE &&
    (ts_subtree_parse_state(old_tree) == ERROR_STATE) ==
    (ts_subtree_parse_state(new_tree) == ERROR_STATE);

  return certainly_unchanged ? IteratorMatches : IteratorMayDiffer;
}
#ifdef DEBUG_GET_CHANGED_RANGES
// Debug helper: print the symbol name of the current subtree, a "(p)" marker
// while in padding, the visible depth, and the start/end points of the
// current region (rows are printed 1-based).
static inline void iterator_print_state(Iterator *self) {
  TreeCursorEntry top = *array_back(&self->cursor.stack);
  TSPoint region_start = iterator_start_position(self).extent;
  TSPoint region_end = iterator_end_position(self).extent;
  const char *symbol_name = ts_language_symbol_name(
    self->language,
    ts_subtree_symbol(*top.subtree)
  );
  const char *padding_marker = self->in_padding ? "(p)" : " ";

  printf(
    "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
    symbol_name, padding_marker,
    self->visible_depth,
    region_start.row + 1, region_start.column,
    region_end.row + 1, region_end.column
  );
}
#endif
// Compute the ranges in which `old_tree` and `new_tree` differ.
//
// Both trees are walked in lock-step with two iterators. Regions whose
// subtrees are judged different are appended to a result array.
//
// Parameters:
//   old_tree, new_tree  - roots of the two trees to compare.
//   cursor1, cursor2    - cursors whose stacks are cleared and used for the
//                         traversal of the old and new tree respectively; the
//                         final iterator cursors are written back to them.
//   language            - language used for alias and symbol lookups.
//   included_range_differences - ranges that force a closer look at subtrees
//                         that would otherwise match; it is dereferenced
//                         unconditionally, so it must not be NULL.
//   ranges              - out: receives the contents pointer of the result
//                         array.
//
// Returns the number of ranges stored in `*ranges`.
// NOTE(review): the result buffer is handed out without being freed here, so
// the caller presumably takes ownership of it - confirm against the callers.
unsigned ts_subtree_get_changed_ranges(
const Subtree *old_tree, const Subtree *new_tree,
TreeCursor *cursor1, TreeCursor *cursor2,
const TSLanguage *language,
const TSRangeArray *included_range_differences,
TSRange **ranges
) {
TSRangeArray results = array_new();
Iterator old_iter = iterator_new(cursor1, old_tree, language);
Iterator new_iter = iterator_new(cursor2, new_tree, language);
unsigned included_range_difference_index = 0;
// If the two trees do not start at the same byte, the gap between the two
// start positions is reported as changed and the comparison begins at the
// later start.
Length position = iterator_start_position(&old_iter);
Length next_position = iterator_start_position(&new_iter);
if (position.bytes < next_position.bytes) {
ts_range_array_add(&results, position, next_position);
position = next_position;
} else if (position.bytes > next_position.bytes) {
ts_range_array_add(&results, next_position, position);
next_position = position;
}
do {
#ifdef DEBUG_GET_CHANGED_RANGES
printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column);
iterator_print_state(&old_iter);
printf("\tvs\t");
iterator_print_state(&new_iter);
puts("");
#endif
// Compare the old and new subtrees.
IteratorComparison comparison = iterator_compare(&old_iter, &new_iter);
// Even if the two subtrees appear to be identical, they could differ
// internally if they contain a range of text that was previously
// excluded from the parse, and is now included, or vice-versa.
if (comparison == IteratorMatches && ts_range_array_intersects(
included_range_differences,
included_range_difference_index,
position.bytes,
iterator_end_position(&old_iter).bytes
)) {
comparison = IteratorMayDiffer;
}
bool is_changed = false;
switch (comparison) {
// If the subtrees are definitely identical, move to the end
// of both subtrees.
case IteratorMatches:
next_position = iterator_end_position(&old_iter);
break;
// If the subtrees might differ internally, descend into both
// subtrees, finding the first child that spans the current position.
// When only one of the two iterators can descend, the region up to the
// end of that iterator's new position is recorded as changed.
case IteratorMayDiffer:
if (iterator_descend(&old_iter, position.bytes)) {
if (!iterator_descend(&new_iter, position.bytes)) {
is_changed = true;
next_position = iterator_end_position(&old_iter);
}
} else if (iterator_descend(&new_iter, position.bytes)) {
is_changed = true;
next_position = iterator_end_position(&new_iter);
} else {
next_position = length_min(
iterator_end_position(&old_iter),
iterator_end_position(&new_iter)
);
}
break;
// If the subtrees are different, record a change and then move
// to the end of both subtrees.
case IteratorDiffers:
is_changed = true;
next_position = length_min(
iterator_end_position(&old_iter),
iterator_end_position(&new_iter)
);
break;
}
// Ensure that both iterators are caught up to the current position.
while (
!iterator_done(&old_iter) &&
iterator_end_position(&old_iter).bytes <= next_position.bytes
) iterator_advance(&old_iter);
while (
!iterator_done(&new_iter) &&
iterator_end_position(&new_iter).bytes <= next_position.bytes
) iterator_advance(&new_iter);
// Ensure that both iterators are at the same depth in the tree.
while (old_iter.visible_depth > new_iter.visible_depth) {
iterator_ascend(&old_iter);
}
while (new_iter.visible_depth > old_iter.visible_depth) {
iterator_ascend(&new_iter);
}
if (is_changed) {
#ifdef DEBUG_GET_CHANGED_RANGES
printf(
" change: [[%u, %u] - [%u, %u]]\n",
position.extent.row + 1, position.extent.column,
next_position.extent.row + 1, next_position.extent.column
);
#endif
ts_range_array_add(&results, position, next_position);
}
position = next_position;
// Keep track of the current position in the included range differences
// array in order to avoid scanning the entire array on each iteration.
while (included_range_difference_index < included_range_differences->size) {
const TSRange *range = &included_range_differences->contents[
included_range_difference_index
];
if (range->end_byte <= position.bytes) {
included_range_difference_index++;
} else {
break;
}
}
} while (!iterator_done(&old_iter) && !iterator_done(&new_iter));
// If the two trees have different total sizes, the span between the shorter
// and the longer end is reported as changed.
Length old_size = ts_subtree_total_size(*old_tree);
Length new_size = ts_subtree_total_size(*new_tree);
if (old_size.bytes < new_size.bytes) {
ts_range_array_add(&results, old_size, new_size);
} else if (new_size.bytes < old_size.bytes) {
ts_range_array_add(&results, new_size, old_size);
}
// The iterators worked on copies of the cursors; write those copies back so
// the caller's cursors reflect the final stack state.
*cursor1 = old_iter.cursor;
*cursor2 = new_iter.cursor;
*ranges = results.contents;
return results.size;
}

View file

@ -0,0 +1,36 @@
// Declarations for computing the ranges in which two syntax trees, or two
// lists of included ranges, differ.
#ifndef TREE_SITTER_GET_CHANGED_RANGES_H_
#define TREE_SITTER_GET_CHANGED_RANGES_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./tree_cursor.h"
#include "./subtree.h"
// Growable array of TSRange values.
typedef Array(TSRange) TSRangeArray;
// Compare an old and a new list of ranges and add to `differences` the spans
// that are covered by exactly one of the two lists.
void ts_range_array_get_changed_ranges(
const TSRange *old_ranges, unsigned old_range_count,
const TSRange *new_ranges, unsigned new_range_count,
TSRangeArray *differences
);
// NOTE(review): judging by its name and its use in
// ts_subtree_get_changed_ranges, this reports whether any range of `self`,
// searched from `start_index`, overlaps the bytes from `start_byte` to
// `end_byte` - confirm against the definition.
bool ts_range_array_intersects(
const TSRangeArray *self, unsigned start_index,
uint32_t start_byte, uint32_t end_byte
);
// Compare `old_tree` with `new_tree`, store a pointer to the array of changed
// ranges in `*ranges` and return the number of ranges. `cursor1` and `cursor2`
// are used as traversal state for the old and new tree respectively.
unsigned ts_subtree_get_changed_ranges(
const Subtree *old_tree, const Subtree *new_tree,
TreeCursor *cursor1, TreeCursor *cursor2,
const TSLanguage *language,
const TSRangeArray *included_range_differences,
TSRange **ranges
);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_GET_CHANGED_RANGES_H_

21
parser/src/host.h Normal file
View file

@ -0,0 +1,21 @@
// Determine endianness and pointer size based on known defines.
// TS_BIG_ENDIAN and TS_PTR_SIZE can be set as -D compiler arguments
// to override this.

// UINTPTR_MAX is provided by <stdint.h>. Include it here so this header is
// self-contained: an identifier that is undefined inside #if evaluates to 0,
// which would silently select a 64-bit pointer size on every target.
#include <stdint.h>

// TS_BIG_ENDIAN: 1 when the target is big-endian, 0 otherwise.
#if !defined(TS_BIG_ENDIAN)
#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \
  || (defined( __APPLE_CC__) && (defined(__ppc__) || defined(__ppc64__)))
#define TS_BIG_ENDIAN 1
#else
#define TS_BIG_ENDIAN 0
#endif
#endif

// TS_PTR_SIZE: pointer width in bits; anything that is not 32-bit is
// treated as 64-bit.
#if !defined(TS_PTR_SIZE)
#if UINTPTR_MAX == 0xFFFFFFFF
#define TS_PTR_SIZE 32
#else
#define TS_PTR_SIZE 64
#endif
#endif

View file

@ -1,36 +1,37 @@
#include "./language.h" #include "./language.h"
#include "parser/api.h"
#include "./api.h"
#include <string.h> #include <string.h>
const t_language *ts_language_copy(const t_language *self) { const TSLanguage *ts_language_copy(const TSLanguage *self) {
return self; return self;
} }
void ts_language_delete(const t_language *self) { void ts_language_delete(const TSLanguage *self) {
(void)(self); (void)(self);
} }
t_u32 ts_language_symbol_count(const t_language *self) { uint32_t ts_language_symbol_count(const TSLanguage *self) {
return self->symbol_count + self->alias_count; return self->symbol_count + self->alias_count;
} }
t_u32 ts_language_state_count(const t_language *self) { uint32_t ts_language_state_count(const TSLanguage *self) {
return self->state_count; return self->state_count;
} }
t_u32 ts_language_version(const t_language *self) { uint32_t ts_language_version(const TSLanguage *self) {
return self->version; return self->version;
} }
t_u32 ts_language_field_count(const t_language *self) { uint32_t ts_language_field_count(const TSLanguage *self) {
return self->field_count; return self->field_count;
} }
void ts_language_table_entry( void ts_language_table_entry(
const t_language *self, const TSLanguage *self,
t_state_id state, TSStateId state,
t_symbol symbol, TSSymbol symbol,
t_table_entry *result TableEntry *result
) { ) {
if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
result->action_count = 0; result->action_count = 0;
@ -38,48 +39,48 @@ void ts_language_table_entry(
result->actions = NULL; result->actions = NULL;
} else { } else {
assert(symbol < self->token_count); assert(symbol < self->token_count);
t_u32 action_index = ts_language_lookup(self, state, symbol); uint32_t action_index = ts_language_lookup(self, state, symbol);
const t_parse_action_entry *entry = &self->parse_actions[action_index]; const TSParseActionEntry *entry = &self->parse_actions[action_index];
result->action_count = entry->entry.count; result->action_count = entry->entry.count;
result->is_reusable = entry->entry.reusable; result->is_reusable = entry->entry.reusable;
result->actions = (const t_parse_actions *)(entry + 1); result->actions = (const TSParseAction *)(entry + 1);
} }
} }
t_symbol_metadata ts_language_symbol_metadata( TSSymbolMetadata ts_language_symbol_metadata(
const t_language *self, const TSLanguage *self,
t_symbol symbol TSSymbol symbol
) { ) {
if (symbol == ts_builtin_sym_error) { if (symbol == ts_builtin_sym_error) {
return (t_symbol_metadata) {.visible = true, .named = true}; return (TSSymbolMetadata) {.visible = true, .named = true};
} else if (symbol == ts_builtin_sym_error_repeat) { } else if (symbol == ts_builtin_sym_error_repeat) {
return (t_symbol_metadata) {.visible = false, .named = false}; return (TSSymbolMetadata) {.visible = false, .named = false};
} else { } else {
return self->symbol_metadata[symbol]; return self->symbol_metadata[symbol];
} }
} }
t_symbol ts_language_public_symbol( TSSymbol ts_language_public_symbol(
const t_language *self, const TSLanguage *self,
t_symbol symbol TSSymbol symbol
) { ) {
if (symbol == ts_builtin_sym_error) return symbol; if (symbol == ts_builtin_sym_error) return symbol;
return self->public_symbol_map[symbol]; return self->public_symbol_map[symbol];
} }
t_state_id ts_language_next_state( TSStateId ts_language_next_state(
const t_language *self, const TSLanguage *self,
t_state_id state, TSStateId state,
t_symbol symbol TSSymbol symbol
) { ) {
if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
return 0; return 0;
} else if (symbol < self->token_count) { } else if (symbol < self->token_count) {
t_u32 count; uint32_t count;
const t_parse_actions *actions = ts_language_actions(self, state, symbol, &count); const TSParseAction *actions = ts_language_actions(self, state, symbol, &count);
if (count > 0) { if (count > 0) {
t_parse_actions action = actions[count - 1]; TSParseAction action = actions[count - 1];
if (action.type == ActionTypeShift) { if (action.type == TSParseActionTypeShift) {
return action.shift.extra ? state : action.shift.state; return action.shift.extra ? state : action.shift.state;
} }
} }
@ -90,8 +91,8 @@ t_state_id ts_language_next_state(
} }
const char *ts_language_symbol_name( const char *ts_language_symbol_name(
const t_language *self, const TSLanguage *self,
t_symbol symbol TSSymbol symbol
) { ) {
if (symbol == ts_builtin_sym_error) { if (symbol == ts_builtin_sym_error) {
return "ERROR"; return "ERROR";
@ -104,16 +105,16 @@ const char *ts_language_symbol_name(
} }
} }
t_symbol ts_language_symbol_for_name( TSSymbol ts_language_symbol_for_name(
const t_language *self, const TSLanguage *self,
const char *string, const char *string,
t_u32 length, uint32_t length,
bool is_named bool is_named
) { ) {
if (!strncmp(string, "ERROR", length)) return ts_builtin_sym_error; if (!strncmp(string, "ERROR", length)) return ts_builtin_sym_error;
t_u16 count = (t_u16)ts_language_symbol_count(self); uint16_t count = (uint16_t)ts_language_symbol_count(self);
for (t_symbol i = 0; i < count; i++) { for (TSSymbol i = 0; i < count; i++) {
t_symbol_metadata metadata = ts_language_symbol_metadata(self, i); TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i);
if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue; if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue;
const char *symbol_name = self->symbol_names[i]; const char *symbol_name = self->symbol_names[i];
if (!strncmp(symbol_name, string, length) && !symbol_name[length]) { if (!strncmp(symbol_name, string, length) && !symbol_name[length]) {
@ -123,25 +124,25 @@ t_symbol ts_language_symbol_for_name(
return 0; return 0;
} }
t_symbol_type ts_language_symbol_type( TSSymbolType ts_language_symbol_type(
const t_language *self, const TSLanguage *self,
t_symbol symbol TSSymbol symbol
) { ) {
t_symbol_metadata metadata = ts_language_symbol_metadata(self, symbol); TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol);
if (metadata.named && metadata.visible) { if (metadata.named && metadata.visible) {
return SymbolTypeRegular; return TSSymbolTypeRegular;
} else if (metadata.visible) { } else if (metadata.visible) {
return SymbolTypeAnonymous; return TSSymbolTypeAnonymous;
} else { } else {
return SymbolTypeAuxiliary; return TSSymbolTypeAuxiliary;
} }
} }
const char *ts_language_field_name_for_id( const char *ts_language_field_name_for_id(
const t_language *self, const TSLanguage *self,
t_field_id id TSFieldId id
) { ) {
t_u32 count = ts_language_field_count(self); uint32_t count = ts_language_field_count(self);
if (count && id <= count) { if (count && id <= count) {
return self->field_names[id]; return self->field_names[id];
} else { } else {
@ -149,13 +150,13 @@ const char *ts_language_field_name_for_id(
} }
} }
t_field_id ts_language_field_id_for_name( TSFieldId ts_language_field_id_for_name(
const t_language *self, const TSLanguage *self,
const char *name, const char *name,
t_u32 name_length uint32_t name_length
) { ) {
t_u16 count = (t_u16)ts_language_field_count(self); uint16_t count = (uint16_t)ts_language_field_count(self);
for (t_symbol i = 1; i < count + 1; i++) { for (TSSymbol i = 1; i < count + 1; i++) {
switch (strncmp(name, self->field_names[i], name_length)) { switch (strncmp(name, self->field_names[i], name_length)) {
case 0: case 0:
if (self->field_names[i][name_length] == 0) return i; if (self->field_names[i][name_length] == 0) return i;
@ -169,47 +170,47 @@ t_field_id ts_language_field_id_for_name(
return 0; return 0;
} }
t_lookahead_iterator *ts_lookahead_iterator_new(const t_language *self, t_state_id state) { TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state) {
if (state >= self->state_count) return NULL; if (state >= self->state_count) return NULL;
t_lookahead_iterator *iterator = malloc(sizeof(t_lookahead_iterator)); LookaheadIterator *iterator = ts_malloc(sizeof(LookaheadIterator));
*iterator = ts_language_lookaheads(self, state); *iterator = ts_language_lookaheads(self, state);
return (t_lookahead_iterator *)iterator; return (TSLookaheadIterator *)iterator;
} }
void ts_lookahead_iterator_delete(t_lookahead_iterator *self) { void ts_lookahead_iterator_delete(TSLookaheadIterator *self) {
free(self); ts_free(self);
} }
bool ts_lookahead_iterator_reset_state(t_lookahead_iterator * self, t_state_id state) { bool ts_lookahead_iterator_reset_state(TSLookaheadIterator * self, TSStateId state) {
t_lookahead_iterator *iterator = (t_lookahead_iterator *)self; LookaheadIterator *iterator = (LookaheadIterator *)self;
if (state >= iterator->language->state_count) return false; if (state >= iterator->language->state_count) return false;
*iterator = ts_language_lookaheads(iterator->language, state); *iterator = ts_language_lookaheads(iterator->language, state);
return true; return true;
} }
const t_language *ts_lookahead_iterator_language(const t_lookahead_iterator *self) { const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self) {
const t_lookahead_iterator *iterator = (const t_lookahead_iterator *)self; const LookaheadIterator *iterator = (const LookaheadIterator *)self;
return iterator->language; return iterator->language;
} }
bool ts_lookahead_iterator_reset(t_lookahead_iterator *self, const t_language *language, t_state_id state) { bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state) {
if (state >= language->state_count) return false; if (state >= language->state_count) return false;
t_lookahead_iterator *iterator = (t_lookahead_iterator *)self; LookaheadIterator *iterator = (LookaheadIterator *)self;
*iterator = ts_language_lookaheads(language, state); *iterator = ts_language_lookaheads(language, state);
return true; return true;
} }
bool ts_lookahead_iterator_next(t_lookahead_iterator *self) { bool ts_lookahead_iterator_next(TSLookaheadIterator *self) {
t_lookahead_iterator *iterator = (t_lookahead_iterator *)self; LookaheadIterator *iterator = (LookaheadIterator *)self;
return ts_lookahead_iterator__next(iterator); return ts_lookahead_iterator__next(iterator);
} }
t_symbol ts_lookahead_iterator_current_symbol(const t_lookahead_iterator *self) { TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) {
const t_lookahead_iterator *iterator = (const t_lookahead_iterator *)self; const LookaheadIterator *iterator = (const LookaheadIterator *)self;
return iterator->symbol; return iterator->symbol;
} }
const char *ts_lookahead_iterator_current_symbol_name(const t_lookahead_iterator *self) { const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self) {
const t_lookahead_iterator *iterator = (const t_lookahead_iterator *)self; const LookaheadIterator *iterator = (const LookaheadIterator *)self;
return ts_language_symbol_name(iterator->language, iterator->symbol); return ts_language_symbol_name(iterator->language, iterator->symbol);
} }

View file

@ -1,74 +1,72 @@
#ifndef TREE_SITTER_LANGUAGE_H_ #ifndef TREE_SITTER_LANGUAGE_H_
#define TREE_SITTER_LANGUAGE_H_ #define TREE_SITTER_LANGUAGE_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./subtree.h" #include "./subtree.h"
#include "parser/types/types_parse_action_type.h" #include "./parser.h"
#include "parser/types/types_state_id.h"
#include "parser/types/types_symbol.h"
#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1) #define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
#define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14 #define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14
#define LANGUAGE_VERSION_USABLE_VIA_WASM 13 #define LANGUAGE_VERSION_USABLE_VIA_WASM 13
typedef struct s_table_entry typedef struct {
{ const TSParseAction *actions;
const t_parse_actions *actions; uint32_t action_count;
t_u32 action_count;
bool is_reusable; bool is_reusable;
} t_table_entry; } TableEntry;
typedef struct s_lookahead_iterator typedef struct {
{ const TSLanguage *language;
const t_language *language; const uint16_t *data;
const t_u16 *data; const uint16_t *group_end;
const t_u16 *group_end; TSStateId state;
t_state_id state; uint16_t table_value;
t_u16 table_value; uint16_t section_index;
t_u16 section_index; uint16_t group_count;
t_u16 group_count;
bool is_small_state; bool is_small_state;
const t_parse_actions *actions; const TSParseAction *actions;
t_symbol symbol; TSSymbol symbol;
t_state_id next_state; TSStateId next_state;
t_u16 action_count; uint16_t action_count;
} t_lookahead_iterator; } LookaheadIterator;
void ts_language_table_entry(const t_language *, t_state_id, t_symbol, void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *);
t_table_entry *);
t_symbol_metadata ts_language_symbol_metadata(const t_language *, t_symbol); TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
t_symbol ts_language_public_symbol(const t_language *, t_symbol); TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol);
t_state_id ts_language_next_state(const t_language *self, t_state_id state, TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol);
t_symbol symbol);
static inline bool ts_language_is_symbol_external(const t_language *self, static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
t_symbol symbol)
{
return 0 < symbol && symbol < self->external_token_count + 1; return 0 < symbol && symbol < self->external_token_count + 1;
} }
static inline const t_parse_actions *ts_language_actions(const t_language *self, static inline const TSParseAction *ts_language_actions(
t_state_id state, const TSLanguage *self,
t_symbol symbol, TSStateId state,
t_u32 *count) TSSymbol symbol,
{ uint32_t *count
t_table_entry entry; ) {
TableEntry entry;
ts_language_table_entry(self, state, symbol, &entry); ts_language_table_entry(self, state, symbol, &entry);
*count = entry.action_count; *count = entry.action_count;
return entry.actions; return entry.actions;
} }
static inline bool ts_language_has_reduce_action(const t_language *self, static inline bool ts_language_has_reduce_action(
t_state_id state, const TSLanguage *self,
t_symbol symbol) TSStateId state,
{ TSSymbol symbol
t_table_entry entry; ) {
TableEntry entry;
ts_language_table_entry(self, state, symbol, &entry); ts_language_table_entry(self, state, symbol, &entry);
return entry.action_count > 0 && entry.actions[0].type == ActionTypeReduce; return entry.action_count > 0 && entry.actions[0].type == TSParseActionTypeReduce;
} }
// Lookup the table value for a given symbol and state. // Lookup the table value for a given symbol and state.
@ -78,36 +76,33 @@ static inline bool ts_language_has_reduce_action(const t_language *self,
// For 'large' parse states, this is a direct lookup. For 'small' parse // For 'large' parse states, this is a direct lookup. For 'small' parse
// states, this requires searching through the symbol groups to find // states, this requires searching through the symbol groups to find
// the given symbol. // the given symbol.
static inline t_u16 ts_language_lookup(const t_language *self, t_state_id state, static inline uint16_t ts_language_lookup(
t_symbol symbol) const TSLanguage *self,
{ TSStateId state,
if (state >= self->large_state_count) TSSymbol symbol
{ ) {
t_u32 index = if (state >= self->large_state_count) {
self->small_parse_table_map[state - self->large_state_count]; uint32_t index = self->small_parse_table_map[state - self->large_state_count];
const t_u16 *data = &self->small_parse_table[index]; const uint16_t *data = &self->small_parse_table[index];
t_u16 group_count = *(data++); uint16_t group_count = *(data++);
for (unsigned i = 0; i < group_count; i++) for (unsigned i = 0; i < group_count; i++) {
{ uint16_t section_value = *(data++);
t_u16 section_value = *(data++); uint16_t symbol_count = *(data++);
t_u16 symbol_count = *(data++); for (unsigned j = 0; j < symbol_count; j++) {
for (unsigned j = 0; j < symbol_count; j++) if (*(data++) == symbol) return section_value;
{
if (*(data++) == symbol)
return section_value;
} }
} }
return 0; return 0;
} } else {
else
{
return self->parse_table[state * self->symbol_count + symbol]; return self->parse_table[state * self->symbol_count + symbol];
} }
} }
static inline bool ts_language_has_actions(const t_language *self, static inline bool ts_language_has_actions(
t_state_id state, t_symbol symbol) const TSLanguage *self,
{ TSStateId state,
TSSymbol symbol
) {
return ts_language_lookup(self, state, symbol) != 0; return ts_language_lookup(self, state, symbol) != 0;
} }
@ -117,26 +112,23 @@ static inline bool ts_language_has_actions(const t_language *self,
// all possible symbols and checking the parse table for each one. // all possible symbols and checking the parse table for each one.
// For 'small' parse states, this exploits the structure of the // For 'small' parse states, this exploits the structure of the
// table to only visit the valid symbols. // table to only visit the valid symbols.
static inline t_lookahead_iterator ts_language_lookaheads( static inline LookaheadIterator ts_language_lookaheads(
const t_language *self, t_state_id state) const TSLanguage *self,
{ TSStateId state
) {
bool is_small_state = state >= self->large_state_count; bool is_small_state = state >= self->large_state_count;
const t_u16 *data; const uint16_t *data;
const t_u16 *group_end = NULL; const uint16_t *group_end = NULL;
t_u16 group_count = 0; uint16_t group_count = 0;
if (is_small_state) if (is_small_state) {
{ uint32_t index = self->small_parse_table_map[state - self->large_state_count];
t_u32 index =
self->small_parse_table_map[state - self->large_state_count];
data = &self->small_parse_table[index]; data = &self->small_parse_table[index];
group_end = data + 1; group_end = data + 1;
group_count = *data; group_count = *data;
} } else {
else
{
data = &self->parse_table[state * self->symbol_count] - 1; data = &self->parse_table[state * self->symbol_count] - 1;
} }
return (t_lookahead_iterator){ return (LookaheadIterator) {
.language = self, .language = self,
.data = data, .data = data,
.group_end = group_end, .group_end = group_end,
@ -147,26 +139,20 @@ static inline t_lookahead_iterator ts_language_lookaheads(
}; };
} }
static inline bool ts_lookahead_iterator__next(t_lookahead_iterator *self) static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) {
{
// For small parse states, valid symbols are listed explicitly, // For small parse states, valid symbols are listed explicitly,
// grouped by their value. There's no need to look up the actions // grouped by their value. There's no need to look up the actions
// again until moving to the next group. // again until moving to the next group.
if (self->is_small_state) if (self->is_small_state) {
{
self->data++; self->data++;
if (self->data == self->group_end) if (self->data == self->group_end) {
{ if (self->group_count == 0) return false;
if (self->group_count == 0)
return false;
self->group_count--; self->group_count--;
self->table_value = *(self->data++); self->table_value = *(self->data++);
unsigned symbol_count = *(self->data++); unsigned symbol_count = *(self->data++);
self->group_end = self->data + symbol_count; self->group_end = self->data + symbol_count;
self->symbol = *self->data; self->symbol = *self->data;
} } else {
else
{
self->symbol = *self->data; self->symbol = *self->data;
return true; return true;
} }
@ -174,120 +160,104 @@ static inline bool ts_lookahead_iterator__next(t_lookahead_iterator *self)
// For large parse states, iterate through every symbol until one // For large parse states, iterate through every symbol until one
// is found that has valid actions. // is found that has valid actions.
else else {
{ do {
do
{
self->data++; self->data++;
self->symbol++; self->symbol++;
if (self->symbol >= self->language->symbol_count) if (self->symbol >= self->language->symbol_count) return false;
return false;
self->table_value = *self->data; self->table_value = *self->data;
} while (!self->table_value); } while (!self->table_value);
} }
// Depending on if the symbols is terminal or non-terminal, the table // Depending on if the symbols is terminal or non-terminal, the table value either
// value either represents a list of actions or a successor state. // represents a list of actions or a successor state.
if (self->symbol < self->language->token_count) if (self->symbol < self->language->token_count) {
{ const TSParseActionEntry *entry = &self->language->parse_actions[self->table_value];
const t_parse_action_entry *entry =
&self->language->parse_actions[self->table_value];
self->action_count = entry->entry.count; self->action_count = entry->entry.count;
self->actions = (const t_parse_actions *)(entry + 1); self->actions = (const TSParseAction *)(entry + 1);
self->next_state = 0; self->next_state = 0;
} } else {
else
{
self->action_count = 0; self->action_count = 0;
self->next_state = self->table_value; self->next_state = self->table_value;
} }
return true; return true;
} }
// Whether the state is a "primary state". If this returns false, it // Whether the state is a "primary state". If this returns false, it indicates that there exists
// indicates that there exists another state that behaves identically to // another state that behaves identically to this one with respect to query analysis.
// this one with respect to query analysis. static inline bool ts_language_state_is_primary(
static inline bool ts_language_state_is_primary(const t_language *self, const TSLanguage *self,
t_state_id state) TSStateId state
{ ) {
if (self->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) if (self->version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) {
{
return state == self->primary_state_ids[state]; return state == self->primary_state_ids[state];
} } else {
else
{
return true; return true;
} }
} }
static inline const bool *ts_language_enabled_external_tokens( static inline const bool *ts_language_enabled_external_tokens(
const t_language *self, unsigned external_scanner_state) const TSLanguage *self,
{ unsigned external_scanner_state
if (external_scanner_state == 0) ) {
{ if (external_scanner_state == 0) {
return NULL; return NULL;
} } else {
else return self->external_scanner.states + self->external_token_count * external_scanner_state;
{
return self->external_scanner.states +
self->external_token_count * external_scanner_state;
} }
} }
static inline const t_symbol *ts_language_alias_sequence(const t_language *self, static inline const TSSymbol *ts_language_alias_sequence(
t_u32 production_id) const TSLanguage *self,
{ uint32_t production_id
return production_id ) {
? &self->alias_sequences[production_id * return production_id ?
self->max_alias_sequence_length] &self->alias_sequences[production_id * self->max_alias_sequence_length] :
: NULL; NULL;
} }
static inline t_symbol ts_language_alias_at(const t_language *self, static inline TSSymbol ts_language_alias_at(
t_u32 production_id, const TSLanguage *self,
t_u32 child_index) uint32_t production_id,
{ uint32_t child_index
return production_id ) {
? self->alias_sequences[production_id * return production_id ?
self->max_alias_sequence_length + self->alias_sequences[production_id * self->max_alias_sequence_length + child_index] :
child_index] 0;
: 0;
} }
static inline void ts_language_field_map(const t_language *self, static inline void ts_language_field_map(
t_u32 production_id, const TSLanguage *self,
const t_field_map_entry **start, uint32_t production_id,
const t_field_map_entry **end) const TSFieldMapEntry **start,
{ const TSFieldMapEntry **end
if (self->field_count == 0) ) {
{ if (self->field_count == 0) {
*start = NULL; *start = NULL;
*end = NULL; *end = NULL;
return; return;
} }
t_field_map_slice slice = self->field_map_slices[production_id]; TSFieldMapSlice slice = self->field_map_slices[production_id];
*start = &self->field_map_entries[slice.index]; *start = &self->field_map_entries[slice.index];
*end = &self->field_map_entries[slice.index] + slice.length; *end = &self->field_map_entries[slice.index] + slice.length;
} }
static inline void ts_language_aliases_for_symbol(const t_language *self, static inline void ts_language_aliases_for_symbol(
t_symbol original_symbol, const TSLanguage *self,
const t_symbol **start, TSSymbol original_symbol,
const t_symbol **end) const TSSymbol **start,
{ const TSSymbol **end
) {
*start = &self->public_symbol_map[original_symbol]; *start = &self->public_symbol_map[original_symbol];
*end = *start + 1; *end = *start + 1;
unsigned idx = 0; unsigned idx = 0;
for (;;) for (;;) {
{ TSSymbol symbol = self->alias_map[idx++];
t_symbol symbol = self->alias_map[idx++]; if (symbol == 0 || symbol > original_symbol) break;
if (symbol == 0 || symbol > original_symbol) uint16_t count = self->alias_map[idx++];
break; if (symbol == original_symbol) {
t_u16 count = self->alias_map[idx++];
if (symbol == original_symbol)
{
*start = &self->alias_map[idx]; *start = &self->alias_map[idx];
*end = &self->alias_map[idx + count]; *end = &self->alias_map[idx + count];
break; break;
@ -297,13 +267,13 @@ static inline void ts_language_aliases_for_symbol(const t_language *self,
} }
static inline void ts_language_write_symbol_as_dot_string( static inline void ts_language_write_symbol_as_dot_string(
const t_language *self, FILE *f, t_symbol symbol) const TSLanguage *self,
{ FILE *f,
TSSymbol symbol
) {
const char *name = ts_language_symbol_name(self, symbol); const char *name = ts_language_symbol_name(self, symbol);
for (const char *chr = name; *chr; chr++) for (const char *chr = name; *chr; chr++) {
{ switch (*chr) {
switch (*chr)
{
case '"': case '"':
case '\\': case '\\':
fputc('\\', f); fputc('\\', f);
@ -322,4 +292,8 @@ static inline void ts_language_write_symbol_as_dot_string(
} }
} }
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_LANGUAGE_H_ #endif // TREE_SITTER_LANGUAGE_H_

52
parser/src/length.h Normal file
View file

@ -0,0 +1,52 @@
#ifndef TREE_SITTER_LENGTH_H_
#define TREE_SITTER_LENGTH_H_
#include <stdlib.h>
#include <stdbool.h>
#include "./point.h"
#include "./api.h"
// A size or position in the source text, tracked two ways at once: as a
// byte count and as a TSPoint extent. The constants in this header use
// positional initializers, so the field order (bytes, then extent) matters.
typedef struct {
// Size or offset measured in bytes.
uint32_t bytes;
// The same size or offset expressed as a TSPoint.
TSPoint extent;
} Length;
// Sentinel meaning "no length has been set": a zero byte count paired with
// a non-zero extent component, which is the combination that
// length_is_undefined() checks for.
static const Length LENGTH_UNDEFINED = {0, {0, 1}};
// Largest representable length: every field is UINT32_MAX.
static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}};
// Tell whether `length` is the "undefined" sentinel, i.e. it has a zero
// byte count together with a non-zero extent column.
static inline bool length_is_undefined(Length length) {
  if (length.bytes != 0) {
    return false;
  }
  return length.extent.column != 0;
}
// Return whichever argument has the smaller byte count. Only `bytes` is
// compared (the extent is ignored), and `len2` wins when the counts are equal.
static inline Length length_min(Length len1, Length len2) {
  if (len1.bytes < len2.bytes) {
    return len1;
  }
  return len2;
}
// Add two lengths: the byte counts are summed directly and the extents are
// combined with point_add().
static inline Length length_add(Length len1, Length len2) {
  Length sum = len1;
  sum.bytes += len2.bytes;
  sum.extent = point_add(len1.extent, len2.extent);
  return sum;
}
// Subtract `len2` from `len1`: the byte counts are subtracted directly and
// the extents are combined with point_sub(). The byte arithmetic is unsigned,
// so it wraps around when `len2.bytes` exceeds `len1.bytes`.
static inline Length length_sub(Length len1, Length len2) {
  Length difference = len1;
  difference.bytes -= len2.bytes;
  difference.extent = point_sub(len1.extent, len2.extent);
  return difference;
}
// Produce a Length whose byte count and extent are all zero.
static inline Length length_zero(void) {
  const Length zero = {0, {0, 0}};
  return zero;
}
// Subtract `len2` from `len1`, clamping at zero: when `len1` does not have
// strictly more bytes than `len2`, the zero length is returned instead of
// letting the unsigned subtraction wrap. Only the byte counts decide which
// branch is taken.
static inline Length length_saturating_sub(Length len1, Length len2) {
  if (len1.bytes <= len2.bytes) {
    return length_zero();
  }
  return length_sub(len1, len2);
}
#endif

View file

@ -1,49 +1,52 @@
#include "parser/lexer.h"
#include "parser/parser_length.h"
#include "./subtree.h"
#include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include "./lexer.h"
#include "./subtree.h"
#include "./length.h"
//#include "./unicode.h"
#define LOG(message, character) \ #define LOG(message, character) \
if (self->logger.log) \ if (self->logger.log) { \
{ \ snprintf( \
snprintf(self->debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \ self->debug_buffer, \
32 <= character && character < 127 ? message \ TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \
" character:'%c'" \ 32 <= character && character < 127 ? \
: message " character:%d", \ message " character:'%c'" : \
character); \ message " character:%d", \
self->logger.log(self->logger.payload, LogTypeLex, \ character \
self->debug_buffer); \ ); \
self->logger.log( \
self->logger.payload, \
TSLogTypeLex, \
self->debug_buffer \
); \
} }
static const t_i32 BYTE_ORDER_MARK = 0xFEFF; static const int32_t BYTE_ORDER_MARK = 0xFEFF;
static const t_parser_range DEFAULT_RANGE = {.start_point = static const TSRange DEFAULT_RANGE = {
{ .start_point = {
.row = 0, .row = 0,
.column = 0, .column = 0,
}, },
.end_point = .end_point = {
{
.row = UINT32_MAX, .row = UINT32_MAX,
.column = UINT32_MAX, .column = UINT32_MAX,
}, },
.start_byte = 0, .start_byte = 0,
.end_byte = UINT32_MAX}; .end_byte = UINT32_MAX
};
// Check if the lexer has reached EOF. This state is stored // Check if the lexer has reached EOF. This state is stored
// by setting the lexer's `current_included_range_index` such that // by setting the lexer's `current_included_range_index` such that
// it has consumed all of its available ranges. // it has consumed all of its available ranges.
static bool ts_lexer__eof(const t_lexer *_self) static bool ts_lexer__eof(const TSLexer *_self) {
{ Lexer *self = (Lexer *)_self;
t_liblexer *self = (t_liblexer *)_self;
return self->current_included_range_index == self->included_range_count; return self->current_included_range_index == self->included_range_count;
} }
// Clear the currently stored chunk of source code, because the lexer's // Clear the currently stored chunk of source code, because the lexer's
// position has changed. // position has changed.
static void ts_lexer__clear_chunk(t_liblexer *self) static void ts_lexer__clear_chunk(Lexer *self) {
{
self->chunk = NULL; self->chunk = NULL;
self->chunk_size = 0; self->chunk_size = 0;
self->chunk_start = 0; self->chunk_start = 0;
@ -51,83 +54,86 @@ static void ts_lexer__clear_chunk(t_liblexer *self)
// Call the lexer's input callback to obtain a new chunk of source code // Call the lexer's input callback to obtain a new chunk of source code
// for the current position. // for the current position.
static void ts_lexer__get_chunk(t_liblexer *self) static void ts_lexer__get_chunk(Lexer *self) {
{
self->chunk_start = self->current_position.bytes; self->chunk_start = self->current_position.bytes;
self->chunk = self->chunk = self->input.read(
self->input.read(self->input.payload, self->current_position.bytes, self->input.payload,
self->current_position.extent, &self->chunk_size); self->current_position.bytes,
if (!self->chunk_size) self->current_position.extent,
{ &self->chunk_size
);
if (!self->chunk_size) {
self->current_included_range_index = self->included_range_count; self->current_included_range_index = self->included_range_count;
self->chunk = NULL; self->chunk = NULL;
} }
} }
typedef uint32_t (*DecodeFunc)(
const uint8_t *string,
uint32_t length,
int32_t *code_point
);
typedef t_i32 (*UnicodeDecodeFunction)(const t_i8 *chunk, t_i32 size, static uint32_t ts_decode_ascii(
t_i32 *lookahead); const uint8_t *string,
uint32_t length,
t_i32 my_decode(const t_i8 *chunk, t_i32 size, t_i32 *lookahead) int32_t *code_point
{ ) {
(void)(size); uint32_t i = 1;
*((t_i32 *)lookahead) = *chunk; (void)(length);
return (1); *code_point = *string;
return i;
} }
#define TS_DECODE_ERROR -1
// Decode the next unicode character in the current chunk of source code. // Decode the next unicode character in the current chunk of source code.
// This assumes that the lexer has already retrieved a chunk of source // This assumes that the lexer has already retrieved a chunk of source
// code that spans the current position. // code that spans the current position.
static void ts_lexer__get_lookahead(t_liblexer *self) static void ts_lexer__get_lookahead(Lexer *self) {
{ uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
t_i32 position_in_chunk = uint32_t size = self->chunk_size - position_in_chunk;
self->current_position.bytes - self->chunk_start;
t_i32 size = self->chunk_size - position_in_chunk;
if (size == 0) if (size == 0) {
{
self->lookahead_size = 1; self->lookahead_size = 1;
self->data.lookahead = '\0'; self->data.lookahead = '\0';
return; return;
} }
const t_i8 *chunk = (const t_i8 *)self->chunk + position_in_chunk; #define TS_DECODE_ERROR -1
UnicodeDecodeFunction decode = my_decode;
self->lookahead_size = decode(chunk, size, &self->data.lookahead); const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
// UnicodeDecodeFunction decode = self->input.encoding == TSInputEncodingUTF8
// ? ts_decode_utf8
// : ts_decode_utf16;
self->lookahead_size = ts_decode_ascii(chunk, size, &self->data.lookahead);
// If this chunk ended in the middle of a multi-byte character, // If this chunk ended in the middle of a multi-byte character,
// try again with a fresh chunk. // try again with a fresh chunk.
if (self->data.lookahead == TS_DECODE_ERROR && size < 4) if (self->data.lookahead == TS_DECODE_ERROR && size < 4) {
{
ts_lexer__get_chunk(self); ts_lexer__get_chunk(self);
chunk = (const t_i8 *)self->chunk; chunk = (const uint8_t *)self->chunk;
size = self->chunk_size; size = self->chunk_size;
self->lookahead_size = decode(chunk, size, &self->data.lookahead); self->lookahead_size = ts_decode_ascii(chunk, size, &self->data.lookahead);
} }
if (self->data.lookahead == TS_DECODE_ERROR) if (self->data.lookahead == TS_DECODE_ERROR) {
{
self->lookahead_size = 1; self->lookahead_size = 1;
} }
} }
static void ts_lexer_goto(t_liblexer *self, t_parse_length position) static void ts_lexer_goto(Lexer *self, Length position) {
{
self->current_position = position; self->current_position = position;
// Move to the first valid position at or after the given position. // Move to the first valid position at or after the given position.
bool found_included_range = false; bool found_included_range = false;
for (unsigned i = 0; i < self->included_range_count; i++) for (unsigned i = 0; i < self->included_range_count; i++) {
{ TSRange *included_range = &self->included_ranges[i];
t_parser_range *included_range = &self->included_ranges[i]; if (
if (included_range->end_byte > self->current_position.bytes && included_range->end_byte > self->current_position.bytes &&
included_range->end_byte > included_range->start_byte) included_range->end_byte > included_range->start_byte
{ ) {
if (included_range->start_byte >= self->current_position.bytes) if (included_range->start_byte >= self->current_position.bytes) {
{ self->current_position = (Length) {
self->current_position = (t_parse_length){
.bytes = included_range->start_byte, .bytes = included_range->start_byte,
.extent = included_range->start_point, .extent = included_range->start_point,
}; };
@ -139,14 +145,13 @@ static void ts_lexer_goto(t_liblexer *self, t_parse_length position)
} }
} }
if (found_included_range) if (found_included_range) {
{
// If the current position is outside of the current chunk of text, // If the current position is outside of the current chunk of text,
// then clear out the current chunk of text. // then clear out the current chunk of text.
if (self->chunk && (self->current_position.bytes < self->chunk_start || if (self->chunk && (
self->current_position.bytes >= self->current_position.bytes < self->chunk_start ||
self->chunk_start + self->chunk_size)) self->current_position.bytes >= self->chunk_start + self->chunk_size
{ )) {
ts_lexer__clear_chunk(self); ts_lexer__clear_chunk(self);
} }
@ -156,12 +161,10 @@ static void ts_lexer_goto(t_liblexer *self, t_parse_length position)
// If the given position is beyond any of included ranges, move to the EOF // If the given position is beyond any of included ranges, move to the EOF
// state - past the end of the included ranges. // state - past the end of the included ranges.
else else {
{
self->current_included_range_index = self->included_range_count; self->current_included_range_index = self->included_range_count;
t_parser_range *last_included_range = TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
&self->included_ranges[self->included_range_count - 1]; self->current_position = (Length) {
self->current_position = (t_parse_length){
.bytes = last_included_range->end_byte, .bytes = last_included_range->end_byte,
.extent = last_included_range->end_point, .extent = last_included_range->end_point,
}; };
@ -172,61 +175,48 @@ static void ts_lexer_goto(t_liblexer *self, t_parse_length position)
} }
// Intended to be called only from functions that control logging. // Intended to be called only from functions that control logging.
static void ts_lexer__do_advance(t_liblexer *self, bool skip) static void ts_lexer__do_advance(Lexer *self, bool skip) {
{ if (self->lookahead_size) {
if (self->lookahead_size)
{
self->current_position.bytes += self->lookahead_size; self->current_position.bytes += self->lookahead_size;
if (self->data.lookahead == '\n') if (self->data.lookahead == '\n') {
{
self->current_position.extent.row++; self->current_position.extent.row++;
self->current_position.extent.column = 0; self->current_position.extent.column = 0;
} } else {
else
{
self->current_position.extent.column += self->lookahead_size; self->current_position.extent.column += self->lookahead_size;
} }
} }
const t_parser_range *current_range = const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
&self->included_ranges[self->current_included_range_index]; while (
while (self->current_position.bytes >= current_range->end_byte || self->current_position.bytes >= current_range->end_byte ||
current_range->end_byte == current_range->start_byte) current_range->end_byte == current_range->start_byte
{ ) {
if (self->current_included_range_index < self->included_range_count) if (self->current_included_range_index < self->included_range_count) {
{
self->current_included_range_index++; self->current_included_range_index++;
} }
if (self->current_included_range_index < self->included_range_count) if (self->current_included_range_index < self->included_range_count) {
{
current_range++; current_range++;
self->current_position = (t_parse_length){ self->current_position = (Length) {
current_range->start_byte, current_range->start_byte,
current_range->start_point, current_range->start_point,
}; };
} } else {
else
{
current_range = NULL; current_range = NULL;
break; break;
} }
} }
if (skip) if (skip) self->token_start_position = self->current_position;
self->token_start_position = self->current_position;
if (current_range) if (current_range) {
{ if (
if (self->current_position.bytes < self->chunk_start || self->current_position.bytes < self->chunk_start ||
self->current_position.bytes >= self->current_position.bytes >= self->chunk_start + self->chunk_size
self->chunk_start + self->chunk_size) ) {
{
ts_lexer__get_chunk(self); ts_lexer__get_chunk(self);
} }
ts_lexer__get_lookahead(self); ts_lexer__get_lookahead(self);
} } else {
else
{
ts_lexer__clear_chunk(self); ts_lexer__clear_chunk(self);
self->data.lookahead = '\0'; self->data.lookahead = '\0';
self->lookahead_size = 1; self->lookahead_size = 1;
@ -235,32 +225,36 @@ static void ts_lexer__do_advance(t_liblexer *self, bool skip)
// Advance to the next character in the source code, retrieving a new // Advance to the next character in the source code, retrieving a new
// chunk of source code if needed. // chunk of source code if needed.
static void ts_lexer__advance(t_lexer *_self, bool skip) static void ts_lexer__advance(TSLexer *_self, bool skip) {
{ Lexer *self = (Lexer *)_self;
t_liblexer *self = (t_liblexer *)_self; if (!self->chunk) return;
if (!self->chunk)
return; if (skip) {
LOG("skip", self->data.lookahead)
} else {
LOG("consume", self->data.lookahead)
}
ts_lexer__do_advance(self, skip); ts_lexer__do_advance(self, skip);
} }
// Mark that a token match has completed. This can be called multiple // Mark that a token match has completed. This can be called multiple
// times if a longer match is found later. // times if a longer match is found later.
static void ts_lexer__mark_end(t_lexer *_self) static void ts_lexer__mark_end(TSLexer *_self) {
{ Lexer *self = (Lexer *)_self;
t_liblexer *self = (t_liblexer *)_self; if (!ts_lexer__eof(&self->data)) {
if (!ts_lexer__eof(&self->data))
{
// If the lexer is right at the beginning of included range, // If the lexer is right at the beginning of included range,
// then the token should be considered to end at the *end* of the // then the token should be considered to end at the *end* of the
// previous included range, rather than here. // previous included range, rather than here.
t_parser_range *current_included_range = TSRange *current_included_range = &self->included_ranges[
&self->included_ranges[self->current_included_range_index]; self->current_included_range_index
if (self->current_included_range_index > 0 && ];
self->current_position.bytes == current_included_range->start_byte) if (
{ self->current_included_range_index > 0 &&
t_parser_range *previous_included_range = self->current_position.bytes == current_included_range->start_byte
current_included_range - 1; ) {
self->token_end_position = (t_parse_length){ TSRange *previous_included_range = current_included_range - 1;
self->token_end_position = (Length) {
previous_included_range->end_byte, previous_included_range->end_byte,
previous_included_range->end_point, previous_included_range->end_point,
}; };
@ -270,31 +264,26 @@ static void ts_lexer__mark_end(t_lexer *_self)
self->token_end_position = self->current_position; self->token_end_position = self->current_position;
} }
static t_i32 ts_lexer__get_column(t_lexer *_self) static uint32_t ts_lexer__get_column(TSLexer *_self) {
{ Lexer *self = (Lexer *)_self;
t_liblexer *self = (t_liblexer *)_self;
t_u32 goal_byte = self->current_position.bytes; uint32_t goal_byte = self->current_position.bytes;
self->did_get_column = true; self->did_get_column = true;
self->current_position.bytes -= self->current_position.extent.column; self->current_position.bytes -= self->current_position.extent.column;
self->current_position.extent.column = 0; self->current_position.extent.column = 0;
if (self->current_position.bytes < self->chunk_start) if (self->current_position.bytes < self->chunk_start) {
{
ts_lexer__get_chunk(self); ts_lexer__get_chunk(self);
} }
t_i32 result = 0; uint32_t result = 0;
if (!ts_lexer__eof(_self)) if (!ts_lexer__eof(_self)) {
{
ts_lexer__get_lookahead(self); ts_lexer__get_lookahead(self);
while (self->current_position.bytes < goal_byte && self->chunk) while (self->current_position.bytes < goal_byte && self->chunk) {
{
result++; result++;
ts_lexer__do_advance(self, false); ts_lexer__do_advance(self, false);
if (ts_lexer__eof(_self)) if (ts_lexer__eof(_self)) break;
break;
} }
} }
@ -304,36 +293,26 @@ static t_i32 ts_lexer__get_column(t_lexer *_self)
// Is the lexer at a boundary between two disjoint included ranges of // Is the lexer at a boundary between two disjoint included ranges of
// source code? This is exposed as an API because some languages' external // source code? This is exposed as an API because some languages' external
// scanners need to perform custom actions at these boundaries. // scanners need to perform custom actions at these boundaries.
static bool ts_lexer__is_at_included_range_start(const t_lexer *_self) static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) {
{ const Lexer *self = (const Lexer *)_self;
const t_liblexer *self = (const t_liblexer *)_self; if (self->current_included_range_index < self->included_range_count) {
if (self->current_included_range_index < self->included_range_count) TSRange *current_range = &self->included_ranges[self->current_included_range_index];
{
t_parser_range *current_range =
&self->included_ranges[self->current_included_range_index];
return self->current_position.bytes == current_range->start_byte; return self->current_position.bytes == current_range->start_byte;
} } else {
else
{
return false; return false;
} }
} }
void ts_lexer_init(t_liblexer *self) void ts_lexer_init(Lexer *self) {
{ *self = (Lexer) {
*self = (t_liblexer){ .data = {
.data = // The lexer's methods are stored as struct fields so that generated
{ // parsers can call them without needing to be linked against this
// The lexer's methods are stored as struct fields so that
// generated
// parsers can call them without needing to be linked against
// this
// library. // library.
.advance = ts_lexer__advance, .advance = ts_lexer__advance,
.mark_end = ts_lexer__mark_end, .mark_end = ts_lexer__mark_end,
.get_column = ts_lexer__get_column, .get_column = ts_lexer__get_column,
.is_at_included_range_start = .is_at_included_range_start = ts_lexer__is_at_included_range_start,
ts_lexer__is_at_included_range_start,
.eof = ts_lexer__eof, .eof = ts_lexer__eof,
.lookahead = 0, .lookahead = 0,
.result_symbol = 0, .result_symbol = 0,
@ -342,7 +321,10 @@ void ts_lexer_init(t_liblexer *self)
.chunk_size = 0, .chunk_size = 0,
.chunk_start = 0, .chunk_start = 0,
.current_position = {0, {0, 0}}, .current_position = {0, {0, 0}},
.logger = {.payload = NULL, .log = NULL}, .logger = {
.payload = NULL,
.log = NULL
},
.included_ranges = NULL, .included_ranges = NULL,
.included_range_count = 0, .included_range_count = 0,
.current_included_range_index = 0, .current_included_range_index = 0,
@ -350,13 +332,11 @@ void ts_lexer_init(t_liblexer *self)
ts_lexer_set_included_ranges(self, NULL, 0); ts_lexer_set_included_ranges(self, NULL, 0);
} }
void ts_lexer_delete(t_liblexer *self) void ts_lexer_delete(Lexer *self) {
{ ts_free(self->included_ranges);
free(self->included_ranges);
} }
void ts_lexer_set_input(t_liblexer *self, t_parse_input input) void ts_lexer_set_input(Lexer *self, TSInput input) {
{
self->input = input; self->input = input;
ts_lexer__clear_chunk(self); ts_lexer__clear_chunk(self);
ts_lexer_goto(self, self->current_position); ts_lexer_goto(self, self->current_position);
@ -364,93 +344,93 @@ void ts_lexer_set_input(t_liblexer *self, t_parse_input input)
// Move the lexer to the given position. This doesn't do any work // Move the lexer to the given position. This doesn't do any work
// if the parser is already at the given position. // if the parser is already at the given position.
void ts_lexer_reset(t_liblexer *self, t_parse_length position) void ts_lexer_reset(Lexer *self, Length position) {
{ if (position.bytes != self->current_position.bytes) {
if (position.bytes != self->current_position.bytes)
{
ts_lexer_goto(self, position); ts_lexer_goto(self, position);
} }
} }
void ts_lexer_start(t_liblexer *self) void ts_lexer_start(Lexer *self) {
{
self->token_start_position = self->current_position; self->token_start_position = self->current_position;
self->token_end_position = LENGTH_UNDEFINED; self->token_end_position = LENGTH_UNDEFINED;
self->data.result_symbol = 0; self->data.result_symbol = 0;
self->did_get_column = false; self->did_get_column = false;
if (!ts_lexer__eof(&self->data)) if (!ts_lexer__eof(&self->data)) {
{ if (!self->chunk_size) ts_lexer__get_chunk(self);
if (!self->chunk_size) if (!self->lookahead_size) ts_lexer__get_lookahead(self);
ts_lexer__get_chunk(self); if (
if (!self->lookahead_size) self->current_position.bytes == 0 &&
ts_lexer__get_lookahead(self); self->data.lookahead == BYTE_ORDER_MARK
if (self->current_position.bytes == 0 && ) ts_lexer__advance(&self->data, true);
self->data.lookahead == BYTE_ORDER_MARK)
ts_lexer__advance(&self->data, true);
} }
} }
void ts_lexer_finish(t_liblexer *self, t_i32 *lookahead_end_byte) void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
{ if (length_is_undefined(self->token_end_position)) {
if (length_is_undefined(self->token_end_position))
{
ts_lexer__mark_end(&self->data); ts_lexer__mark_end(&self->data);
} }
// If the token ended at an included range boundary, then its end position // If the token ended at an included range boundary, then its end position
// will have been reset to the end of the preceding range. Reset the start // will have been reset to the end of the preceding range. Reset the start
// position to match. // position to match.
if (self->token_end_position.bytes < self->token_start_position.bytes) if (self->token_end_position.bytes < self->token_start_position.bytes) {
{
self->token_start_position = self->token_end_position; self->token_start_position = self->token_end_position;
} }
t_i32 current_lookahead_end_byte = self->current_position.bytes + 1; uint32_t current_lookahead_end_byte = self->current_position.bytes + 1;
// In order to determine that a byte sequence is invalid UTF8 or UTF16, // In order to determine that a byte sequence is invalid UTF8 or UTF16,
// the character decoding algorithm may have looked at the following byte. // the character decoding algorithm may have looked at the following byte.
// Therefore, the next byte *after* the current (invalid) character // Therefore, the next byte *after* the current (invalid) character
// affects the interpretation of the current character. // affects the interpretation of the current character.
if (self->data.lookahead == TS_DECODE_ERROR) if (self->data.lookahead == TS_DECODE_ERROR) {
{
current_lookahead_end_byte++; current_lookahead_end_byte++;
} }
if (current_lookahead_end_byte > *lookahead_end_byte) if (current_lookahead_end_byte > *lookahead_end_byte) {
{
*lookahead_end_byte = current_lookahead_end_byte; *lookahead_end_byte = current_lookahead_end_byte;
} }
} }
void ts_lexer_advance_to_end(t_liblexer *self) void ts_lexer_advance_to_end(Lexer *self) {
{ while (self->chunk) {
while (self->chunk)
{
ts_lexer__advance(&self->data, false); ts_lexer__advance(&self->data, false);
} }
} }
void ts_lexer_mark_end(t_liblexer *self) void ts_lexer_mark_end(Lexer *self) {
{
ts_lexer__mark_end(&self->data); ts_lexer__mark_end(&self->data);
} }
bool ts_lexer_set_included_ranges(t_liblexer *self, bool ts_lexer_set_included_ranges(
const t_parser_range *ranges, t_u32 count) Lexer *self,
{ const TSRange *ranges,
uint32_t count
) {
if (count == 0 || !ranges) {
ranges = &DEFAULT_RANGE; ranges = &DEFAULT_RANGE;
count = 1; count = 1;
size_t size = count * sizeof(t_parser_range); } else {
self->included_ranges = realloc(self->included_ranges, size); uint32_t previous_byte = 0;
for (unsigned i = 0; i < count; i++) {
const TSRange *range = &ranges[i];
if (
range->start_byte < previous_byte ||
range->end_byte < range->start_byte
) return false;
previous_byte = range->end_byte;
}
}
size_t size = count * sizeof(TSRange);
self->included_ranges = ts_realloc(self->included_ranges, size);
memcpy(self->included_ranges, ranges, size); memcpy(self->included_ranges, ranges, size);
self->included_range_count = count; self->included_range_count = count;
ts_lexer_goto(self, self->current_position); ts_lexer_goto(self, self->current_position);
return true; return true;
} }
t_parser_range *ts_lexer_included_ranges(const t_liblexer *self, TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) {
t_u32 *count)
{
*count = self->included_range_count; *count = self->included_range_count;
return self->included_ranges; return self->included_ranges;
} }

49
parser/src/lexer.h Normal file
View file

@ -0,0 +1,49 @@
#ifndef TREE_SITTER_LEXER_H_
#define TREE_SITTER_LEXER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./length.h"
#include "./subtree.h"
#include "./api.h"
#include "./parser.h"
// Internal lexer state. `data` is the TSLexer interface whose function
// pointers are filled in by ts_lexer_init(); lexer.c converts a `TSLexer *`
// back into a `Lexer *` with a plain cast, so `data` must remain the first
// member of this struct.
typedef struct {
TSLexer data;
// Position the lexer is currently reading from.
Length current_position;
// Start of the token being lexed; moved forward when characters are skipped.
Length token_start_position;
// End of the token being lexed; LENGTH_UNDEFINED until an end is marked.
Length token_end_position;
// Array of ranges the lexer is allowed to read, allocated by
// ts_lexer_set_included_ranges() and released by ts_lexer_delete().
TSRange *included_ranges;
// Chunk of source text most recently returned by `input.read`, or NULL
// when no chunk is loaded.
const char *chunk;
// Provider of source text (read callback and its payload).
TSInput input;
// Logging callback and payload; nothing is logged while `log` is NULL.
TSLogger logger;
// Number of entries in `included_ranges`.
uint32_t included_range_count;
// Index of the range being read; equals `included_range_count` at EOF.
uint32_t current_included_range_index;
// Byte offset in the input at which `chunk` begins.
uint32_t chunk_start;
// Size of `chunk` in bytes.
uint32_t chunk_size;
// Size in bytes of the current lookahead character.
uint32_t lookahead_size;
// Cleared by ts_lexer_start(); set when the get_column callback is used.
bool did_get_column;
// Scratch buffer used to format log messages.
char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];
} Lexer;
// Reset every field, install the TSLexer callbacks and select the default
// included range.
void ts_lexer_init(Lexer *);
// Release the included-ranges array owned by the lexer.
void ts_lexer_delete(Lexer *);
// Store a new input source, drop the cached chunk and re-seek to the
// current position.
void ts_lexer_set_input(Lexer *, TSInput);
// Move to the given position; does nothing if the byte offset already matches.
void ts_lexer_reset(Lexer *, Length);
// Begin lexing a new token at the current position, loading a chunk and a
// lookahead character if none is available yet.
void ts_lexer_start(Lexer *);
// Finish the current token: mark its end if no end was marked, and raise
// the pointed-to lookahead end byte if the lexer looked further than that.
void ts_lexer_finish(Lexer *, uint32_t *);
// Keep advancing until no chunk of input remains.
void ts_lexer_advance_to_end(Lexer *);
// Mark the current position as the end of the token.
void ts_lexer_mark_end(Lexer *);
// Replace the included ranges with a copy of `ranges`. A NULL pointer or a
// zero `count` selects a single range covering the whole input. Returns
// false, leaving the lexer untouched, when a range starts before the
// previous one ends or ends before it starts.
bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count);
// Return the lexer's own included-ranges array and store its length in `*count`.
TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_LEXER_H_

13
parser/src/lib.c Normal file
View file

@ -0,0 +1,13 @@
#define _POSIX_C_SOURCE 200112L
#include "./alloc.c"
#include "./get_changed_ranges.c"
#include "./language.c"
#include "./lexer.c"
#include "./node.c"
#include "./parser.c"
#include "./query.c"
#include "./stack.c"
#include "./subtree.c"
#include "./tree_cursor.c"
#include "./tree.c"

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

265
parser/src/parser.h Normal file
View file

@ -0,0 +1,265 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
// One field-map record: a field id paired with a child index.
// TSLanguage.field_map_entries points to an array of these.
// NOTE(review): the exact meaning of `inherited` is not visible in this
// header — check the code that reads field_map_entries.
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
// An (index, length) pair; TSLanguage.field_map_slices points to an array of
// these.
// NOTE(review): presumably selects a run of TSFieldMapEntry records inside
// TSLanguage.field_map_entries — confirm against the consumer.
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
// Boolean flags describing one symbol; TSLanguage.symbol_metadata points to an
// array of these.
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
// Interface handed to lexing functions: the `lex_fn` / `keyword_lex_fn`
// members of TSLanguage and the external scanner's `scan` callback all take a
// `TSLexer *`.
typedef struct TSLexer TSLexer;
struct TSLexer {
// Current lookahead value; START_LEXER() reloads it after every advance.
int32_t lookahead;
// Symbol of the recognised token; written by ACCEPT_TOKEN().
TSSymbol result_symbol;
// Called from the `next_state` label of START_LEXER(). The bool argument is
// the `skip` flag, which SKIP() sets to true and ADVANCE() leaves false.
void (*advance)(TSLexer *, bool);
// Called by ACCEPT_TOKEN() right after result_symbol is stored.
void (*mark_end)(TSLexer *);
// NOTE(review): the three callbacks below are not used anywhere in this
// header; their behaviour is only suggested by their names — confirm in
// lexer.c.
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
};
// Tag stored in the `type` member of a TSParseAction. The SHIFT*, REDUCE,
// ACCEPT_INPUT and RECOVER macros in this header write these values.
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
// A single parse action. Every variant starts with a uint8_t `type` holding a
// TSParseActionType value, so the tag can be read through the bare `type`
// member before deciding whether `shift` or `reduce` is the active variant.
typedef union {
// Fields set by SHIFT(), SHIFT_REPEAT() and SHIFT_EXTRA().
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
// Fields set by REDUCE().
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
// Tag-only view, used by RECOVER() and ACCEPT_INPUT().
uint8_t type;
} TSParseAction;
// A pair of lex-state numbers (regular and external); TSLanguage.lex_modes
// points to an array of these.
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
// One slot of TSLanguage.parse_actions: either a parse action or an `entry`
// header carrying a count and a `reusable` flag.
// NOTE(review): presumably each header is followed by `count` action slots —
// confirm against the code that walks parse_actions.
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
// A range of character values. set_contains() treats both `start` and `end`
// as inclusive bounds.
typedef struct {
int32_t start;
int32_t end;
} TSCharacterRange;
// Table-driven description of one language: counts, parse tables, symbol and
// field metadata, lexing entry points and optional external-scanner hooks.
// NOTE(review): this struct is presumably initialised by generated parser
// code, so member order and types form a binary contract — do not reorder.
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexMode *lex_modes;
// Lexing entry points; both receive the lexer and a state id and return bool.
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
// Hooks for a hand-written external scanner. `create` returns an opaque
// payload pointer and the other callbacks take a `void *` first argument.
// NOTE(review): presumably that argument is the pointer returned by `create`
// — confirm against the code that invokes these hooks.
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
};
/*
 * Reports whether `lookahead` falls inside any of the `len` inclusive
 * character ranges stored in `ranges`.
 *
 * The lookup is a binary search, so `ranges` must be sorted in ascending
 * order and its ranges must not overlap. An empty table (`len == 0`) contains
 * nothing: the function returns false without reading `ranges`.
 */
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
  // Guard the empty table: the final probe below always reads ranges[index],
  // which would be out of bounds when there are no ranges at all.
  if (len == 0) {
    return false;
  }

  uint32_t index = 0;
  uint32_t size = len;
  while (size > 1) {
    uint32_t half_size = size / 2;
    uint32_t mid_index = index + half_size;
    const TSCharacterRange *range = &ranges[mid_index];
    if (lookahead >= range->start && lookahead <= range->end) {
      return true;
    } else if (lookahead > range->end) {
      // The target lies to the right of the probe: keep the upper half.
      index = mid_index;
    }
    size -= half_size;
  }

  // Exactly one candidate range is left.
  const TSCharacterRange *range = &ranges[index];
  return (lookahead >= range->start && lookahead <= range->end);
}
/*
* Lexer Macros
*/
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
/*
 * Prologue of a lexing function. Declares `result`, `skip`, `eof` and
 * `lookahead`, then jumps to the `start` label. Jumping to `next_state`
 * advances the lexer (passing the current `skip` flag) and falls through to
 * `start`, which clears `skip` and reloads `lookahead` from the lexer.
 * Expects a `TSLexer *lexer` to be in scope.
 */
#define START_LEXER() \
bool result = false; \
bool skip = false; \
UNUSED \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
/*
 * Consumes the current character and continues in `state_value`. Assigns to a
 * variable named `state`, which the enclosing function must provide.
 */
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
/*
 * Table form of ADVANCE. The arguments are a flat list of
 * (character, state) pairs; the first pair whose character equals `lookahead`
 * selects the next state. Execution falls through when no pair matches.
 * The table elements are uint16_t, so neither value may exceed 65535.
 */
#define ADVANCE_MAP(...) \
{ \
static const uint16_t map[] = { __VA_ARGS__ }; \
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
if (map[i] == lookahead) { \
state = map[i + 1]; \
goto next_state; \
} \
} \
}
/*
 * Like ADVANCE, but sets `skip` to true first, so the following
 * `lexer->advance` call receives true as its second argument.
 */
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
/*
 * Records a successful match: sets `result`, stores the token symbol in the
 * lexer and calls `mark_end`. Expands to three bare statements with no
 * enclosing block, so it must not be used as the sole body of an unbraced
 * `if`/`else`/loop.
 */
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
/* Leaves the lexing function, returning whether a token has been accepted. */
#define END_STATE() return result;
/*
* Parse Table Macros
*/
/*
 * LARGE_STATE_COUNT is not defined in this header; the file that uses
 * SMALL_STATE must define it.
 */
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
/* STATE and ACTIONS expand to their argument unchanged. */
#define STATE(id) id
#define ACTIONS(id) id
/*
 * The macros below expand to brace initializers for a TSParseActionEntry,
 * filled in through its first member (`action`, a TSParseAction). Members
 * that a macro does not name are zero-initialized.
 */
/* Shift action that moves to `state_value`. */
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = (state_value) \
} \
}}
/* Shift action that moves to `state_value` with the `repetition` flag set. */
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = (state_value), \
.repetition = true \
} \
}}
/* Shift action with the `extra` flag set and no target state. */
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
/*
 * Reduce action producing `symbol_name` from `children` child nodes, with the
 * given dynamic precedence and production id.
 */
#define REDUCE(symbol_name, children, precedence, prod_id) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_name, \
.child_count = children, \
.dynamic_precedence = precedence, \
.production_id = prod_id \
}, \
}}
/* Action that carries only the Recover tag. */
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
/* Action that carries only the Accept tag. */
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

62
parser/src/point.h Normal file
View file

@ -0,0 +1,62 @@
#ifndef TREE_SITTER_POINT_H_
#define TREE_SITTER_POINT_H_
#include "./api.h"
#define POINT_ZERO ((TSPoint) {0, 0})
#define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX})
static inline TSPoint point__new(unsigned row, unsigned column) {
TSPoint result = {row, column};
return result;
}
// Advances point `a` by the extent `b`. When `b` spans at least one row, the
// rows are summed and the column restarts at `b.column`; otherwise the row of
// `a` is kept and the columns are summed.
static inline TSPoint point_add(TSPoint a, TSPoint b) {
  return (b.row > 0)
    ? point__new(a.row + b.row, b.column)
    : point__new(a.row, a.column + b.column);
}
// Computes the extent from `b` to `a`. When `a` is not on a later row than
// `b`, the result has row 0 and the column difference; otherwise the rows are
// subtracted and the column of `a` is kept as is.
static inline TSPoint point_sub(TSPoint a, TSPoint b) {
  if (a.row <= b.row) {
    return point__new(0, a.column - b.column);
  }
  return point__new(a.row - b.row, a.column);
}
// True when `a` comes before `b` or equals it (rows compared first, then
// columns).
static inline bool point_lte(TSPoint a, TSPoint b) {
  if (a.row != b.row) {
    return a.row < b.row;
  }
  return a.column <= b.column;
}
// True when `a` comes strictly before `b` (rows compared first, then columns).
static inline bool point_lt(TSPoint a, TSPoint b) {
  if (a.row != b.row) {
    return a.row < b.row;
  }
  return a.column < b.column;
}
// True when `a` comes strictly after `b` (rows compared first, then columns).
static inline bool point_gt(TSPoint a, TSPoint b) {
  if (a.row != b.row) {
    return a.row > b.row;
  }
  return a.column > b.column;
}
// True when `a` comes after `b` or equals it (rows compared first, then
// columns).
static inline bool point_gte(TSPoint a, TSPoint b) {
  if (a.row != b.row) {
    return a.row > b.row;
  }
  return a.column >= b.column;
}
// True when both the row and the column of `a` and `b` match.
static inline bool point_eq(TSPoint a, TSPoint b) {
  if (a.row != b.row) {
    return false;
  }
  return a.column == b.column;
}
// Returns whichever point comes first; `b` is returned when the two are equal.
static inline TSPoint point_min(TSPoint a, TSPoint b) {
  bool a_is_first = a.row < b.row || (a.row == b.row && a.column < b.column);
  return a_is_first ? a : b;
}
// Returns whichever point comes last; `b` is returned when the two are equal.
static inline TSPoint point_max(TSPoint a, TSPoint b) {
  bool a_is_last = a.row > b.row || (a.row == b.row && a.column > b.column);
  return a_is_last ? a : b;
}
#endif

4134
parser/src/query.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,34 @@
#ifndef TREE_SITTER_REDUCE_ACTION_H_
#define TREE_SITTER_REDUCE_ACTION_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./array.h"
#include "./api.h"
// Description of one candidate reduction. ts_reduce_action_set_add() treats
// two actions as duplicates when `symbol` and `count` both match; the other
// two members are not compared.
typedef struct {
uint32_t count;
TSSymbol symbol;
int dynamic_precedence;
unsigned short production_id;
} ReduceAction;
typedef Array(ReduceAction) ReduceActionSet;
// Appends `new_action` to the set unless an action with the same symbol and
// the same count is already stored. Precedence and production id are not
// part of the comparison.
static inline void ts_reduce_action_set_add(ReduceActionSet *self,
                                            ReduceAction new_action) {
  uint32_t index = 0;
  while (index < self->size) {
    const ReduceAction *existing = &self->contents[index];
    bool same_symbol = existing->symbol == new_action.symbol;
    bool same_count = existing->count == new_action.count;
    if (same_symbol && same_count) {
      return;
    }
    index++;
  }
  array_push(self, new_action);
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_REDUCE_ACTION_H_

View file

@ -2,8 +2,8 @@
typedef struct { typedef struct {
Subtree tree; Subtree tree;
t_u32 child_index; uint32_t child_index;
t_u32 byte_offset; uint32_t byte_offset;
} StackEntry; } StackEntry;
typedef struct { typedef struct {
@ -26,7 +26,7 @@ static inline Subtree reusable_node_tree(ReusableNode *self) {
: NULL_SUBTREE; : NULL_SUBTREE;
} }
static inline t_u32 reusable_node_byte_offset(ReusableNode *self) { static inline uint32_t reusable_node_byte_offset(ReusableNode *self) {
return self->stack.size > 0 return self->stack.size > 0
? self->stack.contents[self->stack.size - 1].byte_offset ? self->stack.contents[self->stack.size - 1].byte_offset
: UINT32_MAX; : UINT32_MAX;
@ -38,13 +38,13 @@ static inline void reusable_node_delete(ReusableNode *self) {
static inline void reusable_node_advance(ReusableNode *self) { static inline void reusable_node_advance(ReusableNode *self) {
StackEntry last_entry = *array_back(&self->stack); StackEntry last_entry = *array_back(&self->stack);
t_u32 byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree); uint32_t byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree);
if (ts_subtree_has_external_tokens(last_entry.tree)) { if (ts_subtree_has_external_tokens(last_entry.tree)) {
self->last_external_token = ts_subtree_last_external_token(last_entry.tree); self->last_external_token = ts_subtree_last_external_token(last_entry.tree);
} }
Subtree tree; Subtree tree;
t_u32 next_index; uint32_t next_index;
do { do {
StackEntry popped_entry = array_pop(&self->stack); StackEntry popped_entry = array_pop(&self->stack);
next_index = popped_entry.child_index + 1; next_index = popped_entry.child_index + 1;

File diff suppressed because it is too large Load diff

View file

@ -1,9 +1,9 @@
#include "./alloc.h"
#include "./language.h" #include "./language.h"
#include "./subtree.h" #include "./subtree.h"
#include "./array.h" #include "./array.h"
#include "./stack.h" #include "./stack.h"
#include "parser/parser_length.h" #include "./length.h"
#include <assert.h> #include <assert.h>
#include <inttypes.h> #include <inttypes.h>
#include <stdio.h> #include <stdio.h>
@ -27,11 +27,11 @@ typedef struct {
} StackLink; } StackLink;
struct StackNode { struct StackNode {
t_state_id state; TSStateId state;
t_parse_length position; Length position;
StackLink links[MAX_LINK_COUNT]; StackLink links[MAX_LINK_COUNT];
short unsigned int link_count; short unsigned int link_count;
t_u32 ref_count; uint32_t ref_count;
unsigned error_cost; unsigned error_cost;
unsigned node_count; unsigned node_count;
int dynamic_precedence; int dynamic_precedence;
@ -40,7 +40,7 @@ struct StackNode {
typedef struct { typedef struct {
StackNode *node; StackNode *node;
SubtreeArray subtrees; SubtreeArray subtrees;
t_u32 subtree_count; uint32_t subtree_count;
bool is_pending; bool is_pending;
} StackIterator; } StackIterator;
@ -112,7 +112,7 @@ recur:
if (pool->size < MAX_NODE_POOL_SIZE) { if (pool->size < MAX_NODE_POOL_SIZE) {
array_push(pool, self); array_push(pool, self);
} else { } else {
free(self); ts_free(self);
} }
if (first_predecessor) { if (first_predecessor) {
@ -123,8 +123,8 @@ recur:
/// Get the number of nodes in the subtree, for the purpose of measuring /// Get the number of nodes in the subtree, for the purpose of measuring
/// how much progress has been made by a given version of the stack. /// how much progress has been made by a given version of the stack.
static t_u32 stack__subtree_node_count(Subtree subtree) { static uint32_t stack__subtree_node_count(Subtree subtree) {
t_u32 count = ts_subtree_visible_descendant_count(subtree); uint32_t count = ts_subtree_visible_descendant_count(subtree);
if (ts_subtree_visible(subtree)) count++; if (ts_subtree_visible(subtree)) count++;
// Count intermediate error nodes even though they are not visible, // Count intermediate error nodes even though they are not visible,
@ -139,12 +139,12 @@ static StackNode *stack_node_new(
StackNode *previous_node, StackNode *previous_node,
Subtree subtree, Subtree subtree,
bool is_pending, bool is_pending,
t_state_id state, TSStateId state,
StackNodeArray *pool StackNodeArray *pool
) { ) {
StackNode *node = pool->size > 0 StackNode *node = pool->size > 0
? array_pop(pool) ? array_pop(pool)
: malloc(sizeof(StackNode)); : ts_malloc(sizeof(StackNode));
*node = (StackNode) { *node = (StackNode) {
.ref_count = 1, .ref_count = 1,
.link_count = 0, .link_count = 0,
@ -234,7 +234,7 @@ static void stack_node_add_link(
for (int j = 0; j < link.node->link_count; j++) { for (int j = 0; j < link.node->link_count; j++) {
stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool); stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool);
} }
t_i32 dynamic_precedence = link.node->dynamic_precedence; int32_t dynamic_precedence = link.node->dynamic_precedence;
if (link.subtree.ptr) { if (link.subtree.ptr) {
dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree); dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
} }
@ -277,7 +277,7 @@ static void stack_head_delete(
} }
if (self->summary) { if (self->summary) {
array_delete(self->summary); array_delete(self->summary);
free(self->summary); ts_free(self->summary);
} }
stack_node_release(self->node, pool, subtree_pool); stack_node_release(self->node, pool, subtree_pool);
} }
@ -307,7 +307,7 @@ static void ts_stack__add_slice(
StackNode *node, StackNode *node,
SubtreeArray *subtrees SubtreeArray *subtrees
) { ) {
for (t_u32 i = self->slices.size - 1; i + 1 > 0; i--) { for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) {
StackVersion version = self->slices.contents[i].version; StackVersion version = self->slices.contents[i].version;
if (self->heads.contents[version].node == node) { if (self->heads.contents[version].node == node) {
StackSlice slice = {*subtrees, version}; StackSlice slice = {*subtrees, version};
@ -342,13 +342,13 @@ static StackSliceArray stack__iter(
bool include_subtrees = false; bool include_subtrees = false;
if (goal_subtree_count >= 0) { if (goal_subtree_count >= 0) {
include_subtrees = true; include_subtrees = true;
array_reserve(&new_iterator.subtrees, (t_u32)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree)); array_reserve(&new_iterator.subtrees, (uint32_t)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree));
} }
array_push(&self->iterators, new_iterator); array_push(&self->iterators, new_iterator);
while (self->iterators.size > 0) { while (self->iterators.size > 0) {
for (t_u32 i = 0, size = self->iterators.size; i < size; i++) { for (uint32_t i = 0, size = self->iterators.size; i < size; i++) {
StackIterator *iterator = &self->iterators.contents[i]; StackIterator *iterator = &self->iterators.contents[i];
StackNode *node = iterator->node; StackNode *node = iterator->node;
@ -379,7 +379,7 @@ static StackSliceArray stack__iter(
continue; continue;
} }
for (t_u32 j = 1; j <= node->link_count; j++) { for (uint32_t j = 1; j <= node->link_count; j++) {
StackIterator *next_iterator; StackIterator *next_iterator;
StackLink link; StackLink link;
if (j == node->link_count) { if (j == node->link_count) {
@ -419,7 +419,7 @@ static StackSliceArray stack__iter(
} }
Stack *ts_stack_new(SubtreePool *subtree_pool) { Stack *ts_stack_new(SubtreePool *subtree_pool) {
Stack *self = calloc(1, sizeof(Stack)); Stack *self = ts_calloc(1, sizeof(Stack));
array_init(&self->heads); array_init(&self->heads);
array_init(&self->slices); array_init(&self->slices);
@ -443,28 +443,28 @@ void ts_stack_delete(Stack *self) {
if (self->iterators.contents) if (self->iterators.contents)
array_delete(&self->iterators); array_delete(&self->iterators);
stack_node_release(self->base_node, &self->node_pool, self->subtree_pool); stack_node_release(self->base_node, &self->node_pool, self->subtree_pool);
for (t_u32 i = 0; i < self->heads.size; i++) { for (uint32_t i = 0; i < self->heads.size; i++) {
stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool);
} }
array_clear(&self->heads); array_clear(&self->heads);
if (self->node_pool.contents) { if (self->node_pool.contents) {
for (t_u32 i = 0; i < self->node_pool.size; i++) for (uint32_t i = 0; i < self->node_pool.size; i++)
free(self->node_pool.contents[i]); ts_free(self->node_pool.contents[i]);
array_delete(&self->node_pool); array_delete(&self->node_pool);
} }
array_delete(&self->heads); array_delete(&self->heads);
free(self); ts_free(self);
} }
t_u32 ts_stack_version_count(const Stack *self) { uint32_t ts_stack_version_count(const Stack *self) {
return self->heads.size; return self->heads.size;
} }
t_state_id ts_stack_state(const Stack *self, StackVersion version) { TSStateId ts_stack_state(const Stack *self, StackVersion version) {
return array_get(&self->heads, version)->node->state; return array_get(&self->heads, version)->node->state;
} }
t_parse_length ts_stack_position(const Stack *self, StackVersion version) { Length ts_stack_position(const Stack *self, StackVersion version) {
return array_get(&self->heads, version)->node->position; return array_get(&self->heads, version)->node->position;
} }
@ -503,7 +503,7 @@ void ts_stack_push(
StackVersion version, StackVersion version,
Subtree subtree, Subtree subtree,
bool pending, bool pending,
t_state_id state TSStateId state
) { ) {
StackHead *head = array_get(&self->heads, version); StackHead *head = array_get(&self->heads, version);
StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool); StackNode *new_node = stack_node_new(head->node, subtree, pending, state, &self->node_pool);
@ -520,7 +520,7 @@ forceinline StackAction pop_count_callback(void *payload, const StackIterator *i
} }
} }
StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, t_u32 count) { StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) {
return stack__iter(self, version, pop_count_callback, &count, (int)count); return stack__iter(self, version, pop_count_callback, &count, (int)count);
} }
@ -593,7 +593,7 @@ typedef struct {
forceinline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) { forceinline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) {
SummarizeStackSession *session = payload; SummarizeStackSession *session = payload;
t_state_id state = iterator->node->state; TSStateId state = iterator->node->state;
unsigned depth = iterator->subtree_count; unsigned depth = iterator->subtree_count;
if (depth > session->max_depth) return StackActionStop; if (depth > session->max_depth) return StackActionStop;
for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) { for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) {
@ -611,7 +611,7 @@ forceinline StackAction summarize_stack_callback(void *payload, const StackItera
void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) { void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) {
SummarizeStackSession session = { SummarizeStackSession session = {
.summary = malloc(sizeof(StackSummary)), .summary = ts_malloc(sizeof(StackSummary)),
.max_depth = max_depth .max_depth = max_depth
}; };
array_init(session.summary); array_init(session.summary);
@ -619,7 +619,7 @@ void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_dep
StackHead *head = &self->heads.contents[version]; StackHead *head = &self->heads.contents[version];
if (head->summary) { if (head->summary) {
array_delete(head->summary); array_delete(head->summary);
free(head->summary); ts_free(head->summary);
} }
head->summary = session.summary; head->summary = session.summary;
} }
@ -664,7 +664,7 @@ void ts_stack_remove_version(Stack *self, StackVersion version) {
void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) { void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) {
if (v1 == v2) return; if (v1 == v2) return;
assert(v2 < v1); assert(v2 < v1);
assert((t_u32)v1 < self->heads.size); assert((uint32_t)v1 < self->heads.size);
StackHead *source_head = &self->heads.contents[v1]; StackHead *source_head = &self->heads.contents[v1];
StackHead *target_head = &self->heads.contents[v2]; StackHead *target_head = &self->heads.contents[v2];
if (target_head->summary && !source_head->summary) { if (target_head->summary && !source_head->summary) {
@ -696,7 +696,7 @@ bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) {
if (!ts_stack_can_merge(self, version1, version2)) return false; if (!ts_stack_can_merge(self, version1, version2)) return false;
StackHead *head1 = &self->heads.contents[version1]; StackHead *head1 = &self->heads.contents[version1];
StackHead *head2 = &self->heads.contents[version2]; StackHead *head2 = &self->heads.contents[version2];
for (t_u32 i = 0; i < head2->node->link_count; i++) { for (uint32_t i = 0; i < head2->node->link_count; i++) {
stack_node_add_link(head1->node, head2->node->links[i], self->subtree_pool); stack_node_add_link(head1->node, head2->node->links[i], self->subtree_pool);
} }
if (head1->node->state == ERROR_STATE) { if (head1->node->state == ERROR_STATE) {
@ -752,7 +752,7 @@ Subtree ts_stack_resume(Stack *self, StackVersion version) {
void ts_stack_clear(Stack *self) { void ts_stack_clear(Stack *self) {
stack_node_retain(self->base_node); stack_node_retain(self->base_node);
for (t_u32 i = 0; i < self->heads.size; i++) { for (uint32_t i = 0; i < self->heads.size; i++) {
stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool); stack_head_delete(&self->heads.contents[i], &self->node_pool, self->subtree_pool);
} }
array_clear(&self->heads); array_clear(&self->heads);
@ -764,7 +764,7 @@ void ts_stack_clear(Stack *self) {
})); }));
} }
bool ts_stack_print_dot_graph(Stack *self, const t_language *language, FILE *f) { bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) {
array_reserve(&self->iterators, 32); array_reserve(&self->iterators, 32);
if (!f) f = stderr; if (!f) f = stderr;
@ -775,7 +775,7 @@ bool ts_stack_print_dot_graph(Stack *self, const t_language *language, FILE *f)
Array(StackNode *) visited_nodes = array_new(); Array(StackNode *) visited_nodes = array_new();
array_clear(&self->iterators); array_clear(&self->iterators);
for (t_u32 i = 0; i < self->heads.size; i++) { for (uint32_t i = 0; i < self->heads.size; i++) {
StackHead *head = &self->heads.contents[i]; StackHead *head = &self->heads.contents[i];
if (head->status == StackStatusHalted) continue; if (head->status == StackStatusHalted) continue;
@ -794,14 +794,14 @@ bool ts_stack_print_dot_graph(Stack *self, const t_language *language, FILE *f)
if (head->summary) { if (head->summary) {
fprintf(f, "\nsummary:"); fprintf(f, "\nsummary:");
for (t_u32 j = 0; j < head->summary->size; j++) fprintf(f, " %u", head->summary->contents[j].state); for (uint32_t j = 0; j < head->summary->size; j++) fprintf(f, " %u", head->summary->contents[j].state);
} }
if (head->last_external_token.ptr) { if (head->last_external_token.ptr) {
const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state; const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state;
const char *data = ts_external_scanner_state_data(state); const char *data = ts_external_scanner_state_data(state);
fprintf(f, "\nexternal_scanner_state:"); fprintf(f, "\nexternal_scanner_state:");
for (t_u32 j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]); for (uint32_t j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]);
} }
fprintf(f, "\"]\n"); fprintf(f, "\"]\n");
@ -814,11 +814,11 @@ bool ts_stack_print_dot_graph(Stack *self, const t_language *language, FILE *f)
while (!all_iterators_done) { while (!all_iterators_done) {
all_iterators_done = true; all_iterators_done = true;
for (t_u32 i = 0; i < self->iterators.size; i++) { for (uint32_t i = 0; i < self->iterators.size; i++) {
StackIterator iterator = self->iterators.contents[i]; StackIterator iterator = self->iterators.contents[i];
StackNode *node = iterator.node; StackNode *node = iterator.node;
for (t_u32 j = 0; j < visited_nodes.size; j++) { for (uint32_t j = 0; j < visited_nodes.size; j++) {
if (visited_nodes.contents[j] == node) { if (visited_nodes.contents[j] == node) {
node = NULL; node = NULL;
break; break;

View file

@ -7,7 +7,7 @@ extern "C" {
#include "./array.h" #include "./array.h"
#include "./subtree.h" #include "./subtree.h"
#include "parser/error_costs.h" #include "./error_costs.h"
#include <stdio.h> #include <stdio.h>
typedef struct Stack Stack; typedef struct Stack Stack;
@ -22,9 +22,9 @@ typedef struct {
typedef Array(StackSlice) StackSliceArray; typedef Array(StackSlice) StackSliceArray;
typedef struct { typedef struct {
t_parse_length position; Length position;
unsigned depth; unsigned depth;
t_state_id state; TSStateId state;
} StackSummaryEntry; } StackSummaryEntry;
typedef Array(StackSummaryEntry) StackSummary; typedef Array(StackSummaryEntry) StackSummary;
@ -35,11 +35,11 @@ Stack *ts_stack_new(SubtreePool *);
void ts_stack_delete(Stack *); void ts_stack_delete(Stack *);
// Get the stack's current number of versions. // Get the stack's current number of versions.
t_u32 ts_stack_version_count(const Stack *); uint32_t ts_stack_version_count(const Stack *);
// Get the state at the top of the given version of the stack. If the stack is // Get the state at the top of the given version of the stack. If the stack is
// empty, this returns the initial state, 0. // empty, this returns the initial state, 0.
t_state_id ts_stack_state(const Stack *, StackVersion); TSStateId ts_stack_state(const Stack *, StackVersion);
// Get the last external token associated with a given version of the stack. // Get the last external token associated with a given version of the stack.
Subtree ts_stack_last_external_token(const Stack *, StackVersion); Subtree ts_stack_last_external_token(const Stack *, StackVersion);
@ -48,21 +48,21 @@ Subtree ts_stack_last_external_token(const Stack *, StackVersion);
void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree ); void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree );
// Get the position of the given version of the stack within the document. // Get the position of the given version of the stack within the document.
t_parse_length ts_stack_position(const Stack *, StackVersion); Length ts_stack_position(const Stack *, StackVersion);
// Push a tree and state onto the given version of the stack. // Push a tree and state onto the given version of the stack.
// //
// This transfers ownership of the tree to the Stack. Callers that // This transfers ownership of the tree to the Stack. Callers that
// need to retain ownership of the tree for their own purposes should // need to retain ownership of the tree for their own purposes should
// first retain the tree. // first retain the tree.
void ts_stack_push(Stack *, StackVersion, Subtree , bool, t_state_id); void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId);
// Pop the given number of entries from the given version of the stack. This // Pop the given number of entries from the given version of the stack. This
// operation can increase the number of stack versions by revealing multiple // operation can increase the number of stack versions by revealing multiple
// versions which had previously been merged. It returns an array that // versions which had previously been merged. It returns an array that
// specifies the index of each revealed version and the trees that were // specifies the index of each revealed version and the trees that were
// removed from that version. // removed from that version.
StackSliceArray ts_stack_pop_count(Stack *, StackVersion, t_u32 count); StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count);
// Remove an error at the top of the given version of the stack. // Remove an error at the top of the given version of the stack.
SubtreeArray ts_stack_pop_error(Stack *, StackVersion); SubtreeArray ts_stack_pop_error(Stack *, StackVersion);
@ -122,9 +122,9 @@ void ts_stack_remove_version(Stack *, StackVersion);
void ts_stack_clear(Stack *); void ts_stack_clear(Stack *);
bool ts_stack_print_dot_graph(Stack *, const t_language *, FILE *); bool ts_stack_print_dot_graph(Stack *, const TSLanguage *, FILE *);
typedef void (*StackIterateCallback)(void *, t_state_id, t_u32); typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t);
#ifdef __cplusplus #ifdef __cplusplus
} }

File diff suppressed because it is too large Load diff

View file

@ -1,19 +1,22 @@
#ifndef TREE_SITTER_SUBTREE_H_ #ifndef TREE_SITTER_SUBTREE_H_
#define TREE_SITTER_SUBTREE_H_ #define TREE_SITTER_SUBTREE_H_
#include "me/types.h" #ifdef __cplusplus
#include "parser/types/types_symbol.h" extern "C" {
#endif
#include "./array.h"
#include "parser/error_costs.h"
#include "parser/parser_length.h"
#include "parser/api.h"
#include <limits.h> #include <limits.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdio.h> #include <stdio.h>
#include "./length.h"
#include "./array.h"
#include "./error_costs.h"
#include "./host.h"
#include "./api.h"
#include "./parser.h"
#define TS_TREE_STATE_NONE USHRT_MAX #define TS_TREE_STATE_NONE USHRT_MAX
#define NULL_SUBTREE ((Subtree){.ptr = NULL}) #define NULL_SUBTREE ((Subtree) {.ptr = NULL})
// The serialized state of an external scanner. // The serialized state of an external scanner.
// //
@ -25,13 +28,12 @@
// //
// Small byte arrays are stored inline, and long ones are allocated // Small byte arrays are stored inline, and long ones are allocated
// separately on the heap. // separately on the heap.
typedef struct typedef struct {
{
union { union {
char *long_data; char *long_data;
char short_data[24]; char short_data[24];
}; };
t_u32 length; uint32_t length;
} ExternalScannerState; } ExternalScannerState;
// A compact representation of a subtree. // A compact representation of a subtree.
@ -45,41 +47,76 @@ typedef struct
// Because of alignment, for any valid pointer this will be 0, giving // Because of alignment, for any valid pointer this will be 0, giving
// us the opportunity to make use of this bit to signify whether to use // us the opportunity to make use of this bit to signify whether to use
// the pointer or the inline struct. // the pointer or the inline struct.
typedef struct s_subtree_inline_data t_subtree_inline_data; typedef struct SubtreeInlineData SubtreeInlineData;
struct s_subtree_inline_data #define SUBTREE_BITS \
{ bool visible : 1; \
bool is_inline : 1; bool named : 1; \
bool visible : 1; bool extra : 1; \
bool named : 1; bool has_changes : 1; \
bool extra : 1; bool is_missing : 1; \
bool has_changes : 1;
bool is_missing : 1;
bool is_keyword : 1; bool is_keyword : 1;
t_u8 symbol;
t_u16 parse_state; #define SUBTREE_SIZE \
t_u8 padding_columns; uint8_t padding_columns; \
t_u8 padding_rows : 4; uint8_t padding_rows : 4; \
t_u8 lookahead_bytes : 4; uint8_t lookahead_bytes : 4; \
t_u8 padding_bytes; uint8_t padding_bytes; \
t_u8 size_bytes; uint8_t size_bytes;
#if TS_BIG_ENDIAN
#if TS_PTR_SIZE == 32
struct SubtreeInlineData {
uint16_t parse_state;
uint8_t symbol;
SUBTREE_BITS
bool unused : 1;
bool is_inline : 1;
SUBTREE_SIZE
}; };
#else
struct SubtreeInlineData {
SUBTREE_SIZE
uint16_t parse_state;
uint8_t symbol;
SUBTREE_BITS
bool unused : 1;
bool is_inline : 1;
};
#endif
#else
struct SubtreeInlineData {
bool is_inline : 1;
SUBTREE_BITS
uint8_t symbol;
uint16_t parse_state;
SUBTREE_SIZE
};
#endif
#undef SUBTREE_BITS
#undef SUBTREE_SIZE
// A heap-allocated representation of a subtree. // A heap-allocated representation of a subtree.
// //
// This representation is used for parent nodes, external tokens, // This representation is used for parent nodes, external tokens,
// errors, and other leaf nodes whose data is too large to fit into // errors, and other leaf nodes whose data is too large to fit into
// the inline representation. // the inline representation.
typedef struct typedef struct {
{ volatile uint32_t ref_count;
volatile t_u32 ref_count; Length padding;
t_parse_length padding; Length size;
t_parse_length size; uint32_t lookahead_bytes;
t_u32 lookahead_bytes; uint32_t error_cost;
t_u32 error_cost; uint32_t child_count;
t_u32 child_count; TSSymbol symbol;
t_symbol symbol; TSStateId parse_state;
t_state_id parse_state;
bool visible : 1; bool visible : 1;
bool named : 1; bool named : 1;
@ -89,63 +126,56 @@ typedef struct
bool has_changes : 1; bool has_changes : 1;
bool has_external_tokens : 1; bool has_external_tokens : 1;
bool has_external_scanner_state_change : 1; bool has_external_scanner_state_change : 1;
bool depends_on_column : 1; bool depends_on_column: 1;
bool is_missing : 1; bool is_missing : 1;
bool is_keyword : 1; bool is_keyword : 1;
union { union {
// Non-terminal subtrees (`child_count > 0`) // Non-terminal subtrees (`child_count > 0`)
struct struct {
{ uint32_t visible_child_count;
t_u32 visible_child_count; uint32_t named_child_count;
t_u32 named_child_count; uint32_t visible_descendant_count;
t_u32 visible_descendant_count; int32_t dynamic_precedence;
t_i32 dynamic_precedence; uint16_t repeat_depth;
t_u16 repeat_depth; uint16_t production_id;
t_u16 production_id; struct {
struct TSSymbol symbol;
{ TSStateId parse_state;
t_symbol symbol;
t_state_id parse_state;
} first_leaf; } first_leaf;
}; };
// External terminal subtrees (`child_count == 0 && // External terminal subtrees (`child_count == 0 && has_external_tokens`)
// has_external_tokens`)
ExternalScannerState external_scanner_state; ExternalScannerState external_scanner_state;
// Error terminal subtrees (`child_count == 0 && symbol == // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`)
// ts_builtin_sym_error`) int32_t lookahead_char;
t_i32 lookahead_char;
}; };
} SubtreeHeapData; } SubtreeHeapData;
// The fundamental building block of a syntax tree. // The fundamental building block of a syntax tree.
typedef union { typedef union {
t_subtree_inline_data data; SubtreeInlineData data;
const SubtreeHeapData *ptr; const SubtreeHeapData *ptr;
} Subtree; } Subtree;
// Like Subtree, but mutable. // Like Subtree, but mutable.
typedef union { typedef union {
t_subtree_inline_data data; SubtreeInlineData data;
SubtreeHeapData *ptr; SubtreeHeapData *ptr;
} MutableSubtree; } MutableSubtree;
typedef Array(Subtree) SubtreeArray; typedef Array(Subtree) SubtreeArray;
typedef Array(MutableSubtree) MutableSubtreeArray; typedef Array(MutableSubtree) MutableSubtreeArray;
typedef struct typedef struct {
{
MutableSubtreeArray free_trees; MutableSubtreeArray free_trees;
MutableSubtreeArray tree_stack; MutableSubtreeArray tree_stack;
} SubtreePool; } SubtreePool;
void ts_external_scanner_state_init(ExternalScannerState *, const char *, void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned);
unsigned);
const char *ts_external_scanner_state_data(const ExternalScannerState *); const char *ts_external_scanner_state_data(const ExternalScannerState *);
bool ts_external_scanner_state_eq(const ExternalScannerState *self, bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *, unsigned);
const char *, unsigned);
void ts_external_scanner_state_delete(ExternalScannerState *self); void ts_external_scanner_state_delete(ExternalScannerState *self);
void ts_subtree_array_copy(SubtreeArray, SubtreeArray *); void ts_subtree_array_copy(SubtreeArray, SubtreeArray *);
@ -154,280 +184,199 @@ void ts_subtree_array_delete(SubtreePool *, SubtreeArray *);
void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *); void ts_subtree_array_remove_trailing_extras(SubtreeArray *, SubtreeArray *);
void ts_subtree_array_reverse(SubtreeArray *); void ts_subtree_array_reverse(SubtreeArray *);
SubtreePool ts_subtree_pool_new(t_u32 capacity); SubtreePool ts_subtree_pool_new(uint32_t capacity);
void ts_subtree_pool_delete(SubtreePool *); void ts_subtree_pool_delete(SubtreePool *);
Subtree ts_subtree_new_leaf(SubtreePool *, t_symbol, t_parse_length, t_parse_length, t_u32, Subtree ts_subtree_new_leaf(
t_state_id, bool, bool, bool, const t_language *); SubtreePool *, TSSymbol, Length, Length, uint32_t,
Subtree ts_subtree_new_error(SubtreePool *, t_i32, t_parse_length, t_parse_length, t_u32, TSStateId, bool, bool, bool, const TSLanguage *
t_state_id, const t_language *); );
MutableSubtree ts_subtree_new_node(t_symbol, SubtreeArray *, unsigned, Subtree ts_subtree_new_error(
const t_language *); SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *
Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const t_language *); );
Subtree ts_subtree_new_missing_leaf(SubtreePool *, t_symbol, t_parse_length, t_u32, MutableSubtree ts_subtree_new_node(TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
const t_language *); Subtree ts_subtree_new_error_node(SubtreeArray *, bool, const TSLanguage *);
Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, uint32_t, const TSLanguage *);
MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree); MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree);
void ts_subtree_retain(Subtree); void ts_subtree_retain(Subtree);
void ts_subtree_release(SubtreePool *, Subtree); void ts_subtree_release(SubtreePool *, Subtree);
int ts_subtree_compare(Subtree, Subtree, SubtreePool *); int ts_subtree_compare(Subtree, Subtree, SubtreePool *);
void ts_subtree_set_symbol(MutableSubtree *, t_symbol, const t_language *); void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *);
void ts_subtree_summarize(MutableSubtree, const Subtree *, t_u32, void ts_subtree_summarize(MutableSubtree, const Subtree *, uint32_t, const TSLanguage *);
const t_language *); void ts_subtree_summarize_children(MutableSubtree, const TSLanguage *);
void ts_subtree_summarize_children(MutableSubtree, const t_language *); void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *);
void ts_subtree_balance(Subtree, SubtreePool *, const t_language *); Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *);
Subtree ts_subtree_edit(Subtree, const t_input_edit *edit, SubtreePool *); char *ts_subtree_string(Subtree, TSSymbol, bool, const TSLanguage *, bool include_all);
char *ts_subtree_string(Subtree, t_symbol, bool, const t_language *, void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *);
bool include_all);
void ts_subtree_print_dot_graph(Subtree, const t_language *, FILE *);
Subtree ts_subtree_last_external_token(Subtree); Subtree ts_subtree_last_external_token(Subtree);
const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self); const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self);
bool ts_subtree_external_scanner_state_eq(Subtree, Subtree); bool ts_subtree_external_scanner_state_eq(Subtree, Subtree);
#define SUBTREE_GET(self, name) \ #define SUBTREE_GET(self, name) ((self).data.is_inline ? (self).data.name : (self).ptr->name)
((self).data.is_inline ? (self).data.name : (self).ptr->name)
static inline t_symbol ts_subtree_symbol(Subtree self) static inline TSSymbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); }
{ static inline bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); }
return SUBTREE_GET(self, symbol); static inline bool ts_subtree_named(Subtree self) { return SUBTREE_GET(self, named); }
} static inline bool ts_subtree_extra(Subtree self) { return SUBTREE_GET(self, extra); }
static inline bool ts_subtree_visible(Subtree self) static inline bool ts_subtree_has_changes(Subtree self) { return SUBTREE_GET(self, has_changes); }
{ static inline bool ts_subtree_missing(Subtree self) { return SUBTREE_GET(self, is_missing); }
return SUBTREE_GET(self, visible); static inline bool ts_subtree_is_keyword(Subtree self) { return SUBTREE_GET(self, is_keyword); }
} static inline TSStateId ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); }
static inline bool ts_subtree_named(Subtree self) static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE_GET(self, lookahead_bytes); }
{
return SUBTREE_GET(self, named);
}
static inline bool ts_subtree_extra(Subtree self)
{
return SUBTREE_GET(self, extra);
}
static inline bool ts_subtree_has_changes(Subtree self)
{
return SUBTREE_GET(self, has_changes);
}
static inline bool ts_subtree_missing(Subtree self)
{
return SUBTREE_GET(self, is_missing);
}
static inline bool ts_subtree_is_keyword(Subtree self)
{
return SUBTREE_GET(self, is_keyword);
}
static inline t_state_id ts_subtree_parse_state(Subtree self)
{
return SUBTREE_GET(self, parse_state);
}
static inline t_u32 ts_subtree_lookahead_bytes(Subtree self)
{
return SUBTREE_GET(self, lookahead_bytes);
}
#undef SUBTREE_GET #undef SUBTREE_GET
// Get the size needed to store a heap-allocated subtree with the given // Get the size needed to store a heap-allocated subtree with the given
// number of children. // number of children.
static inline size_t ts_subtree_alloc_size(t_u32 child_count) static inline size_t ts_subtree_alloc_size(uint32_t child_count) {
{
return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData); return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData);
} }
// Get a subtree's children, which are allocated immediately before the // Get a subtree's children, which are allocated immediately before the
// tree's own heap data. // tree's own heap data.
#define ts_subtree_children(self) \ #define ts_subtree_children(self) \
((self).data.is_inline \ ((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count)
? NULL \
: (Subtree *)((self).ptr) - (self).ptr->child_count)
static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) {
{ if (self->data.is_inline) {
if (self->data.is_inline)
{
self->data.extra = is_extra; self->data.extra = is_extra;
} } else {
else
{
self->ptr->extra = is_extra; self->ptr->extra = is_extra;
} }
} }
static inline t_symbol ts_subtree_leaf_symbol(Subtree self) static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) {
{ if (self.data.is_inline) return self.data.symbol;
if (self.data.is_inline) if (self.ptr->child_count == 0) return self.ptr->symbol;
return self.data.symbol;
if (self.ptr->child_count == 0)
return self.ptr->symbol;
return self.ptr->first_leaf.symbol; return self.ptr->first_leaf.symbol;
} }
static inline t_state_id ts_subtree_leaf_parse_state(Subtree self) static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) {
{ if (self.data.is_inline) return self.data.parse_state;
if (self.data.is_inline) if (self.ptr->child_count == 0) return self.ptr->parse_state;
return self.data.parse_state;
if (self.ptr->child_count == 0)
return self.ptr->parse_state;
return self.ptr->first_leaf.parse_state; return self.ptr->first_leaf.parse_state;
} }
static inline t_parse_length ts_subtree_padding(Subtree self) static inline Length ts_subtree_padding(Subtree self) {
{ if (self.data.is_inline) {
if (self.data.is_inline) Length result = {self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}};
{
t_parse_length result = {self.data.padding_bytes,
{self.data.padding_rows, self.data.padding_columns}};
return result; return result;
} } else {
else
{
return self.ptr->padding; return self.ptr->padding;
} }
} }
static inline t_parse_length ts_subtree_size(Subtree self) static inline Length ts_subtree_size(Subtree self) {
{ if (self.data.is_inline) {
if (self.data.is_inline) Length result = {self.data.size_bytes, {0, self.data.size_bytes}};
{
t_parse_length result = {self.data.size_bytes, {0, self.data.size_bytes}};
return result; return result;
} } else {
else
{
return self.ptr->size; return self.ptr->size;
} }
} }
static inline t_parse_length ts_subtree_total_size(Subtree self) static inline Length ts_subtree_total_size(Subtree self) {
{
return length_add(ts_subtree_padding(self), ts_subtree_size(self)); return length_add(ts_subtree_padding(self), ts_subtree_size(self));
} }
static inline t_u32 ts_subtree_total_bytes(Subtree self) static inline uint32_t ts_subtree_total_bytes(Subtree self) {
{
return ts_subtree_total_size(self).bytes; return ts_subtree_total_size(self).bytes;
} }
static inline t_u32 ts_subtree_child_count(Subtree self) static inline uint32_t ts_subtree_child_count(Subtree self) {
{
return self.data.is_inline ? 0 : self.ptr->child_count; return self.data.is_inline ? 0 : self.ptr->child_count;
} }
static inline t_u32 ts_subtree_repeat_depth(Subtree self) static inline uint32_t ts_subtree_repeat_depth(Subtree self) {
{
return self.data.is_inline ? 0 : self.ptr->repeat_depth; return self.data.is_inline ? 0 : self.ptr->repeat_depth;
} }
static inline t_u32 ts_subtree_is_repetition(Subtree self) static inline uint32_t ts_subtree_is_repetition(Subtree self) {
{ return self.data.is_inline
return self.data.is_inline ? 0 ? 0
: !self.ptr->named && !self.ptr->visible && : !self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0;
self.ptr->child_count != 0;
} }
static inline t_u32 ts_subtree_visible_descendant_count(Subtree self) static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) {
{
return (self.data.is_inline || self.ptr->child_count == 0) return (self.data.is_inline || self.ptr->child_count == 0)
? 0 ? 0
: self.ptr->visible_descendant_count; : self.ptr->visible_descendant_count;
} }
static inline t_u32 ts_subtree_visible_child_count(Subtree self) static inline uint32_t ts_subtree_visible_child_count(Subtree self) {
{ if (ts_subtree_child_count(self) > 0) {
if (ts_subtree_child_count(self) > 0)
{
return self.ptr->visible_child_count; return self.ptr->visible_child_count;
} } else {
else
{
return 0; return 0;
} }
} }
static inline t_u32 ts_subtree_error_cost(Subtree self) static inline uint32_t ts_subtree_error_cost(Subtree self) {
{ if (ts_subtree_missing(self)) {
if (ts_subtree_missing(self))
{
return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
} } else {
else
{
return self.data.is_inline ? 0 : self.ptr->error_cost; return self.data.is_inline ? 0 : self.ptr->error_cost;
} }
} }
static inline t_i32 ts_subtree_dynamic_precedence(Subtree self) static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
{ return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence;
return (self.data.is_inline || self.ptr->child_count == 0)
? 0
: self.ptr->dynamic_precedence;
} }
static inline t_u16 ts_subtree_production_id(Subtree self) static inline uint16_t ts_subtree_production_id(Subtree self) {
{ if (ts_subtree_child_count(self) > 0) {
if (ts_subtree_child_count(self) > 0)
{
return self.ptr->production_id; return self.ptr->production_id;
} } else {
else
{
return 0; return 0;
} }
} }
static inline bool ts_subtree_fragile_left(Subtree self) static inline bool ts_subtree_fragile_left(Subtree self) {
{
return self.data.is_inline ? false : self.ptr->fragile_left; return self.data.is_inline ? false : self.ptr->fragile_left;
} }
static inline bool ts_subtree_fragile_right(Subtree self) static inline bool ts_subtree_fragile_right(Subtree self) {
{
return self.data.is_inline ? false : self.ptr->fragile_right; return self.data.is_inline ? false : self.ptr->fragile_right;
} }
static inline bool ts_subtree_has_external_tokens(Subtree self) static inline bool ts_subtree_has_external_tokens(Subtree self) {
{
return self.data.is_inline ? false : self.ptr->has_external_tokens; return self.data.is_inline ? false : self.ptr->has_external_tokens;
} }
static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) {
{ return self.data.is_inline ? false : self.ptr->has_external_scanner_state_change;
return self.data.is_inline ? false
: self.ptr->has_external_scanner_state_change;
} }
static inline bool ts_subtree_depends_on_column(Subtree self) static inline bool ts_subtree_depends_on_column(Subtree self) {
{
return self.data.is_inline ? false : self.ptr->depends_on_column; return self.data.is_inline ? false : self.ptr->depends_on_column;
} }
static inline bool ts_subtree_is_fragile(Subtree self) static inline bool ts_subtree_is_fragile(Subtree self) {
{ return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right);
return self.data.is_inline
? false
: (self.ptr->fragile_left || self.ptr->fragile_right);
} }
static inline bool ts_subtree_is_error(Subtree self) static inline bool ts_subtree_is_error(Subtree self) {
{
return ts_subtree_symbol(self) == ts_builtin_sym_error; return ts_subtree_symbol(self) == ts_builtin_sym_error;
} }
static inline bool ts_subtree_is_eof(Subtree self) static inline bool ts_subtree_is_eof(Subtree self) {
{
return ts_subtree_symbol(self) == ts_builtin_sym_end; return ts_subtree_symbol(self) == ts_builtin_sym_end;
} }
static inline Subtree ts_subtree_from_mut(MutableSubtree self) static inline Subtree ts_subtree_from_mut(MutableSubtree self) {
{
Subtree result; Subtree result;
result.data = self.data; result.data = self.data;
return result; return result;
} }
static inline MutableSubtree ts_subtree_to_mt_unsafe(Subtree self) static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) {
{
MutableSubtree result; MutableSubtree result;
result.data = self.data; result.data = self.data;
return result; return result;
} }
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_SUBTREE_H_ #endif // TREE_SITTER_SUBTREE_H_

View file

@ -1,62 +1,62 @@
#define _POSIX_C_SOURCE 200112L #define _POSIX_C_SOURCE 200112L
#include "parser/api.h" #include "./api.h"
#include "./array.h" #include "./array.h"
#include "./get_changed_ranges.h"
#include "parser/parser_length.h" #include "./length.h"
#include "./subtree.h" #include "./subtree.h"
#include "./tree_cursor.h" #include "./tree_cursor.h"
#include "./tree.h" #include "./tree.h"
t_parse_tree *ts_tree_new( TSTree *ts_tree_new(
Subtree root, const t_language *language, Subtree root, const TSLanguage *language,
const t_parser_range *included_ranges, unsigned included_range_count const TSRange *included_ranges, unsigned included_range_count
) { ) {
t_parse_tree *result = malloc(sizeof(t_parse_tree)); TSTree *result = ts_malloc(sizeof(TSTree));
result->root = root; result->root = root;
result->language = ts_language_copy(language); result->language = ts_language_copy(language);
result->included_ranges = calloc(included_range_count, sizeof(t_parser_range)); result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange));
memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(t_parser_range)); memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange));
result->included_range_count = included_range_count; result->included_range_count = included_range_count;
return result; return result;
} }
t_parse_tree *ts_tree_copy(const t_parse_tree *self) { TSTree *ts_tree_copy(const TSTree *self) {
ts_subtree_retain(self->root); ts_subtree_retain(self->root);
return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count); return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count);
} }
void ts_tree_delete(t_parse_tree *self) { void ts_tree_delete(TSTree *self) {
if (!self) return; if (!self) return;
SubtreePool pool = ts_subtree_pool_new(0); SubtreePool pool = ts_subtree_pool_new(0);
ts_subtree_release(&pool, self->root); ts_subtree_release(&pool, self->root);
ts_subtree_pool_delete(&pool); ts_subtree_pool_delete(&pool);
ts_language_delete(self->language); ts_language_delete(self->language);
free(self->included_ranges); ts_free(self->included_ranges);
free(self); ts_free(self);
} }
t_parse_node ts_tree_root_node(const t_parse_tree *self) { TSNode ts_tree_root_node(const TSTree *self) {
return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0); return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0);
} }
t_parse_node ts_tree_root_node_with_offset( TSNode ts_tree_root_node_with_offset(
const t_parse_tree *self, const TSTree *self,
t_u32 offset_bytes, uint32_t offset_bytes,
t_point offset_extent TSPoint offset_extent
) { ) {
t_parse_length offset = {offset_bytes, offset_extent}; Length offset = {offset_bytes, offset_extent};
return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0); return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0);
} }
const t_language *ts_tree_language(const t_parse_tree *self) { const TSLanguage *ts_tree_language(const TSTree *self) {
return self->language; return self->language;
} }
void ts_tree_edit(t_parse_tree *self, const t_input_edit *edit) { void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
for (unsigned i = 0; i < self->included_range_count; i++) { for (unsigned i = 0; i < self->included_range_count; i++) {
t_parser_range *range = &self->included_ranges[i]; TSRange *range = &self->included_ranges[i];
if (range->end_byte >= edit->old_end_byte) { if (range->end_byte >= edit->old_end_byte) {
if (range->end_byte != UINT32_MAX) { if (range->end_byte != UINT32_MAX) {
range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte); range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte);
@ -66,7 +66,7 @@ void ts_tree_edit(t_parse_tree *self, const t_input_edit *edit) {
); );
if (range->end_byte < edit->new_end_byte) { if (range->end_byte < edit->new_end_byte) {
range->end_byte = UINT32_MAX; range->end_byte = UINT32_MAX;
range->end_point = point_val_max(); range->end_point = POINT_MAX;
} }
} }
} else if (range->end_byte > edit->start_byte) { } else if (range->end_byte > edit->start_byte) {
@ -81,7 +81,7 @@ void ts_tree_edit(t_parse_tree *self, const t_input_edit *edit) {
); );
if (range->start_byte < edit->new_end_byte) { if (range->start_byte < edit->new_end_byte) {
range->start_byte = UINT32_MAX; range->start_byte = UINT32_MAX;
range->start_point = point_val_max(); range->start_point = POINT_MAX;
} }
} else if (range->start_byte > edit->start_byte) { } else if (range->start_byte > edit->start_byte) {
range->start_byte = edit->start_byte; range->start_byte = edit->start_byte;
@ -94,13 +94,38 @@ void ts_tree_edit(t_parse_tree *self, const t_input_edit *edit) {
ts_subtree_pool_delete(&pool); ts_subtree_pool_delete(&pool);
} }
t_parser_range *ts_tree_included_ranges(const t_parse_tree *self, t_u32 *length) { TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) {
*length = self->included_range_count; *length = self->included_range_count;
t_parser_range *ranges = calloc(self->included_range_count, sizeof(t_parser_range)); TSRange *ranges = ts_calloc(self->included_range_count, sizeof(TSRange));
memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(t_parser_range)); memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(TSRange));
return ranges; return ranges;
} }
TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tree, uint32_t *length) {
TreeCursor cursor1 = {NULL, array_new(), 0};
TreeCursor cursor2 = {NULL, array_new(), 0};
ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree));
ts_tree_cursor_init(&cursor2, ts_tree_root_node(new_tree));
TSRangeArray included_range_differences = array_new();
ts_range_array_get_changed_ranges(
old_tree->included_ranges, old_tree->included_range_count,
new_tree->included_ranges, new_tree->included_range_count,
&included_range_differences
);
TSRange *result;
*length = ts_subtree_get_changed_ranges(
&old_tree->root, &new_tree->root, &cursor1, &cursor2,
old_tree->language, &included_range_differences, &result
);
array_delete(&included_range_differences);
array_delete(&cursor1.stack);
array_delete(&cursor2.stack);
return result;
}
#ifdef _WIN32 #ifdef _WIN32
#include <io.h> #include <io.h>
@ -117,7 +142,7 @@ int _ts_dup(HANDLE handle) {
return _open_osfhandle((intptr_t)dup_handle, 0); return _open_osfhandle((intptr_t)dup_handle, 0);
} }
void ts_tree_print_dot_graph(const t_parse_tree *self, int fd) { void ts_tree_print_dot_graph(const TSTree *self, int fd) {
FILE *file = _fdopen(_ts_dup((HANDLE)_get_osfhandle(fd)), "a"); FILE *file = _fdopen(_ts_dup((HANDLE)_get_osfhandle(fd)), "a");
ts_subtree_print_dot_graph(self->root, self->language, file); ts_subtree_print_dot_graph(self->root, self->language, file);
fclose(file); fclose(file);
@ -131,7 +156,7 @@ int _ts_dup(int file_descriptor) {
return dup(file_descriptor); return dup(file_descriptor);
} }
void ts_tree_print_dot_graph(const t_parse_tree *self, int file_descriptor) { void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) {
FILE *file = fdopen(_ts_dup(file_descriptor), "a"); FILE *file = fdopen(_ts_dup(file_descriptor), "a");
ts_subtree_print_dot_graph(self->root, self->language, file); ts_subtree_print_dot_graph(self->root, self->language, file);
fclose(file); fclose(file);

View file

@ -3,25 +3,29 @@
#include "./subtree.h" #include "./subtree.h"
typedef struct #ifdef __cplusplus
{ extern "C" {
#endif
typedef struct {
const Subtree *child; const Subtree *child;
const Subtree *parent; const Subtree *parent;
t_parse_length position; Length position;
t_symbol alias_symbol; TSSymbol alias_symbol;
} ParentCacheEntry; } ParentCacheEntry;
struct s_parse_tree struct TSTree {
{
Subtree root; Subtree root;
const t_language *language; const TSLanguage *language;
t_parser_range *included_ranges; TSRange *included_ranges;
t_u32 included_range_count; unsigned included_range_count;
}; };
t_parse_tree *ts_tree_new(Subtree root, const t_language *language, TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *, unsigned);
const t_parser_range *, t_u32); TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol);
t_parse_node ts_node_new(const t_parse_tree *, const Subtree *, t_parse_length,
t_symbol); #ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_TREE_H_ #endif // TREE_SITTER_TREE_H_

View file

@ -1,22 +1,22 @@
#include "parser/api.h" #include "./api.h"
#include "./alloc.h"
#include "./tree_cursor.h" #include "./tree_cursor.h"
#include "./language.h" #include "./language.h"
#include "./tree.h" #include "./tree.h"
typedef struct { typedef struct {
Subtree parent; Subtree parent;
const t_parse_tree *tree; const TSTree *tree;
t_parse_length position; Length position;
t_u32 child_index; uint32_t child_index;
t_u32 structural_child_index; uint32_t structural_child_index;
t_u32 descendant_index; uint32_t descendant_index;
const t_symbol *alias_sequence; const TSSymbol *alias_sequence;
} CursorChildIterator; } CursorChildIterator;
// CursorChildIterator // CursorChildIterator
static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, t_u32 index) { static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, uint32_t index) {
TreeCursorEntry *entry = &self->stack.contents[index]; TreeCursorEntry *entry = &self->stack.contents[index];
if (index == 0 || ts_subtree_visible(*entry->subtree)) { if (index == 0 || ts_subtree_visible(*entry->subtree)) {
return true; return true;
@ -37,12 +37,12 @@ static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCurs
if (ts_subtree_child_count(*last_entry->subtree) == 0) { if (ts_subtree_child_count(*last_entry->subtree) == 0) {
return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, 0, NULL}; return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, 0, NULL};
} }
const t_symbol *alias_sequence = ts_language_alias_sequence( const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->tree->language, self->tree->language,
last_entry->subtree->ptr->production_id last_entry->subtree->ptr->production_id
); );
t_u32 descendant_index = last_entry->descendant_index; uint32_t descendant_index = last_entry->descendant_index;
if (ts_tree_cursor_is_entry_visible(self, self->stack.size - 1)) { if (ts_tree_cursor_is_entry_visible(self, self->stack.size - 1)) {
descendant_index += 1; descendant_index += 1;
} }
@ -101,12 +101,12 @@ static inline bool ts_tree_cursor_child_iterator_next(
// can only be computed if `b` has zero rows. Otherwise, this function // can only be computed if `b` has zero rows. Otherwise, this function
// returns `LENGTH_UNDEFINED`, and the caller needs to recompute // returns `LENGTH_UNDEFINED`, and the caller needs to recompute
// the position some other way. // the position some other way.
static inline t_parse_length length_backtrack(t_parse_length a, t_parse_length b) { static inline Length length_backtrack(Length a, Length b) {
if (length_is_undefined(a) || b.extent.row != 0) { if (length_is_undefined(a) || b.extent.row != 0) {
return LENGTH_UNDEFINED; return LENGTH_UNDEFINED;
} }
t_parse_length result; Length result;
result.bytes = a.bytes - b.bytes; result.bytes = a.bytes - b.bytes;
result.extent.row = a.extent.row; result.extent.row = a.extent.row;
result.extent.column = a.extent.column - b.extent.column; result.extent.column = a.extent.column - b.extent.column;
@ -120,7 +120,7 @@ static inline bool ts_tree_cursor_child_iterator_previous(
) { ) {
// this is mostly a reverse `ts_tree_cursor_child_iterator_next` taking into // this is mostly a reverse `ts_tree_cursor_child_iterator_next` taking into
// account unsigned underflow // account unsigned underflow
if (!self->parent.ptr || (t_i8)self->child_index == -1) return false; if (!self->parent.ptr || (int8_t)self->child_index == -1) return false;
const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
*result = (TreeCursorEntry) { *result = (TreeCursorEntry) {
.subtree = child, .subtree = child,
@ -141,26 +141,26 @@ static inline bool ts_tree_cursor_child_iterator_previous(
// unsigned can underflow so compare it to child_count // unsigned can underflow so compare it to child_count
if (self->child_index < self->parent.ptr->child_count) { if (self->child_index < self->parent.ptr->child_count) {
Subtree previous_child = ts_subtree_children(self->parent)[self->child_index]; Subtree previous_child = ts_subtree_children(self->parent)[self->child_index];
t_parse_length size = ts_subtree_size(previous_child); Length size = ts_subtree_size(previous_child);
self->position = length_backtrack(self->position, size); self->position = length_backtrack(self->position, size);
} }
return true; return true;
} }
// t_parse_tree_cursor - lifecycle // TSTreeCursor - lifecycle
t_parse_tree_cursor ts_tree_cursor_new(t_parse_node node) { TSTreeCursor ts_tree_cursor_new(TSNode node) {
t_parse_tree_cursor self = {NULL, NULL, {0, 0, 0}}; TSTreeCursor self = {NULL, NULL, {0, 0, 0}};
ts_tree_cursor_init((TreeCursor *)&self, node); ts_tree_cursor_init((TreeCursor *)&self, node);
return self; return self;
} }
void ts_tree_cursor_reset(t_parse_tree_cursor *_self, t_parse_node node) { void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) {
ts_tree_cursor_init((TreeCursor *)_self, node); ts_tree_cursor_init((TreeCursor *)_self, node);
} }
void ts_tree_cursor_init(TreeCursor *self, t_parse_node node) { void ts_tree_cursor_init(TreeCursor *self, TSNode node) {
self->tree = node.tree; self->tree = node.tree;
self->root_alias_symbol = node.context[3]; self->root_alias_symbol = node.context[3];
array_clear(&self->stack); array_clear(&self->stack);
@ -176,14 +176,14 @@ void ts_tree_cursor_init(TreeCursor *self, t_parse_node node) {
})); }));
} }
void ts_tree_cursor_delete(t_parse_tree_cursor *_self) { void ts_tree_cursor_delete(TSTreeCursor *_self) {
TreeCursor *self = (TreeCursor *)_self; TreeCursor *self = (TreeCursor *)_self;
array_delete(&self->stack); array_delete(&self->stack);
} }
// t_parse_tree_cursor - walking the tree // TSTreeCursor - walking the tree
TreeCursorStep ts_tree_cursor_goto_first_child_internal(t_parse_tree_cursor *_self) { TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self) {
TreeCursor *self = (TreeCursor *)_self; TreeCursor *self = (TreeCursor *)_self;
bool visible; bool visible;
TreeCursorEntry entry; TreeCursorEntry entry;
@ -201,7 +201,7 @@ TreeCursorStep ts_tree_cursor_goto_first_child_internal(t_parse_tree_cursor *_se
return TreeCursorStepNone; return TreeCursorStepNone;
} }
bool ts_tree_cursor_goto_first_child(t_parse_tree_cursor *self) { bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) {
for (;;) { for (;;) {
switch (ts_tree_cursor_goto_first_child_internal(self)) { switch (ts_tree_cursor_goto_first_child_internal(self)) {
case TreeCursorStepHidden: case TreeCursorStepHidden:
@ -215,7 +215,7 @@ bool ts_tree_cursor_goto_first_child(t_parse_tree_cursor *self) {
return false; return false;
} }
TreeCursorStep ts_tree_cursor_goto_last_child_internal(t_parse_tree_cursor *_self) { TreeCursorStep ts_tree_cursor_goto_last_child_internal(TSTreeCursor *_self) {
TreeCursor *self = (TreeCursor *)_self; TreeCursor *self = (TreeCursor *)_self;
bool visible; bool visible;
TreeCursorEntry entry; TreeCursorEntry entry;
@ -242,7 +242,7 @@ TreeCursorStep ts_tree_cursor_goto_last_child_internal(t_parse_tree_cursor *_sel
return TreeCursorStepNone; return TreeCursorStepNone;
} }
bool ts_tree_cursor_goto_last_child(t_parse_tree_cursor *self) { bool ts_tree_cursor_goto_last_child(TSTreeCursor *self) {
for (;;) { for (;;) {
switch (ts_tree_cursor_goto_last_child_internal(self)) { switch (ts_tree_cursor_goto_last_child_internal(self)) {
case TreeCursorStepHidden: case TreeCursorStepHidden:
@ -256,14 +256,14 @@ bool ts_tree_cursor_goto_last_child(t_parse_tree_cursor *self) {
return false; return false;
} }
static inline t_i64 ts_tree_cursor_goto_first_child_for_byte_and_point( static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point(
t_parse_tree_cursor *_self, TSTreeCursor *_self,
t_u32 goal_byte, uint32_t goal_byte,
t_point goal_point TSPoint goal_point
) { ) {
TreeCursor *self = (TreeCursor *)_self; TreeCursor *self = (TreeCursor *)_self;
t_u32 initial_size = self->stack.size; uint32_t initial_size = self->stack.size;
t_u32 visible_child_index = 0; uint32_t visible_child_index = 0;
bool did_descend; bool did_descend;
do { do {
@ -273,9 +273,9 @@ static inline t_i64 ts_tree_cursor_goto_first_child_for_byte_and_point(
TreeCursorEntry entry; TreeCursorEntry entry;
CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
t_parse_length entry_end = length_add(entry.position, ts_subtree_size(*entry.subtree)); Length entry_end = length_add(entry.position, ts_subtree_size(*entry.subtree));
bool at_goal = entry_end.bytes >= goal_byte && point_gte(entry_end.extent, goal_point); bool at_goal = entry_end.bytes >= goal_byte && point_gte(entry_end.extent, goal_point);
t_u32 visible_child_count = ts_subtree_visible_child_count(*entry.subtree); uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree);
if (at_goal) { if (at_goal) {
if (visible) { if (visible) {
array_push(&self->stack, entry); array_push(&self->stack, entry);
@ -298,19 +298,19 @@ static inline t_i64 ts_tree_cursor_goto_first_child_for_byte_and_point(
return -1; return -1;
} }
t_i64 ts_tree_cursor_goto_first_child_for_byte(t_parse_tree_cursor *self, t_u32 goal_byte) { int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte) {
return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, point_val_zero()); return ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, POINT_ZERO);
} }
t_i64 ts_tree_cursor_goto_first_child_for_point(t_parse_tree_cursor *self, t_point goal_point) { int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point) {
return ts_tree_cursor_goto_first_child_for_byte_and_point(self, 0, goal_point); return ts_tree_cursor_goto_first_child_for_byte_and_point(self, 0, goal_point);
} }
TreeCursorStep ts_tree_cursor_goto_sibling_internal( TreeCursorStep ts_tree_cursor_goto_sibling_internal(
t_parse_tree_cursor *_self, TSTreeCursor *_self,
bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)) { bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)) {
TreeCursor *self = (TreeCursor *)_self; TreeCursor *self = (TreeCursor *)_self;
t_u32 initial_size = self->stack.size; uint32_t initial_size = self->stack.size;
while (self->stack.size > 1) { while (self->stack.size > 1) {
TreeCursorEntry entry = array_pop(&self->stack); TreeCursorEntry entry = array_pop(&self->stack);
@ -341,11 +341,11 @@ TreeCursorStep ts_tree_cursor_goto_sibling_internal(
return TreeCursorStepNone; return TreeCursorStepNone;
} }
TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(t_parse_tree_cursor *_self) { TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) {
return ts_tree_cursor_goto_sibling_internal(_self, ts_tree_cursor_child_iterator_next); return ts_tree_cursor_goto_sibling_internal(_self, ts_tree_cursor_child_iterator_next);
} }
bool ts_tree_cursor_goto_next_sibling(t_parse_tree_cursor *self) { bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) {
switch (ts_tree_cursor_goto_next_sibling_internal(self)) { switch (ts_tree_cursor_goto_next_sibling_internal(self)) {
case TreeCursorStepHidden: case TreeCursorStepHidden:
ts_tree_cursor_goto_first_child(self); ts_tree_cursor_goto_first_child(self);
@ -357,7 +357,7 @@ bool ts_tree_cursor_goto_next_sibling(t_parse_tree_cursor *self) {
} }
} }
TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(t_parse_tree_cursor *_self) { TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(TSTreeCursor *_self) {
// since subtracting across row loses column information, we may have to // since subtracting across row loses column information, we may have to
// restore it // restore it
TreeCursor *self = (TreeCursor *)_self; TreeCursor *self = (TreeCursor *)_self;
@ -374,14 +374,14 @@ TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(t_parse_tree_cursor
// restore position from the parent node // restore position from the parent node
const TreeCursorEntry *parent = &self->stack.contents[self->stack.size - 2]; const TreeCursorEntry *parent = &self->stack.contents[self->stack.size - 2];
t_parse_length position = parent->position; Length position = parent->position;
t_u32 child_index = array_back(&self->stack)->child_index; uint32_t child_index = array_back(&self->stack)->child_index;
const Subtree *children = ts_subtree_children((*(parent->subtree))); const Subtree *children = ts_subtree_children((*(parent->subtree)));
if (child_index > 0) { if (child_index > 0) {
// skip first child padding since its position should match the position of the parent // skip first child padding since its position should match the position of the parent
position = length_add(position, ts_subtree_size(children[0])); position = length_add(position, ts_subtree_size(children[0]));
for (t_u32 i = 1; i < child_index; ++i) { for (uint32_t i = 1; i < child_index; ++i) {
position = length_add(position, ts_subtree_total_size(children[i])); position = length_add(position, ts_subtree_total_size(children[i]));
} }
position = length_add(position, ts_subtree_padding(children[child_index])); position = length_add(position, ts_subtree_padding(children[child_index]));
@ -392,7 +392,7 @@ TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(t_parse_tree_cursor
return step; return step;
} }
bool ts_tree_cursor_goto_previous_sibling(t_parse_tree_cursor *self) { bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self) {
switch (ts_tree_cursor_goto_previous_sibling_internal(self)) { switch (ts_tree_cursor_goto_previous_sibling_internal(self)) {
case TreeCursorStepHidden: case TreeCursorStepHidden:
ts_tree_cursor_goto_last_child(self); ts_tree_cursor_goto_last_child(self);
@ -404,7 +404,7 @@ bool ts_tree_cursor_goto_previous_sibling(t_parse_tree_cursor *self) {
} }
} }
bool ts_tree_cursor_goto_parent(t_parse_tree_cursor *_self) { bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) {
TreeCursor *self = (TreeCursor *)_self; TreeCursor *self = (TreeCursor *)_self;
for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) { for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) {
if (ts_tree_cursor_is_entry_visible(self, i)) { if (ts_tree_cursor_is_entry_visible(self, i)) {
@ -416,16 +416,16 @@ bool ts_tree_cursor_goto_parent(t_parse_tree_cursor *_self) {
} }
void ts_tree_cursor_goto_descendant( void ts_tree_cursor_goto_descendant(
t_parse_tree_cursor *_self, TSTreeCursor *_self,
t_u32 goal_descendant_index uint32_t goal_descendant_index
) { ) {
TreeCursor *self = (TreeCursor *)_self; TreeCursor *self = (TreeCursor *)_self;
// Ascend to the lowest ancestor that contains the goal node. // Ascend to the lowest ancestor that contains the goal node.
for (;;) { for (;;) {
t_u32 i = self->stack.size - 1; uint32_t i = self->stack.size - 1;
TreeCursorEntry *entry = &self->stack.contents[i]; TreeCursorEntry *entry = &self->stack.contents[i];
t_u32 next_descendant_index = uint32_t next_descendant_index =
entry->descendant_index + entry->descendant_index +
(ts_tree_cursor_is_entry_visible(self, i) ? 1 : 0) + (ts_tree_cursor_is_entry_visible(self, i) ? 1 : 0) +
ts_subtree_visible_descendant_count(*entry->subtree); ts_subtree_visible_descendant_count(*entry->subtree);
@ -466,16 +466,16 @@ void ts_tree_cursor_goto_descendant(
} while (did_descend); } while (did_descend);
} }
t_u32 ts_tree_cursor_current_descendant_index(const t_parse_tree_cursor *_self) { uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *_self) {
const TreeCursor *self = (const TreeCursor *)_self; const TreeCursor *self = (const TreeCursor *)_self;
TreeCursorEntry *last_entry = array_back(&self->stack); TreeCursorEntry *last_entry = array_back(&self->stack);
return last_entry->descendant_index; return last_entry->descendant_index;
} }
t_parse_node ts_tree_cursor_current_node(const t_parse_tree_cursor *_self) { TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
const TreeCursor *self = (const TreeCursor *)_self; const TreeCursor *self = (const TreeCursor *)_self;
TreeCursorEntry *last_entry = array_back(&self->stack); TreeCursorEntry *last_entry = array_back(&self->stack);
t_symbol alias_symbol = self->root_alias_symbol; TSSymbol alias_symbol = self->root_alias_symbol;
if (self->stack.size > 1 && !ts_subtree_extra(*last_entry->subtree)) { if (self->stack.size > 1 && !ts_subtree_extra(*last_entry->subtree)) {
TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2];
alias_symbol = ts_language_alias_at( alias_symbol = ts_language_alias_at(
@ -495,12 +495,12 @@ t_parse_node ts_tree_cursor_current_node(const t_parse_tree_cursor *_self) {
// Private - Get various facts about the current node that are needed // Private - Get various facts about the current node that are needed
// when executing tree queries. // when executing tree queries.
void ts_tree_cursor_current_status( void ts_tree_cursor_current_status(
const t_parse_tree_cursor *_self, const TSTreeCursor *_self,
t_field_id *field_id, TSFieldId *field_id,
bool *has_later_siblings, bool *has_later_siblings,
bool *has_later_named_siblings, bool *has_later_named_siblings,
bool *can_have_later_siblings_with_this_field, bool *can_have_later_siblings_with_this_field,
t_symbol *supertypes, TSSymbol *supertypes,
unsigned *supertype_count unsigned *supertype_count
) { ) {
const TreeCursor *self = (const TreeCursor *)_self; const TreeCursor *self = (const TreeCursor *)_self;
@ -517,7 +517,7 @@ void ts_tree_cursor_current_status(
TreeCursorEntry *entry = &self->stack.contents[i]; TreeCursorEntry *entry = &self->stack.contents[i];
TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
const t_symbol *alias_sequence = ts_language_alias_sequence( const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->tree->language, self->tree->language,
parent_entry->subtree->ptr->production_id parent_entry->subtree->ptr->production_id
); );
@ -532,11 +532,11 @@ void ts_tree_cursor_current_status(
ts_subtree_symbol(subtree)) ts_subtree_symbol(subtree))
// Stop walking up when a visible ancestor is found. // Stop walking up when a visible ancestor is found.
t_symbol entry_symbol = subtree_symbol( TSSymbol entry_symbol = subtree_symbol(
*entry->subtree, *entry->subtree,
entry->structural_child_index entry->structural_child_index
); );
t_symbol_metadata entry_metadata = ts_language_symbol_metadata( TSSymbolMetadata entry_metadata = ts_language_symbol_metadata(
self->tree->language, self->tree->language,
entry_symbol entry_symbol
); );
@ -555,7 +555,7 @@ void ts_tree_cursor_current_status(
if (!ts_subtree_extra(*entry->subtree)) structural_child_index++; if (!ts_subtree_extra(*entry->subtree)) structural_child_index++;
for (unsigned j = entry->child_index + 1; j < sibling_count; j++) { for (unsigned j = entry->child_index + 1; j < sibling_count; j++) {
Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j]; Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j];
t_symbol_metadata sibling_metadata = ts_language_symbol_metadata( TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata(
self->tree->language, self->tree->language,
subtree_symbol(sibling, structural_child_index) subtree_symbol(sibling, structural_child_index)
); );
@ -581,7 +581,7 @@ void ts_tree_cursor_current_status(
#undef subtree_symbol #undef subtree_symbol
if (!ts_subtree_extra(*entry->subtree)) { if (!ts_subtree_extra(*entry->subtree)) {
const t_field_map_entry *field_map, *field_map_end; const TSFieldMapEntry *field_map, *field_map_end;
ts_language_field_map( ts_language_field_map(
self->tree->language, self->tree->language,
parent_entry->subtree->ptr->production_id, parent_entry->subtree->ptr->production_id,
@ -590,7 +590,7 @@ void ts_tree_cursor_current_status(
// Look for a field name associated with the current node. // Look for a field name associated with the current node.
if (!*field_id) { if (!*field_id) {
for (const t_field_map_entry *map = field_map; map < field_map_end; map++) { for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
if (!map->inherited && map->child_index == entry->structural_child_index) { if (!map->inherited && map->child_index == entry->structural_child_index) {
*field_id = map->field_id; *field_id = map->field_id;
break; break;
@ -600,7 +600,7 @@ void ts_tree_cursor_current_status(
// Determine if the current node can have later siblings with the same field name. // Determine if the current node can have later siblings with the same field name.
if (*field_id) { if (*field_id) {
for (const t_field_map_entry *map = field_map; map < field_map_end; map++) { for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
if ( if (
map->field_id == *field_id && map->field_id == *field_id &&
map->child_index > entry->structural_child_index map->child_index > entry->structural_child_index
@ -614,9 +614,9 @@ void ts_tree_cursor_current_status(
} }
} }
t_u32 ts_tree_cursor_current_depth(const t_parse_tree_cursor *_self) { uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *_self) {
const TreeCursor *self = (const TreeCursor *)_self; const TreeCursor *self = (const TreeCursor *)_self;
t_u32 depth = 0; uint32_t depth = 0;
for (unsigned i = 1; i < self->stack.size; i++) { for (unsigned i = 1; i < self->stack.size; i++) {
if (ts_tree_cursor_is_entry_visible(self, i)) { if (ts_tree_cursor_is_entry_visible(self, i)) {
depth++; depth++;
@ -625,12 +625,12 @@ t_u32 ts_tree_cursor_current_depth(const t_parse_tree_cursor *_self) {
return depth; return depth;
} }
t_parse_node ts_tree_cursor_parent_node(const t_parse_tree_cursor *_self) { TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self) {
const TreeCursor *self = (const TreeCursor *)_self; const TreeCursor *self = (const TreeCursor *)_self;
for (int i = (int)self->stack.size - 2; i >= 0; i--) { for (int i = (int)self->stack.size - 2; i >= 0; i--) {
TreeCursorEntry *entry = &self->stack.contents[i]; TreeCursorEntry *entry = &self->stack.contents[i];
bool is_visible = true; bool is_visible = true;
t_symbol alias_symbol = 0; TSSymbol alias_symbol = 0;
if (i > 0) { if (i > 0) {
TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
alias_symbol = ts_language_alias_at( alias_symbol = ts_language_alias_at(
@ -652,7 +652,7 @@ t_parse_node ts_tree_cursor_parent_node(const t_parse_tree_cursor *_self) {
return ts_node_new(NULL, NULL, length_zero(), 0); return ts_node_new(NULL, NULL, length_zero(), 0);
} }
t_field_id ts_tree_cursor_current_field_id(const t_parse_tree_cursor *_self) { TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {
const TreeCursor *self = (const TreeCursor *)_self; const TreeCursor *self = (const TreeCursor *)_self;
// Walk up the tree, visiting the current node and its invisible ancestors. // Walk up the tree, visiting the current node and its invisible ancestors.
@ -668,13 +668,13 @@ t_field_id ts_tree_cursor_current_field_id(const t_parse_tree_cursor *_self) {
if (ts_subtree_extra(*entry->subtree)) break; if (ts_subtree_extra(*entry->subtree)) break;
const t_field_map_entry *field_map, *field_map_end; const TSFieldMapEntry *field_map, *field_map_end;
ts_language_field_map( ts_language_field_map(
self->tree->language, self->tree->language,
parent_entry->subtree->ptr->production_id, parent_entry->subtree->ptr->production_id,
&field_map, &field_map_end &field_map, &field_map_end
); );
for (const t_field_map_entry *map = field_map; map < field_map_end; map++) { for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
if (!map->inherited && map->child_index == entry->structural_child_index) { if (!map->inherited && map->child_index == entry->structural_child_index) {
return map->field_id; return map->field_id;
} }
@ -683,8 +683,8 @@ t_field_id ts_tree_cursor_current_field_id(const t_parse_tree_cursor *_self) {
return 0; return 0;
} }
const char *ts_tree_cursor_current_field_name(const t_parse_tree_cursor *_self) { const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) {
t_field_id id = ts_tree_cursor_current_field_id(_self); TSFieldId id = ts_tree_cursor_current_field_id(_self);
if (id) { if (id) {
const TreeCursor *self = (const TreeCursor *)_self; const TreeCursor *self = (const TreeCursor *)_self;
return self->tree->language->field_names[id]; return self->tree->language->field_names[id];
@ -693,9 +693,9 @@ const char *ts_tree_cursor_current_field_name(const t_parse_tree_cursor *_self)
} }
} }
t_parse_tree_cursor ts_tree_cursor_copy(const t_parse_tree_cursor *_cursor) { TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) {
const TreeCursor *cursor = (const TreeCursor *)_cursor; const TreeCursor *cursor = (const TreeCursor *)_cursor;
t_parse_tree_cursor res = {NULL, NULL, {0, 0}}; TSTreeCursor res = {NULL, NULL, {0, 0}};
TreeCursor *copy = (TreeCursor *)&res; TreeCursor *copy = (TreeCursor *)&res;
copy->tree = cursor->tree; copy->tree = cursor->tree;
copy->root_alias_symbol = cursor->root_alias_symbol; copy->root_alias_symbol = cursor->root_alias_symbol;
@ -704,7 +704,7 @@ t_parse_tree_cursor ts_tree_cursor_copy(const t_parse_tree_cursor *_cursor) {
return res; return res;
} }
void ts_tree_cursor_reset_to(t_parse_tree_cursor *_dst, const t_parse_tree_cursor *_src) { void ts_tree_cursor_reset_to(TSTreeCursor *_dst, const TSTreeCursor *_src) {
const TreeCursor *cursor = (const TreeCursor *)_src; const TreeCursor *cursor = (const TreeCursor *)_src;
TreeCursor *copy = (TreeCursor *)_dst; TreeCursor *copy = (TreeCursor *)_dst;
copy->tree = cursor->tree; copy->tree = cursor->tree;

View file

@ -5,16 +5,16 @@
typedef struct { typedef struct {
const Subtree *subtree; const Subtree *subtree;
t_parse_length position; Length position;
t_u32 child_index; uint32_t child_index;
t_u32 structural_child_index; uint32_t structural_child_index;
t_u32 descendant_index; uint32_t descendant_index;
} TreeCursorEntry; } TreeCursorEntry;
typedef struct { typedef struct {
const t_parse_tree *tree; const TSTree *tree;
Array(TreeCursorEntry) stack; Array(TreeCursorEntry) stack;
t_symbol root_alias_symbol; TSSymbol root_alias_symbol;
} TreeCursor; } TreeCursor;
typedef enum { typedef enum {
@ -23,26 +23,26 @@ typedef enum {
TreeCursorStepVisible, TreeCursorStepVisible,
} TreeCursorStep; } TreeCursorStep;
void ts_tree_cursor_init(TreeCursor *, t_parse_node); void ts_tree_cursor_init(TreeCursor *, TSNode);
void ts_tree_cursor_current_status( void ts_tree_cursor_current_status(
const t_parse_tree_cursor *, const TSTreeCursor *,
t_field_id *, TSFieldId *,
bool *, bool *,
bool *, bool *,
bool *, bool *,
t_symbol *, TSSymbol *,
unsigned * unsigned *
); );
TreeCursorStep ts_tree_cursor_goto_first_child_internal(t_parse_tree_cursor *); TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *);
TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(t_parse_tree_cursor *); TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *);
static inline Subtree ts_tree_cursor_current_subtree(const t_parse_tree_cursor *_self) { static inline Subtree ts_tree_cursor_current_subtree(const TSTreeCursor *_self) {
const TreeCursor *self = (const TreeCursor *)_self; const TreeCursor *self = (const TreeCursor *)_self;
TreeCursorEntry *last_entry = array_back(&self->stack); TreeCursorEntry *last_entry = array_back(&self->stack);
return *last_entry->subtree; return *last_entry->subtree;
} }
t_parse_node ts_tree_cursor_parent_node(const t_parse_tree_cursor *); TSNode ts_tree_cursor_parent_node(const TSTreeCursor *);
#endif // TREE_SITTER_TREE_CURSOR_H_ #endif // TREE_SITTER_TREE_CURSOR_H_

50
parser/src/unicode.h Normal file
View file

@ -0,0 +1,50 @@
#ifndef TREE_SITTER_UNICODE_H_
#define TREE_SITTER_UNICODE_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <limits.h>
#include <stdint.h>
#define U_EXPORT
#define U_EXPORT2
#include "unicode/utf8.h"
#include "unicode/utf16.h"
// Code point value named as this library's decode-error marker; it is an
// alias for U_SENTINEL.
// NOTE(review): presumably this is what the decoders below store through
// `code_point` for malformed input — confirm against the U8_NEXT / U16_NEXT
// definitions.
static const int32_t TS_DECODE_ERROR = U_SENTINEL;
// Signature of a decoder that reads one Unicode code point from `string`,
// stores it through `code_point`, and returns the number of bytes consumed.
// NOTE(review): `length` is presumably the number of bytes available in
// `string` — confirm against the callers.
typedef uint32_t (*UnicodeDecodeFunction)(
const uint8_t *string,
uint32_t length,
int32_t *code_point
);
// Decodes the UTF-8 sequence at the start of `string`.
//
// U8_NEXT is handed `length` as the bound of the string and writes the
// decoded value through `code_point`. The return value is the offset that
// U8_NEXT advanced to, i.e. the number of bytes consumed.
static inline uint32_t ts_decode_utf8(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
) {
  uint32_t consumed = 0;

  U8_NEXT(string, consumed, length, *code_point);
  return consumed;
}
// Decodes the UTF-16 sequence at the start of `string`, viewing the bytes as
// 16-bit code units in host byte order.
//
// The decoded value is written through `code_point`. The return value is the
// number of bytes consumed (units consumed by U16_NEXT, times two).
static inline uint32_t ts_decode_utf16(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
) {
  // The units are only read, so keep the `const` qualifier instead of
  // casting it away.
  const uint16_t *units = (const uint16_t *)string;
  uint32_t i = 0;

  // NOTE(review): `length` appears to be a byte count (the UTF-8 decoder with
  // the same signature passes it to U8_NEXT as such), whereas U16_NEXT bounds
  // its look-ahead for a trail surrogate in 16-bit units. If so, a lead
  // surrogate in the last unit could make U16_NEXT read one unit past the
  // buffer — confirm against the callers before changing the bound.
  U16_NEXT(units, i, length, *code_point);

  // `i` counts 16-bit units; report bytes to the caller.
  return i * 2;
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_UNICODE_H_

View file

@ -6,15 +6,14 @@
/* By: rparodi <rparodi@student.42.fr> +#+ +:+ +#+ */ /* By: rparodi <rparodi@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */ /* +#+#+#+#+#+ +#+ */
/* Created: 2024/03/28 14:40:38 by rparodi #+# #+# */ /* Created: 2024/03/28 14:40:38 by rparodi #+# #+# */
/* Updated: 2024/04/30 16:15:53 by maiboyer ### ########.fr */ /* Updated: 2024/04/30 16:43:14 by maiboyer ### ########.fr */
/* */ /* */
/* ************************************************************************** */ /* ************************************************************************** */
#include "../includes/minishell.h"
#include "app/node.h" #include "app/node.h"
#include "me/string/str_len.h" #include "me/string/str_len.h"
#include "parser/api.h" #include "parser/api.h"
#include "parser/parser.h"
#include "../includes/minishell.h"
void print_node_data(t_node *t, t_usize depth) void print_node_data(t_node *t, t_usize depth)
{ {
@ -46,13 +45,15 @@ t_node parse_str(t_myparser *parser, t_const_str input)
return (parse_to_nodes(parser->parser, input)); return (parse_to_nodes(parser->parser, input));
} }
void ft_check(t_utils *shcat, char **input) { void ft_check(t_utils *shcat, char **input)
{
t_usize i; t_usize i;
t_usize prev_i; t_usize prev_i;
i = 0; i = 0;
prev_i = 0; prev_i = 0;
while (input[i] != NULL) { while (input[i] != NULL)
{
if (ft_strcmp(input[i], "exit") == 0) if (ft_strcmp(input[i], "exit") == 0)
ft_exit(shcat, 0); ft_exit(shcat, 0);
else if (ft_strcmp(input[i], "pwd") == 0) else if (ft_strcmp(input[i], "pwd") == 0)
@ -75,7 +76,8 @@ void ft_take_args(t_utils *shcat)
t_i32 i; t_i32 i;
i = 0; i = 0;
while (1) { while (1)
{
shcat->str_input = readline((t_const_str)shcat->name_shell); shcat->str_input = readline((t_const_str)shcat->name_shell);
if (!shcat->str_input) if (!shcat->str_input)
ft_exit(shcat, 0); ft_exit(shcat, 0);
@ -96,10 +98,11 @@ void ft_find_path(t_str arge[], t_utils *utils)
check = 0; check = 0;
while (arge[i] != NULL) while (arge[i] != NULL)
{ {
if (arge[i][0] == 'P' && arge[i][1] == 'A' && arge[i][2] == 'T' && arge[i][3] == 'H' && arge[i][4] == '=') if (arge[i][0] == 'P' && arge[i][1] == 'A' && arge[i][2] == 'T' &&
arge[i][3] == 'H' && arge[i][4] == '=')
{ {
utils->path = ft_split(arge[i] + 5, ':'); utils->path = ft_split(arge[i] + 5, ':');
return ; return;
} }
i++; i++;
} }
@ -108,9 +111,6 @@ void ft_find_path(t_str arge[], t_utils *utils)
t_language *tree_sitter_bash(void); t_language *tree_sitter_bash(void);
t_myparser create_myparser(void) t_myparser create_myparser(void)
{ {
t_language *lang; t_language *lang;
@ -127,7 +127,6 @@ void free_myparser(t_myparser self)
ts_parser_delete(self.parser); ts_parser_delete(self.parser);
} }
t_i32 main(t_i32 argc, t_str argv[], t_str arge[]) t_i32 main(t_i32 argc, t_str argv[], t_str arge[])
{ {
t_utils utils; t_utils utils;

View file

@ -6,7 +6,7 @@
/* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */ /* By: maiboyer <maiboyer@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */ /* +#+#+#+#+#+ +#+ */
/* Created: 2024/04/28 18:36:40 by maiboyer #+# #+# */ /* Created: 2024/04/28 18:36:40 by maiboyer #+# #+# */
/* Updated: 2024/04/30 16:26:08 by maiboyer ### ########.fr */ /* Updated: 2024/04/30 16:43:35 by maiboyer ### ########.fr */
/* */ /* */
/* ************************************************************************** */ /* ************************************************************************** */