Loading...
Searching...
No Matches
token.f90
Go to the documentation of this file.
1!> @file
2!! @defgroup group_token Token
3!! @brief Token classification and representation for expression parsing in fpx
4!!
5!! This module provides the lightweight but robust token infrastructure used by the
6!! fpx preprocessor when evaluating constant expressions in `#if` / `#elif` directives.
7!!
8!! It defines:
9!! - A clean enumeration of token kinds (`tokens_enum`)
10!! - A simple `token` derived type that carries both the lexical value and its semantic category
11!!
12!! These types are used internally by `evaluate_expression()` (from `fpx_token`) to parse
13!! and compute `#if DEBUG > 1 && defined(USE_MPI)`-style conditions.
14!!
15!! @par Key design goals
16!! - Minimal memory footprint
17!! - Clear separation between lexical scanning and semantic interpretation
18!! - Easy extensibility for future operators or functions
19!!
20!! @par Examples
21!!
22!! 1. Manual token creation (mostly for testing/debugging):
23!! @code{.f90}
24!! use fpx_token
25!!
26!! type(token) :: t1, t2, t3
27!!
28!! t1 = token('42', number, 1) ! numeric literal
29!! t2 = token('DEBUG', identifier, 1) ! macro name
30!! t3 = token('>', operation, 1) ! comparison operator
31!!
32!! print *, 'Token: ', t1%value, ' type=', t1%type ! → 42 type=0
33!! @endcode
34!!
35!! 2. Typical internal usage during `#if` evaluation:
36!! @code{.f90}
37!! ! (inside evaluate_expression)
38!! tokens = tokenize('defined(USE_MPI) && MPI_VERSION >= 3')
39!! ! tokens(1) → value='defined' type=identifier
40!! ! tokens(2) → value='(' type=parenthesis
41!! ! tokens(3) → value='USE_MPI' type=identifier
42!! ! ...
43!! @endcode
44!!
45!! @par Token kinds overview
46!! | Enumerator | Value | Meaning |
47!! |--------------|-------|----------------------------------------------|
48!! | `unknown` | -1 | Invalid / unrecognized token |
49!! | `number` | 0 | Integer or floating-point literal |
50!! | `operation` | 1 | ?:, +, -, *, /, ==, !=, &&, ||, !, >, <, etc.|
51!! | `identifier` | 2 | Macro name or function name (e.g. `defined`) |
52!! | `parenthesis`| 3 | `(` or `)` |
53!! | `defined` | 4 | Special keyword `defined` (treated specially)|
54!!
55module fpx_token
56 use fpx_constants, only: max_tokens
57 use fpx_string
58 use fpx_logging
59
60 implicit none; private
61
62 public :: tokenize, &
63 strtol, &
!! NOTE(review): a continuation entry from the original source (line 64) is absent
!! from this listing; the cross-reference index lists `tokens_enum` as public,
!! so the missing entry is presumably `tokens_enum, &` — confirm against the original file.
65 unknown, &
66 number, &
67 operation, &
68 identifier, &
69 parenthesis, &
70 defined
71
72 !> @brief Token kinds used in expression parsing.
73 !! Enumeration defining the possible types of tokens recognized by the tokenizer.
!! `unknown` marks unrecognized input; the remaining kinds match the categories
!! assigned by `tokenize`.
74 !! @ingroup group_token
75 enum, bind(c)
76 enumerator :: unknown = -1
77 enumerator :: number = 0
78 enumerator :: operation = 1
79 enumerator :: identifier = 2
80 enumerator :: parenthesis = 3
81 enumerator :: defined = 4
82 end enum
83
84 !> @brief Kind parameter for token type enumeration. Values are (`unknown`, `number`, `operation`, `identifier`, `parenthesis`,
85 !! `defined`)
86 !! @ingroup group_token
87 integer, parameter :: tokens_enum = kind(unknown)
88
89 !> Represents a single token in a parsed expression.
90 !! Holds the string value of the token and its classified type.
91 !! <h2 class="groupheader">Constructors</h2>
92 !! Initializes a new instance of the @link fpx_token::token token @endlink class
93 !! <h3>token(character(:), integer, integer)</h3>
94 !! @verbatim type(token) function token(character(:) value, integer type, integer start) @endverbatim
95 !!
96 !! @param[in] value
97 !! @param[in] type
!! @param[in] start
98 !!
99 !! @b Examples
100 !! @code{.f90}
101 !! a = token('9', number, 1)
102 !! @endcode
103 !!
104 !! <h2 class="groupheader">Remarks</h2>
!! All three components must be supplied to the structure constructor, since
!! `start` carries no default initialization.
105 !! @ingroup group_token
106 type, public :: token
107 character(:), allocatable :: value !< Token value
108 integer(tokens_enum) :: type !< Token type, from the enum @ref tokens_enum.
109 integer :: start !< 1-based index into the (left-trimmed) expression at which the token begins.
110 end type
111
112 !> Converts a string to integer.
113 !! <h2 class="groupheader">Methods</h2>
114 !!
115 !! @code{.f90}strtol(character(*) str, (optional) logical success)@endcode
116 !!
117 !! @param[in] str String to convert
118 !! @param[out] success Optional flag indicating successful conversion
119 !! @return Converted integer value
120 !!
121 !! @code{.f90}strtol(character(*) str, integer base, (optional) logical success)@endcode
122 !!
123 !! Converts a string to integer with explicit base handling.
124 !! Supports base 2, 8, 10, 16 and prefixes `0x`, `0b`.
125 !! @param[in] str String to convert
126 !! @param[inout] base 0 = auto-detect, otherwise forces given base
127 !! @param[out] success Optional flag indicating successful conversion
128 !! @return Converted integer value
129 !!
130 !! <h2 class="groupheader"> Examples </h2>
131 !! The following demonstrate a call to the `strtol` interface.
132 !! @code{.f90}
133 !! integer :: i, base
134 !! logical :: success
135 !!
136 !! base = 0
136 !! i = strtol(' 123', base, success = success)
137 !! ! i = 123
138 !! @endcode
139 !!
140 !! <h2 class="groupheader"> Remarks </h2>
141 !! @ingroup group_operators
142 interface strtol
143 !! @cond
144 module procedure :: strtol_default
145 module procedure :: strtol_with_base
146 !! @endcond
147 end interface
148
149contains
150
151 !> Tokenizes a preprocessor expression into an array of token structures.
152 !! Handles whitespace, multi-character operators (`&&`, `||`, `==`, etc.),
153 !! the `defined` operator (with or without parentheses), numbers in various bases,
154 !! identifiers, and parentheses.
155 !! @param[in] expr Expression string to tokenize
156 !! @param[out] tokens Allocated array receiving the tokens
157 !! @param[out] ntokens Number of tokens produced
158 !!
159 !! @b Remarks
160 !! @ingroup group_token
161 subroutine tokenize(expr, tokens, ntokens)
162 character(*), intent(in) :: expr
163 type(token), allocatable, intent(out) :: tokens(:)
164 integer, intent(out) :: ntokens
165 !private
166 character(:), allocatable :: temp
167 integer :: i, pos, len_expr
168 logical :: in_word
169 logical, save :: in_comment
170
171 if (allocated(tokens)) deallocate(tokens)
172 allocate(tokens(max_tokens))
173 ntokens = 0
174 temp = trim(adjustl(expr)) // ' '
175 len_expr = len_trim(temp)
176 i = 1
177 in_word = .false.
178
179 do while (i <= len_expr)
180 if (temp(i:i) == ' ') then
181 i = i + 1
182 in_word = .false.
183 cycle
184 end if
185
186 if (.not. in_word) then
187 ntokens = ntokens + 1
188 if (ntokens > max_tokens) then
189 call printf(render(diagnostic_report(level_error, &
190 message='The maximum number of tokens has been reached', &
191 label=label_type('Too many tokens in expression.', 1, 1)), &
192 expr))
193 return
194 end if
195 in_word = .true.
196 end if
197
198 if (temp(i:i) == '(' .or. temp(i:i) == ')') then
199 tokens(ntokens)%value = temp(i:i)
200 tokens(ntokens)%type = parenthesis
201 tokens(ntokens)%start = i
202 i = i + 1
203 in_word = .false.
204 else if (temp(i:i + 1) == '&&' .or. temp(i:i + 1) == '||' .or. temp(i:i + 1) == '==' .or. &
205 temp(i:i + 1) == '!=' .or. temp(i:i + 1) == '<=' .or. temp(i:i + 1) == '>=') then
206 tokens(ntokens)%value = temp(i:i + 1)
207 tokens(ntokens)%type = operation
208 tokens(ntokens)%start = i
209 i = i + 2
210 in_word = .false.
211 else if (temp(i:i) == '!') then
212 tokens(ntokens)%value = temp(i:i)
213 tokens(ntokens)%type = operation
214 tokens(ntokens)%start = i
215 i = i + 1
216 in_word = .false.
217 else if (temp(i:i + 1) == '**') then
218 tokens(ntokens)%value = temp(i:i + 1)
219 tokens(ntokens)%type = operation
220 tokens(ntokens)%start = i
221 i = i + 2
222 in_word = .false.
223 else if (temp(i:i + 1) == '<<' .or. temp(i:i + 1) == '>>') then
224 tokens(ntokens)%value = temp(i:i + 1)
225 tokens(ntokens)%type = operation
226 tokens(ntokens)%start = i
227 i = i + 2
228 in_word = .false.
229 else if (temp(i:i) == '<' .or. temp(i:i) == '>' .or. temp(i:i) == '=' .or. &
230 temp(i:i) == '+' .or. temp(i:i) == '-' .or. temp(i:i) == '*' .or. &
231 temp(i:i) == '/' .or. temp(i:i) == '%' .or. &
232 temp(i:i) == '?' .or. temp(i:i) == ':') then
233 tokens(ntokens)%value = temp(i:i)
234 tokens(ntokens)%type = operation
235 tokens(ntokens)%start = i
236 i = i + 1
237 in_word = .false.
238 else if (temp(i:i) == '&' .or. temp(i:i) == '|' .or. temp(i:i) == '^' .or. &
239 temp(i:i) == '~') then
240 tokens(ntokens)%value = temp(i:i)
241 tokens(ntokens)%type = operation
242 tokens(ntokens)%start = i
243 i = i + 1
244 in_word = .false.
245 else if (starts_with(temp(i:), 'defined')) then
246 i = i + 7
247 do while (i <= len_expr .and. temp(i:i) == ' ')
248 i = i + 1
249 end do
250 if (i <= len_expr .and. temp(i:i) == '(') then
251 i = i + 1
252 pos = i
253 do while (pos <= len_expr .and. temp(pos:pos) /= ')')
254 pos = pos + 1
255 end do
256 tokens(ntokens)%value = trim(adjustl(temp(i:pos - 1)))
257 tokens(ntokens)%type = defined
258 tokens(ntokens)%start = i
259 i = pos + 1
260 else
261 pos = i
262 do while (pos <= len_expr .and. temp(pos:pos) /= ' ')
263 pos = pos + 1
264 end do
265 tokens(ntokens)%value = trim(adjustl(temp(i:pos - 1)))
266 tokens(ntokens)%type = defined
267 tokens(ntokens)%start = i
268 i = pos
269 end if
270 in_word = .false.
271 else if (is_typeless(temp(i:), pos)) then
272 pos = i + pos
273 tokens(ntokens)%value = trim(adjustl(temp(i:pos - 1)))
274 tokens(ntokens)%type = number
275 tokens(ntokens)%start = i
276 i = pos
277 in_word = .false.
278 else if (is_digit(temp(i:i))) then
279 pos = i
280 do while (pos <= len_expr .and. is_digit(temp(pos:pos)))
281 pos = pos + 1
282 end do
283 tokens(ntokens)%value = trim(adjustl(temp(i:pos - 1)))
284 tokens(ntokens)%type = number
285 tokens(ntokens)%start = i
286 i = pos
287 in_word = .false.
288 else
289 pos = i
290 do while (pos <= len_expr .and. temp(pos:pos) /= ' ' .and. &
291 temp(pos:pos) /= '(' .and. temp(pos:pos) /= ')')
292 pos = pos + 1
293 end do
294 tokens(ntokens)%value = trim(temp(i:pos - 1))
295 tokens(ntokens)%type = identifier
296 tokens(ntokens)%start = i
297 i = pos
298 in_word = .false.
299 end if
300 end do
301 end subroutine
302
303 !> Tests whether a single character is a decimal digit ('0'-'9').
304 !! @param[in] ch Character to test
305 !! @return .true. if ch is a digit
306 !!
307 !! @b Remarks
308 !! @ingroup group_token
309 logical elemental function is_digit(ch) result(res)
310 character(*), intent(in) :: ch
311
312 res = verify(ch, '0123456789') == 0
313 end function
314
315 !> Detects whether a string starts a typeless constant (hex, octal, binary).
316 !! Used to avoid treating them as identifiers during tokenization.
317 !! @param[in] str Input string starting at current position
318 !! @param[out] pos Length of the typeless constant (0 if not typeless)
319 !! @return .true. if the prefix is a valid typeless constant in non-base-10
320 !!
321 !! @b Remarks
322 !! @ingroup group_token
323 logical function is_typeless(str, pos) result(res)
324 character(*), intent(in) :: str
325 integer, intent(out) :: pos
326 !private
327 integer :: i, base, n
328
329 pos = 0; base = 0; n = len(str)
330 do i = 1, n
331 if (verify(str(i:i), '0123456789xXaAbBcCdDeEfF') /= 0) then
332 pos = i
333 exit
334 end if
335 end do
336 if (pos > 0) i = strtol(str(:pos - 1), base, success=res)
337 if (base == 10) res = .false.
338 end function
339
340 !> Implementation of strtol function
341 integer function strtol_default(str, success) result(val)
342 character(*), intent(in) :: str
343 logical, intent(out), optional :: success
344 !private
345 integer :: base
346
347 base = 0
348 val = strtol_with_base(str, base, success)
349 end function
350
351 !> Implementation of strtol function with a base argument.
352 integer function strtol_with_base(str, base, success) result(val)
353 character(*), intent(in) :: str
354 integer, intent(inout) :: base
355 logical, intent(out), optional :: success
356 !private
357 integer :: i, len, digit
358 character :: c
359 logical :: is_valid, isdigit, is_lower_hex, is_upper_hex
360 character(len=len_trim(str)) :: work_str
361
362 val = 0; is_valid = .true.
363 work_str = adjustl(str) ! Remove leading spaces
364 len = len_trim(work_str)
365
366 ! Handle base 0 (auto-detect)
367 if (base == 0) then
368 if (len >= 2) then
369 if (work_str(1:2) == '0x' .or. work_str(1:2) == '0X') then
370 base = 16
371 work_str = work_str(3:len)
372 len = len - 2
373 else if (work_str(1:2) == '0b' .or. work_str(1:2) == '0B') then
374 base = 2
375 work_str = work_str(3:len)
376 len = len - 2
377 else
378 if (len > 1) then
379 if (work_str(1:1) == '0') then
380 base = 8
381 else
382 base = 10
383 end if
384 else
385 base = 10
386 end if
387 end if
388 else
389 base = 10
390 end if
391 end if
392
393 ! Validate base
394 if (base /= 2 .and. base /= 8 .and. base /= 10 .and. base /= 16) then
395 is_valid = .false.
396 if (present(success)) success = .false.
397 return
398 end if
399
400 ! Process each character
401 do i = 1, len
402 c = work_str(i:i)
403 digit = -1 ! Invalid digit marker
404
405 ! Convert character to digit
406 isdigit = c >= '0' .and. c <= '9'
407 if (isdigit) digit = ichar(c) - ichar('0')
408
409 is_lower_hex = base == 16 .and. c >= 'a' .and. c <= 'f'
410 if (is_lower_hex) digit = ichar(c) - ichar('a') + 10
411
412 is_upper_hex = base == 16 .and. c >= 'A' .and. c <= 'F'
413 if (is_upper_hex) digit = ichar(c) - ichar('A') + 10
414
415 ! Check if digit is valid
416 if (digit == -1) then
417 is_valid = .false.
418 exit
419 end if
420 if (digit >= base) then
421 is_valid = .false.
422 exit
423 end if
424
425 ! Check for potential overflow (approximate for 32-bit integer)
426 if (val > (huge(val) - digit) / base) then
427 is_valid = .false.
428 exit
429 end if
430
431 ! Accumulate value
432 val = val * base + digit
433 end do
434
435 ! Set success flag if provided
436 if (present(success)) success = is_valid
437 end function
438end module
integer, parameter, public max_tokens
Maximum number of tokens per line, set to 100 for efficient tokenization.
Definition constants.f90:23
logical function, public starts_with(str, arg1, idx)
Checks if a string starts with a given prefix. Returns .true. if the string str (after trimming leadin...
Definition string.f90:498
integer, parameter, public tokens_enum
Kind parameter for token type enumeration. Values are (unknown, number, operation,...
Definition token.f90:87
subroutine, public tokenize(expr, tokens, ntokens)
Tokenizes a preprocessor expression into an array of token structures. Handles whitespace,...
Definition token.f90:162
logical function is_typeless(str, pos)
Detects whether a string starts a typeless constant (hex, octal, binary). Used to avoid treating them...
Definition token.f90:324
logical elemental function is_digit(ch)
Tests whether a single character is a decimal digit ('0'-'9').
Definition token.f90:310
Interface to render diagnostic messages and labels.
Definition logging.f90:185
Return the trimmed length of a string.
Definition string.f90:141
Return the trimmed string.
Definition string.f90:149
Converts a string to integer.
Definition token.f90:142
Definition of diagnostic message.
Definition logging.f90:269
Represents text as a sequence of ASCII code units. The derived type wraps an allocatable character ar...
Definition logging.f90:246
Represents a single token in a parsed expression. Holds the string value of the token and its classif...
Definition token.f90:106