Loading...
Searching...
No Matches
token.f90
Go to the documentation of this file.
1!> @file
2!! @defgroup group_token Token
3!! @brief Token classification and representation for expression parsing in fpx
4!!
5!! This module provides the lightweight but robust token infrastructure used by the
6!! fpx preprocessor when evaluating constant expressions in `#if` / `#elif` directives.
7!!
8!! It defines:
9!! - A clean enumeration of token kinds (`tokens_enum`)
10!! - A simple `token` derived type that carries both the lexical value and its semantic category
11!!
12!! These types are used internally by `evaluate_expression()` (from `fpx_token`) to parse
13!! and compute `#if DEBUG > 1 && defined(USE_MPI)`-style conditions.
14!!
15!! @par Key design goals
16!! - Minimal memory footprint
17!! - Clear separation between lexical scanning and semantic interpretation
18!! - Easy extensibility for future operators or functions
19!!
20!! @par Examples
21!!
22!! 1. Manual token creation (mostly for testing/debugging):
23!! @code{.f90}
24!! use fpx_token
25!!
26!! type(token) :: t1, t2, t3
27!!
28!! t1 = token('42', number) ! numeric literal
29!! t2 = token('DEBUG', identifier) ! macro name
30!! t3 = token('>', operator) ! comparison operator
31!!
32!! print *, 'Token: ', t1%value, ' type=', t1%type ! → 42 type=0
33!! @endcode
34!!
35!! 2. Typical internal usage during `#if` evaluation:
36!! @code{.f90}
37!! ! (inside evaluate_expression)
38!! tokens = tokenize('defined(USE_MPI) && MPI_VERSION >= 3')
39!! ! tokens(1) → value=vdefined' type=identifier
40!! ! tokens(2) → value='(' type=parenthesis
41!! ! tokens(3) → value='USE_MPI' type=identifier
42!! ! ...
43!! @endcode
44!!
45!! @par Token kinds overview
46!! | Enumerator | Value | Meaning |
47!! |--------------|-------|----------------------------------------------|
48!! | `unknown` | -1 | Invalid / unrecognized token |
49!! | `number` | 0 | Integer or floating-point literal |
50!! | `operator` | 1 | ?:, +, -, *, /, ==, !=, &&, ||, !, >, <, etc.|
51!! | `identifier` | 2 | Macro name or function name (e.g. `defined`) |
52!! | `parenthesis`| 3 | `(` or `)` |
53!! | `defined` | 4 | Special keyword `defined` (treated specially)|
54!!
55module fpx_token
56 use fpx_constants, only: max_tokens
57 use fpx_string
58 use fpx_logging
59
60 implicit none; private
61
62 public :: tokenize, &
63 strtol, &
65 unknown, &
66 number, &
67 operator, &
68 identifier, &
69 parenthesis, &
70 defined
71
72 !> @brief Token kinds used in expression parsing.
73 !! Enumeration defining the possible types of tokens recognized by the tokenizer.
74 !! @ingroup group_token
75 enum, bind(c)
76 enumerator :: unknown = -1
77 enumerator :: number = 0
78 enumerator :: operator = 1
79 enumerator :: identifier = 2
80 enumerator :: parenthesis = 3
81 enumerator :: defined = 4
82 end enum
83
84 !> @brief Kind parameter for token type enumeration. Values are (`unknown`, `number`, `operator`, `identifier`, `parenthesis`,
85 !! `defined`)
86 !! @ingroup group_token
87 integer, parameter :: tokens_enum = kind(unknown)
88
89 !> Represents a single token in a parsed expression.
90 !! Holds the string value of the token and its classified type.
91 !! <h2 class="groupheader">Constructors</h2>
92 !! Initializes a new instance of the @link fpx_token::token token @endlink class
93 !! <h3>token(character(:), integer)</h3>
94 !! @verbatim type(token) function token(character(:) value, integer type) @endverbatim
95 !!
96 !! @param[in] value
97 !! @param[in] type
98 !!
99 !! @b Examples
100 !! @code{.f90}
101 !! a = token('9', number)
102 !! @endcode
103 !!
104 !! <h2 class="groupheader">Remarks</h2>
105 !! @ingroup group_token
106 type, public :: token
107 character(:), allocatable :: value !< Token value
108 integer(tokens_enum) :: type !< Token type, from the enum @ref tokens_enum.
109 end type
110
111 !> Converts a string to integer.
112 !! <h2 class="groupheader">Methods</h2>
113 !!
114 !! @code{.f90}strtol(character(*) str, (optional) logical success)@endcode
115 !!
116 !! @param[in] str String to convert
117 !! @param[out] success Optional flag indicating successful conversion
118 !! @return Converted integer value
119 !!
120 !! @code{.f90}strtol(character(*) str, integer base, (optional) logical success)@endcode
121 !!
122 !! Converts a string to integer with explicit base handling.
123 !! Supports base 2, 8, 10, 16 and prefixes `0x`, `0b`.
124 !! @param[in] str String to convert
125 !! @param[inout] base 0 = auto-detect, otherwise forces given base
126 !! @param[out] success Optional flag indicating successful conversion
127 !! @return Converted integer value
128 !!
129 !! <h2 class="groupheader"> Examples </h2>
130 !! The following demonstrate a call to the `strtol` interface.
131 !! @code{.f90}
132 !! integer :: i
133 !! logical :: success
134 !!
135 !! i = strtol(' 123', 0, success = res)
136 !! ! i = 123
137 !! @endcode
138 !!
139 !! <h2 class="groupheader"> Remarks </h2>
140 !! @ingroup group_operators
141 interface strtol
142 !! @cond
143 module procedure :: strtol_default
144 module procedure :: strtol_with_base
145 !! @endcond
146 end interface
147
148contains
149
150 !> Tokenizes a preprocessor expression into an array of token structures.
151 !! Handles whitespace, multi-character operators (`&&`, `||`, `==`, etc.),
152 !! the `defined` operator (with or without parentheses), numbers in various bases,
153 !! identifiers, and parentheses.
154 !! @param[in] expr Expression string to tokenize
155 !! @param[out] tokens Allocated array receiving the tokens
156 !! @param[out] ntokens Number of tokens produced
157 !!
158 !! @b Remarks
159 !! @ingroup group_token
160 subroutine tokenize(expr, tokens, ntokens)
161 character(*), intent(in) :: expr
162 type(token), allocatable, intent(out) :: tokens(:)
163 integer, intent(out) :: ntokens
164 !private
165 character(:), allocatable :: temp
166 integer :: i, pos, len_expr
167 logical :: in_word
168 logical, save :: in_comment
169
170 if (allocated(tokens)) deallocate(tokens)
171 allocate(tokens(max_tokens))
172 ntokens = 0
173 temp = trim(adjustl(expr)) // ' '
174 len_expr = len_trim(temp)
175 i = 1
176 in_word = .false.
177
178 do while (i <= len_expr)
179 if (temp(i:i) == ' ') then
180 i = i + 1
181 in_word = .false.
182 cycle
183 end if
184
185 if (.not. in_word) then
186 ntokens = ntokens + 1
187 if (ntokens > max_tokens) then
188 if (verbose) print *, "Error: Too many tokens in expression"
189 return
190 end if
191 in_word = .true.
192 end if
193
194 if (temp(i:i) == '(' .or. temp(i:i) == ')') then
195 tokens(ntokens)%value = temp(i:i)
196 tokens(ntokens)%type = parenthesis
197 i = i + 1
198 in_word = .false.
199 else if (temp(i:i + 1) == '&&' .or. temp(i:i + 1) == '||' .or. temp(i:i + 1) == '==' .or. &
200 temp(i:i + 1) == '!=' .or. temp(i:i + 1) == '<=' .or. temp(i:i + 1) == '>=') then
201 tokens(ntokens)%value = temp(i:i + 1)
202 tokens(ntokens)%type = operator
203 i = i + 2
204 in_word = .false.
205 else if (temp(i:i) == '!') then
206 tokens(ntokens)%value = temp(i:i)
207 tokens(ntokens)%type = operator
208 i = i + 1
209 in_word = .false.
210 else if (temp(i:i + 1) == '**') then
211 tokens(ntokens)%value = temp(i:i + 1)
212 tokens(ntokens)%type = operator
213 i = i + 2
214 in_word = .false.
215 else if (temp(i:i + 1) == '<<' .or. temp(i:i + 1) == '>>') then
216 tokens(ntokens)%value = temp(i:i + 1)
217 tokens(ntokens)%type = operator
218 i = i + 2
219 in_word = .false.
220 else if (temp(i:i) == '<' .or. temp(i:i) == '>' .or. temp(i:i) == '=' .or. &
221 temp(i:i) == '+' .or. temp(i:i) == '-' .or. temp(i:i) == '*' .or. &
222 temp(i:i) == '/' .or. temp(i:i) == '%' .or. &
223 temp(i:i) == '?' .or. temp(i:i) == ':') then
224 tokens(ntokens)%value = temp(i:i)
225 tokens(ntokens)%type = operator
226 i = i + 1
227 in_word = .false.
228 else if (temp(i:i) == '&' .or. temp(i:i) == '|' .or. temp(i:i) == '^' .or. &
229 temp(i:i) == '~') then
230 tokens(ntokens)%value = temp(i:i)
231 tokens(ntokens)%type = operator
232 i = i + 1
233 in_word = .false.
234 else if (starts_with(temp(i:), 'defined')) then
235 i = i + 7
236 do while (i <= len_expr .and. temp(i:i) == ' ')
237 i = i + 1
238 end do
239 if (i <= len_expr .and. temp(i:i) == '(') then
240 i = i + 1
241 pos = i
242 do while (pos <= len_expr .and. temp(pos:pos) /= ')')
243 pos = pos + 1
244 end do
245 tokens(ntokens)%value = trim(adjustl(temp(i:pos - 1)))
246 tokens(ntokens)%type = defined
247 i = pos + 1
248 else
249 pos = i
250 do while (pos <= len_expr .and. temp(pos:pos) /= ' ')
251 pos = pos + 1
252 end do
253 tokens(ntokens)%value = trim(adjustl(temp(i:pos - 1)))
254 tokens(ntokens)%type = defined
255 i = pos
256 end if
257 in_word = .false.
258 else if (is_typeless(temp(i:), pos)) then
259 pos = i + pos
260 tokens(ntokens)%value = trim(adjustl(temp(i:pos - 1)))
261 tokens(ntokens)%type = number
262 i = pos
263 in_word = .false.
264 else if (is_digit(temp(i:i))) then
265 pos = i
266 do while (pos <= len_expr .and. is_digit(temp(pos:pos)))
267 pos = pos + 1
268 end do
269 tokens(ntokens)%value = trim(adjustl(temp(i:pos - 1)))
270 tokens(ntokens)%type = number
271 i = pos
272 in_word = .false.
273 else
274 pos = i
275 do while (pos <= len_expr .and. temp(pos:pos) /= ' ' .and. &
276 temp(pos:pos) /= '(' .and. temp(pos:pos) /= ')')
277 pos = pos + 1
278 end do
279 tokens(ntokens)%value = trim(temp(i:pos - 1))
280 tokens(ntokens)%type = identifier
281 i = pos
282 in_word = .false.
283 end if
284 end do
285
286 if (verbose) print *, "Tokens for '", trim(expr), "':"
287 do i = 1, ntokens
288 if (verbose) print *, " Token ", i, ": ", trim(tokens(i)%value), " (type ", tokens(i)%type, ")"
289 end do
290 end subroutine
291
292 !> Tests whether a single character is a decimal digit ('0'-'9').
293 !! @param[in] ch Character to test
294 !! @return .true. if ch is a digit
295 !!
296 !! @b Remarks
297 !! @ingroup group_token
298 logical elemental function is_digit(ch) result(res)
299 character(*), intent(in) :: ch
300
301 res = verify(ch, '0123456789') == 0
302 end function
303
304 !> Detects whether a string starts a typeless constant (hex, octal, binary).
305 !! Used to avoid treating them as identifiers during tokenization.
306 !! @param[in] str Input string starting at current position
307 !! @param[out] pos Length of the typeless constant (0 if not typeless)
308 !! @return .true. if the prefix is a valid typeless constant in non-base-10
309 !!
310 !! @b Remarks
311 !! @ingroup group_token
312 logical function is_typeless(str, pos) result(res)
313 character(*), intent(in) :: str
314 integer, intent(out) :: pos
315 !private
316 integer :: i, base, n
317
318 pos = 0; base = 0; n = len(str)
319 do i = 1, n
320 if (verify(str(i:i), '0123456789xXaAbBcCdDeEfF') /= 0) then
321 pos = i
322 exit
323 end if
324 end do
325 if (pos > 0) i = strtol(str(:pos - 1), base, success=res)
326 if (base == 10) res = .false.
327 end function
328
329 !> Implementation of strtol function
330 integer function strtol_default(str, success) result(val)
331 character(*), intent(in) :: str
332 logical, intent(out), optional :: success
333 !private
334 integer :: base
335
336 base = 0
337 val = strtol_with_base(str, base, success)
338 end function
339
340 !> Implementation of strtol function with a base argument.
341 integer function strtol_with_base(str, base, success) result(val)
342 character(*), intent(in) :: str
343 integer, intent(inout) :: base
344 logical, intent(out), optional :: success
345 !private
346 integer :: i, len, digit
347 character :: c
348 logical :: is_valid, isdigit, is_lower_hex, is_upper_hex
349 character(len=len_trim(str)) :: work_str
350
351 val = 0; is_valid = .true.
352 work_str = adjustl(str) ! Remove leading spaces
353 len = len_trim(work_str)
354
355 ! Handle base 0 (auto-detect)
356 if (base == 0) then
357 if (len >= 2) then
358 if (work_str(1:2) == '0x' .or. work_str(1:2) == '0X') then
359 base = 16
360 work_str = work_str(3:len)
361 len = len - 2
362 else if (work_str(1:2) == '0b' .or. work_str(1:2) == '0B') then
363 base = 2
364 work_str = work_str(3:len)
365 len = len - 2
366 else
367 if (len > 1) then
368 if (work_str(1:1) == '0') then
369 base = 8
370 else
371 base = 10
372 end if
373 else
374 base = 10
375 end if
376 end if
377 else
378 base = 10
379 end if
380 end if
381
382 ! Validate base
383 if (base /= 2 .and. base /= 8 .and. base /= 10 .and. base /= 16) then
384 is_valid = .false.
385 if (present(success)) success = .false.
386 return
387 end if
388
389 ! Process each character
390 do i = 1, len
391 c = work_str(i:i)
392 digit = -1 ! Invalid digit marker
393
394 ! Convert character to digit
395 isdigit = c >= '0' .and. c <= '9'
396 if (isdigit) digit = ichar(c) - ichar('0')
397
398 is_lower_hex = base == 16 .and. c >= 'a' .and. c <= 'f'
399 if (is_lower_hex) digit = ichar(c) - ichar('a') + 10
400
401 is_upper_hex = base == 16 .and. c >= 'A' .and. c <= 'F'
402 if (is_upper_hex) digit = ichar(c) - ichar('A') + 10
403
404 ! Check if digit is valid
405 if (digit == -1) then
406 is_valid = .false.
407 exit
408 end if
409 if (digit >= base) then
410 is_valid = .false.
411 exit
412 end if
413
414 ! Check for potential overflow (approximate for 32-bit integer)
415 if (val > (huge(val) - digit) / base) then
416 is_valid = .false.
417 exit
418 end if
419
420 ! Accumulate value
421 val = val * base + digit
422 end do
423
424 ! Set success flag if provided
425 if (present(success)) success = is_valid
426 end function
427end module
integer, parameter, public max_tokens
Maximum number of tokens per line, set to 100 for efficient tokenization.
Definition constants.f90:23
logical, public verbose
Master switch for verbose diagnostic output Default value is .false. (quiet mode)....
Definition logging.f90:56
logical function, public starts_with(str, arg1, idx)
Checks if a string starts with a given prefix Returns .true. if the string str (after trimming leadin...
Definition string.f90:485
integer, parameter, public tokens_enum
Kind parameter for token type enumeration. Values are (unknown, number, operator, identifier,...
Definition token.f90:87
subroutine, public tokenize(expr, tokens, ntokens)
Tokenizes a preprocessor expression into an array of token structures. Handles whitespace,...
Definition token.f90:161
logical function is_typeless(str, pos)
Detects whether a string starts a typeless constant (hex, octal, binary). Used to avoid treating them...
Definition token.f90:313
logical elemental function is_digit(ch)
Tests whether a single character is a decimal digit ('0'-'9').
Definition token.f90:299
Return the trimmed length of a string.
Definition string.f90:138
Return the trimmed string.
Definition string.f90:146
Converts a string to integer.
Definition token.f90:141
Represents a single token in a parsed expression. Holds the string value of the token and its classif...
Definition token.f90:106