% \iffalse meta-comment
%
%% File: l3text-map.dtx
%
% Copyright (C) 2022-2025 The LaTeX Project
%
% It may be distributed and/or modified under the conditions of the
% LaTeX Project Public License (LPPL), either version 1.3c of this
% license or (at your option) any later version.  The latest version
% of this license is in the file
%
%    https://www.latex-project.org/lppl.txt
%
% This file is part of the "l3kernel bundle" (The Work in LPPL)
% and all files in that bundle must be distributed together.
%
% -----------------------------------------------------------------------
%
% The development version of the bundle can be found at
%
%    https://github.com/latex3/latex3
%
% for those people who are interested.
%
%<*driver>
\documentclass[full,kernel]{l3doc}
\begin{document}
  \DocInput{\jobname.dtx}
\end{document}
%</driver>
% \fi
%
% \title{^^A
%   The \pkg{l3text-map} module\\ Text processing (mapping)^^A
% }
%
% \author{^^A
%  The \LaTeX{} Project\thanks
%    {^^A
%      E-mail:
%        \href{mailto:latex-team@latex-project.org}
%          {latex-team@latex-project.org}^^A
%    }^^A
% }
%
% \date{Released 2025-01-18}
%
% \maketitle
%
% \begin{documentation}
%
% \end{documentation}
%
% \begin{implementation}
%
% \section{\pkg{l3text-map} implementation}
%
%    \begin{macrocode}
%<*package>
%    \end{macrocode}
%
%    \begin{macrocode}
%<@@=text>
%    \end{macrocode}
%
% \subsection{Mapping to text}
%
% \begin{macro}[EXP]{\text_map_function:nN}
% \begin{macro}[EXP]{\@@_map_function:nN}
% \begin{macro}[EXP]{\@@_map_loop:Nnw}
% \begin{macro}[EXP]{\@@_map_group:Nnn}
% \begin{macro}[EXP]{\@@_map_space:Nnw}
% \begin{macro}[EXP]{\@@_map_N_type:NnN}
% \begin{macro}[EXP]{\@@_map_codepoint:Nnn}
% \begin{macro}[EXP]{\@@_map_CR:Nnw}
% \begin{macro}[EXP]{\@@_map_CR:NnN}
% \begin{macro}[EXP]{\@@_map_class:Nnnn}
% \begin{macro}[EXP]{\@@_map_class:nNnnn}
% \begin{macro}[EXP]{\@@_map_class_loop:Nnnnw}
% \begin{macro}[EXP]{\@@_map_class_end:nw}
% \begin{macro}[EXP]
%   {
%     \@@_map_Control:Nnn     ,
%     \@@_map_Extend:Nnn      ,
%     \@@_map_SpacingMark:Nnn ,
%     \@@_map_Prepend:Nnn     ,
%     \@@_map_Prepend_aux:Nnn
%   }
% \begin{macro}[EXP]{\@@_map_Prepend:nNnn}
% \begin{macro}[EXP]{\@@_map_Prepend_loop:Nnnw}
% \begin{macro}[EXP]
%   {
%     \@@_map_not_Control:Nnn     ,
%     \@@_map_not_Extend:Nnn      ,
%     \@@_map_not_SpacingMark:Nnn ,
%     \@@_map_not_Prepend:Nnn     ,
%     \@@_map_not_L:Nnn           ,
%     \@@_map_not_LV:Nnn          ,
%     \@@_map_not_V:Nnn           ,
%     \@@_map_not_LVT:Nnn         ,
%     \@@_map_not_T:Nnn           ,
%     \@@_map_not_Regional_Indicator:Nnn
%   }
% \begin{macro}[EXP]
%   {
%     \@@_map_L:Nnn   ,
%     \@@_map_LV:Nnn  ,
%     \@@_map_V:Nnn   ,
%     \@@_map_LVT:Nnn ,
%     \@@_map_T:Nnn
%   }
% \begin{macro}[EXP]{\@@_map_hangul:Nnnw}
% \begin{macro}[EXP]{\@@_map_hangul:NnnN}
% \begin{macro}[EXP]{\@@_map_hangul:Nnnn}
% \begin{macro}[EXP]{\@@_map_hangul_aux:Nnnw}
% \begin{macro}[EXP]{\@@_map_hangul:nNnnnw}
% \begin{macro}[EXP]{\@@_map_hangul_loop:Nnnnnw}
% \begin{macro}[EXP]{\@@_map_hangul_next:Nnnn}
% \begin{macro}[EXP]{\@@_map_hangul_end:nw}
% \begin{macro}[EXP]
%   {
%     \@@_map_hangul_L:Nnn   ,
%     \@@_map_hangul_LV:Nnn  ,
%     \@@_map_hangul_V:Nnn   ,
%     \@@_map_hangul_LVT:Nnn ,
%     \@@_map_hangul_T:Nnn
%   }
% \begin{macro}[EXP]
%   {\@@_map_Regional_Indicator:Nnn, \@@_map_Regional_Indicator_aux:Nnn}
% \begin{macro}[EXP]{\@@_map_lookahead:NnNw}
% \begin{macro}[EXP]{\@@_map_lookahead:NnNN}
% \begin{macro}[EXP]{\@@_map_output:Nn}
% \begin{macro}[EXP]{\text_map_break:}
% \begin{macro}[EXP]{\text_map_break:n}
%   The standard lead-off for an action loop.
%    \begin{macrocode}
\cs_new:Npn \text_map_function:nN #1#2
  % #1 = text to map over, #2 = function applied to each piece.
  % The text is passed through \text_expand:n (e-type expansion) before
  % the internal mapping starts.
  { \exp_args:Ne \@@_map_function:nN { \text_expand:n {#1} } #2 }
\cs_new:Npn \@@_map_function:nN #1#2
  {
    % Start the loop with nothing collected, append the end markers to
    % the text #1, and provide the break point that \text_map_break:
    % jumps to.
    \@@_map_loop:Nnw #2 { } #1
      \q_@@_recursion_tail \q_@@_recursion_stop
    \prg_break_point:Nn \text_map_break: { }
  }
%    \end{macrocode}
%  The standard set up for an \enquote{action} loop. Groups are handled by
%  recursion, spaces are treated similarly: both count as grapheme boundaries.
%  For \texttt{N}-type tokens, we filter out control sequences (again
%  a boundary), then move on to further analysis.
%    \begin{macrocode}
\cs_new:Npn \@@_map_loop:Nnw #1#2#3 \q_@@_recursion_stop
  {
    % #1 = mapping function, #2 = material collected so far, #3 = input
    % still to be examined.  Choose the handler from the type of the item
    % at the head of #3, then put all of the arguments back for it.
    \tl_if_head_is_N_type:nTF {#3}
      { \@@_map_N_type:NnN }
      {
        \tl_if_head_is_group:nTF {#3}
          { \@@_map_group:Nnn }
          { \@@_map_space:Nnw }
      }
    #1 {#2} #3 \q_@@_recursion_stop
  }
\cs_new:Npn \@@_map_group:Nnn #1#2#3
  {
    % #1 = mapping function, #2 = material collected before the group,
    % #3 = content of the brace group.  A group is a boundary: flush #2,
    % then recurse over the group content #3 (not over #2, which has just
    % been output) inside a literal brace pair with its own end markers
    % and break point, and finally carry on with the input after the group.
    \@@_map_output:Nn #1 {#2}
    {
      \@@_map_loop:Nnw #1 { } #3
        \q_@@_recursion_tail \q_@@_recursion_stop
      \prg_break_point:Nn \text_map_break: { }
    }
    \@@_map_loop:Nnw #1 { }
  }
\use:e
  { \cs_new:Npn \exp_not:N \@@_map_space:Nnw #1#2 \c_space_tl }
  {
    % The \use:e step expands \c_space_tl, so the argument #2 is delimited
    % by the content of that constant.  #1 = mapping function,
    % #2 = material collected so far.  Flush #2, pass a space to #1 as an
    % item of its own and restart the loop with nothing collected.
    \@@_map_output:Nn #1 {#2}
    #1 { ~ }
    \@@_map_loop:Nnw #1 { }
  }
\cs_new:Npn \@@_map_N_type:NnN #1#2#3
  {
    % #1 = mapping function, #2 = material collected so far, #3 = next
    % token.  At the end marker: flush #2 and leave via \text_map_break:.
    \@@_if_q_recursion_tail_stop_do:Nn #3
      {
        \@@_map_output:Nn #1 {#2}
        \text_map_break:
      }
    % A control sequence is a boundary: flush #2, pass the control
    % sequence to #1 by itself and restart.  Any other token is handed to
    % \@@_codepoint_process:nN, with \@@_map_codepoint:Nnn as the
    % continuation.
    \token_if_cs:NTF #3
      {
        \@@_map_output:Nn #1 {#2}
        #1 {#3}
        \@@_map_loop:Nnw #1 { }
      }
      {
        \@@_codepoint_process:nN
          { \@@_map_codepoint:Nnn #1 {#2} } #3
      }
  }
%    \end{macrocode}
%  We pull out a few special cases here. The carriage return case needs a bit of
%  context handling so has an auxiliary. Codepoint U+200D is the zero-width
%  joiner, which has no context to concern us: just don't break.
%    \begin{macrocode}
\cs_new:Npn \@@_map_codepoint:Nnn #1#2#3
  {
    % #1 = mapping function, #2 = material collected so far,
    % #3 = current codepoint.
    % U+000D: flush #2 and let the CR auxiliary inspect what follows.
    \@@_codepoint_compare:nNnTF {#3} = { "0D }
      {
        \@@_map_output:Nn #1 {#2}
        \@@_map_CR:Nnw #1 {#3}
      }
      {
        % U+200D is simply appended to the collected material; everything
        % else starts the chain of class tests at "Control".
        \@@_codepoint_compare:nNnTF {#3} = { "200D }
          { \@@_map_loop:Nnw #1 {#2#3} }
          { \@@_map_class:Nnnn #1 {#2} {#3} { Control } }
      }
  }
%    \end{macrocode}
%   A carriage return is a boundary unless it is immediately followed by
%   a line feed, in which case that pair is a boundary.
%    \begin{macrocode}
\cs_new:Npn \@@_map_CR:Nnw #1#2#3 \q_@@_recursion_stop
  {
    % #1 = mapping function, #2 = the carriage return, #3 = remaining
    % input.  Only an N-type token can be a line feed, so for a group or
    % space the CR is output immediately and the main loop restarts.
    \tl_if_head_is_N_type:nTF {#3}
      { \@@_map_CR:NnN #1 {#2} }
      {
        #1 {#2}
        \@@_map_loop:Nnw #1 { }
      }
        #3 \q_@@_recursion_stop
  }
\cs_new:Npn \@@_map_CR:NnN #1#2#3
  {
    % #1 = mapping function, #2 = the carriage return, #3 = next token.
    % At the end marker the CR is output and the mapping finishes.
    \@@_if_q_recursion_tail_stop_do:Nn #3
      {
        #1 {#2}
        \text_map_break:
      }
    % A following line feed (a non-control-sequence token with character
    % code "0A) is output together with the CR as one item.  Otherwise
    % the CR is output by itself, so that it is not lost, and #3 is put
    % back in the input for the main loop to deal with.
    \bool_lazy_and:nnTF
      { ! \token_if_cs_p:N #3 }
      { \int_compare_p:nNn { `#3 } = { "0A } }
      {
        \@@_map_output:Nn #1 {#2#3}
        \@@_map_loop:Nnw #1 { }
      }
      {
        #1 {#2}
        \@@_map_loop:Nnw #1 { } #3
      }
  }
%    \end{macrocode}
%   There are various classes of character, and we deal with them all in
%   the same general way. We need to examine the relevant list of codepoints:
%   if we get a hit, then we do whatever the relevant action is. Otherwise
%   we loop, but only if the current codepoint could still match: the
%   loop stops early otherwise and we move forward.
%    \begin{macrocode}
\cs_new:Npn \@@_map_class:Nnnn #1#2#3#4
  {
    % #1 = mapping function, #2 = material collected so far,
    % #3 = current codepoint, #4 = class name.  Fetch the value of
    % \c_@@_grapheme_<class>_clist (v-type) as the leading argument.
    \exp_args:Nv \@@_map_class:nNnnn { c_@@_grapheme_ #4 _clist }
      #1 {#2} {#3} {#4}
  }
\cs_new:Npn \@@_map_class:nNnnn #1#2#3#4#5
  {
    % #1 = list of ranges for the class; the remaining arguments are as
    % above.  Place the list after the loop function and terminate it
    % with an end-marker entry.
    \@@_map_class_loop:Nnnnw #2 {#3} {#4} {#5}
      #1 , \q_@@_recursion_tail .. , \q_@@_recursion_stop
  }
\cs_new:Npn \@@_map_class_loop:Nnnnw #1#2#3#4 #5 .. #6 ,
  {
    % #1 = mapping function, #2 = material collected so far,
    % #3 = current codepoint, #4 = class name, #5 .. #6 = one list entry,
    % read as the two ends of a range (used with a " prefix below).
    % List exhausted: the codepoint is not in this class.
    \@@_if_q_recursion_tail_stop_do:nn {#5}
      { \use:c { @@_map_not_ #4 :Nnn } #1 {#2} {#3} }
    % Below the start of this entry: stop looking, drop the rest of the
    % list and treat as "not in class".
    \@@_codepoint_compare:nNnTF {#3} < { "#5 }
      {
        \@@_map_class_end:nw
          { \use:c { @@_map_not_ #4 :Nnn } #1 {#2} {#3} }
      }
      {
        % Above the end of this entry: try the next one.  Otherwise the
        % codepoint is inside the range, so run the handler of the class.
        \@@_codepoint_compare:nNnTF {#3} > { "#6 }
          { \@@_map_class_loop:Nnnnw #1 {#2} {#3} {#4} }
          {
            \@@_map_class_end:nw
              { \use:c { @@_map_ #4 :Nnn } #1 {#2} {#3} }
          }
      }
  }
% Discard everything up to the stop marker and leave #1 in the input.
\cs_new:Npn \@@_map_class_end:nw #1#2 \q_@@_recursion_stop {#1}
%    \end{macrocode}
%   Break before \emph{and} after.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Control:Nnn #1#2#3
  {
    % #1 = mapping function, #2 = material collected so far,
    % #3 = current codepoint.  Both are output as separate items (each
    % only if non-blank) and the loop restarts with nothing collected.
    \@@_map_output:Nn #1 {#2}
    \@@_map_output:Nn #1 {#3}
    \@@_map_loop:Nnw #1 { }
  }
%    \end{macrocode}
%   Keep collecting.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Extend:Nnn #1#2#3
  % #1 = mapping function, #2 = material collected so far,
  % #3 = current codepoint, which is appended to #2 without any output.
  { \@@_map_loop:Nnw #1 {#2#3} }
% SpacingMark codepoints are handled by the same definition.
\cs_new_eq:NN \@@_map_SpacingMark:Nnn \@@_map_Extend:Nnn
%    \end{macrocode}
%   Output anything earlier, then combine with what follows. The only
%   exclusions are control characters.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Prepend:Nnn #1#2#3
  {
    % #1 = mapping function, #2 = material collected so far,
    % #3 = the Prepend codepoint.  Flush #2, then look at the next item
    % with \@@_map_Prepend_aux:Nnn as the continuation.
    \@@_map_output:Nn #1 {#2}
    \@@_map_lookahead:NnNw #1 {#3} \@@_map_Prepend_aux:Nnn
  }
\cs_new:Npn \@@_map_Prepend_aux:Nnn #1#2#3
  {
    % #1 = mapping function, #2 = the Prepend codepoint, #3 = the
    % codepoint that follows it.  For "0A and "0D the Prepend codepoint
    % is output by itself and #3 starts a new collection; otherwise #3 is
    % checked against the Control list.
    \bool_lazy_or:nnTF
      { \@@_codepoint_compare_p:nNn {#3} = { "0A } }
      { \@@_codepoint_compare_p:nNn {#3} = { "0D } }
      {
        #1 {#2}
        \@@_map_loop:Nnw #1 {#3}
      }
      {
        \exp_args:NV \@@_map_Prepend:nNnn
          \c_@@_grapheme_Control_clist
          #1 {#2} {#3}
      }
  }
\cs_new:Npn \@@_map_Prepend:nNnn #1#2#3#4
  {
    % #1 = list of Control ranges, #2 = mapping function, #3 = the
    % Prepend codepoint, #4 = the codepoint after it.  Set up the scan of
    % the list, terminated by an end-marker entry.
    \@@_map_Prepend_loop:Nnnw #2 {#3} {#4}
      #1 , \q_@@_recursion_tail .. , \q_@@_recursion_stop
  }
\cs_new:Npn \@@_map_Prepend_loop:Nnnw #1#2#3 #4 .. #5 ,
  {
    % #1 = mapping function, #2 = the Prepend codepoint, #3 = the
    % codepoint after it, #4 .. #5 = one range from the list.
    % List exhausted: #3 is not in the list, so join it to #2.
    \@@_if_q_recursion_tail_stop_do:nn {#4}
      { \@@_map_loop:Nnw #1 {#2#3} }
    % Below this range: stop scanning early and join as well.
    \@@_codepoint_compare:nNnTF {#3} < { "#4 }
      {
        \@@_map_class_end:nw
          { \@@_map_loop:Nnw #1 {#2#3} }
      }
      {
        % Above this range: try the next one.  Inside it: do not join;
        % keep #2 as the collected material and put #3 back in the input
        % so that the main loop processes it.
        \@@_codepoint_compare:nNnTF {#3} > { "#5 }
          { \@@_map_Prepend_loop:Nnnw #1 {#2} {#3} }
          {
            \@@_map_class_end:nw
              { \@@_map_loop:Nnw #1 {#2} #3 }
          }
      }
  }
%    \end{macrocode}
%   Dealing with end-of-class is done such that we can be flexible.
%    \begin{macrocode}
% Each "not" handler receives #1 = mapping function, #2 = material
% collected so far, #3 = current codepoint, and moves on to test the next
% class in the order: Extend, SpacingMark, Prepend, L, LV, V, LVT, T,
% Regional_Indicator.
\cs_new:Npn \@@_map_not_Control:Nnn #1#2#3
  { \@@_map_class:Nnnn #1 {#2} {#3} { Extend } }
\cs_new:Npn \@@_map_not_Extend:Nnn #1#2#3
  { \@@_map_class:Nnnn #1 {#2} {#3} { SpacingMark } }
\cs_new:Npn \@@_map_not_SpacingMark:Nnn #1#2#3
  { \@@_map_class:Nnnn #1 {#2} {#3} { Prepend } }
\cs_new:Npn \@@_map_not_Prepend:Nnn #1#2#3
  { \@@_map_class:Nnnn #1 {#2} {#3} { L } }
\cs_new:Npn \@@_map_not_L:Nnn #1#2#3
  { \@@_map_class:Nnnn #1 {#2} {#3} { LV } }
\cs_new:Npn \@@_map_not_LV:Nnn #1#2#3
  { \@@_map_class:Nnnn #1 {#2} {#3} { V } }
\cs_new:Npn \@@_map_not_V:Nnn #1#2#3
  { \@@_map_class:Nnnn #1 {#2} {#3} { LVT } }
\cs_new:Npn \@@_map_not_LVT:Nnn #1#2#3
  { \@@_map_class:Nnnn #1 {#2} {#3} { T } }
\cs_new:Npn \@@_map_not_T:Nnn #1#2#3
  { \@@_map_class:Nnnn #1 {#2} {#3} { Regional_Indicator } }
\cs_new:Npn \@@_map_not_Regional_Indicator:Nnn #1#2#3
  {
    % No class matched: flush #2 and start a new collection with #3.
    \@@_map_output:Nn #1 {#2}
    \@@_map_loop:Nnw #1 {#3}
  }
%    \end{macrocode}
%   Hangul needs additional treatment. First we have to deal with
%   the start-of-Hangul position: output what we had up to now, then
%   move to the specialist handler. The idea here is to pick off the
%   different codepoint types one at a time, tracking what else can be
%   considered at each stage until we hit the end of the viable types.
%   Other than that, we just keep building up the Hangul codepoints
%   using a dedicated version of the loop from above.
%    \begin{macrocode}
% Entry points for a first Hangul codepoint.  #1 = mapping function,
% #2 = material collected so far, #3 = the Hangul codepoint.  Each one
% flushes #2 and starts the Hangul handler with #3 collected and a
% ";"-separated list of the classes that may follow.
\cs_new:Npn \@@_map_L:Nnn #1#2#3
  {
    \@@_map_output:Nn #1 {#2}
    \@@_map_hangul:Nnnw
      #1 {#3} { L ; V ; LV ; LVT }
  }
\cs_new:Npn \@@_map_LV:Nnn #1#2#3
  {
    \@@_map_output:Nn #1 {#2}
    \@@_map_hangul:Nnnw
      #1 {#3} { V ; T }
  }
% V uses the same follow-on list as LV.
\cs_new_eq:NN \@@_map_V:Nnn \@@_map_LV:Nnn
\cs_new:Npn \@@_map_LVT:Nnn #1#2#3
  {
    \@@_map_output:Nn #1 {#2}
    \@@_map_hangul:Nnnw
      #1 {#3} { T }
  }
% T uses the same follow-on list as LVT.
\cs_new_eq:NN \@@_map_T:Nnn \@@_map_LVT:Nnn
\cs_new:Npn \@@_map_hangul:Nnnw #1#2#3#4 \q_@@_recursion_stop
  {
    % #1 = mapping function, #2 = Hangul material collected so far,
    % #3 = list of classes that may follow, #4 = remaining input.
    % Anything other than an N-type token ends the sequence: output #2
    % and return to the main loop.
    \tl_if_head_is_N_type:nTF {#4}
      { \@@_map_hangul:NnnN #1 {#2} {#3} }
      {
        #1 {#2}
        \@@_map_loop:Nnw #1 { }
      }
    #4 \q_@@_recursion_stop
  }
\cs_new:Npn \@@_map_hangul:NnnN #1#2#3#4
  {
    % #1 = mapping function, #2 = Hangul material collected so far,
    % #3 = list of classes that may follow, #4 = next token.
    % At the end marker: output #2 and finish the mapping.
    \@@_if_q_recursion_tail_stop_do:Nn #4
      {
        #1 {#2}
        \text_map_break:
      }
    % A control sequence ends the Hangul sequence: output #2 and put the
    % control sequence back in the input so that the main loop handles
    % it rather than it being discarded.  Any other token is converted by
    % \@@_codepoint_process:nN and tested against the classes in #3.
    \token_if_cs:NTF #4
      {
        #1 {#2}
        \@@_map_loop:Nnw #1 { } #4
      }
      {
        \@@_codepoint_process:nN
          { \@@_map_hangul:Nnnn #1 {#2} {#3} } #4
      }
  }
\cs_new:Npn \@@_map_hangul:Nnnn #1#2#3#4
  {
    % #1 = mapping function, #2 = Hangul material collected so far,
    % #3 = ";"-separated list of candidate classes, #4 = next codepoint.
    % Lay the class list out after the auxiliary, closed by end markers.
    \@@_map_hangul_aux:Nnnw #1 {#2} {#4}
      #3 ; \q_recursion_tail ; \q_recursion_stop
  }
\cs_new:Npn \@@_map_hangul_aux:Nnnw #1#2#3#4 ;
  {
    % #1 = mapping function, #2 = collected material, #3 = next
    % codepoint, #4 = first remaining candidate class.
    % No candidates left: keep #2 collected and put #3 back in the input
    % for the main loop.
    \quark_if_recursion_tail_stop_do:nn {#4}
      { \@@_map_loop:Nnw #1 {#2} #3 }
    % Otherwise fetch the range list for class #4 and scan it.
    \exp_args:Nv \@@_map_hangul:nNnnnw { c_@@_grapheme_ #4 _clist }
      #1 {#2} {#3} {#4}
  }
\cs_new:Npn \@@_map_hangul:nNnnnw #1#2#3#4#5#6  \q_recursion_stop
  {
    % #1 = range list of the class being tested, #2 = mapping function,
    % #3 = collected material, #4 = next codepoint, #5 = class name,
    % #6 = the candidate classes not yet tested (with their end marker).
    \@@_map_hangul_loop:Nnnnnw #2 {#3} {#4} {#5} {#6}
      #1 , \q_@@_recursion_tail .. , \q_@@_recursion_stop
  }
\cs_new:Npn \@@_map_hangul_loop:Nnnnnw #1#2#3#4#5 #6 .. #7 ,
  {
    % #1 = mapping function, #2 = collected material, #3 = next
    % codepoint, #4 = class name, #5 = remaining candidate classes,
    % #6 .. #7 = one range from the list.
    % Range list exhausted: move on to the next candidate class.
    \@@_if_q_recursion_tail_stop_do:nn {#6}
      { \@@_map_hangul_next:Nnnn #1 {#2} {#3} {#5} }
    % Below this range: stop scanning this class and try the next one.
    \@@_codepoint_compare:nNnTF {#3} < { "#6 }
      {
        \@@_map_hangul_end:nw
          { \@@_map_hangul_next:Nnnn #1 {#2} {#3} {#5} }
      }
      {
        % Above this range: try the next range.  Inside it: run the
        % continuation handler named after the matching class.
        \@@_codepoint_compare:nNnTF {#3} > { "#7 }
          { \@@_map_hangul_loop:Nnnnnw #1 {#2} {#3} {#4} {#5} }
          {
            \@@_map_hangul_end:nw
              { \use:c { @@_map_hangul_ #4 :Nnn } #1 {#2} {#3} }
          }
      }
  }
% Resume the class scan with the remaining candidates #4, restoring the
% stop marker that was removed when they were grabbed.
\cs_new:Npn \@@_map_hangul_next:Nnnn #1#2#3#4
  { \@@_map_hangul_aux:Nnnw #1 {#2} {#3} #4 \q_recursion_stop }
% Discard everything up to the stop marker and leave #1 in the input.
\cs_new:Npn \@@_map_hangul_end:nw #1#2 \q_@@_recursion_stop {#1}
% Continuation handlers used once a Hangul sequence is under way.
% #1 = mapping function, #2 = Hangul material collected so far, #3 = the
% codepoint just matched.  The codepoint is appended to the collection
% and the search continues with the classes allowed after it.  The class
% lists must be ";"-separated, as that is the delimiter used by
% \@@_map_hangul_aux:Nnnw, and they mirror those of the entry points
% \@@_map_L:Nnn, \@@_map_LV:Nnn and \@@_map_LVT:Nnn.
\cs_new:Npn \@@_map_hangul_L:Nnn #1#2#3
  {
    \@@_map_hangul:Nnnw
      #1 {#2#3} { L ; V ; LV ; LVT }
  }
\cs_new:Npn \@@_map_hangul_LV:Nnn #1#2#3
  {
    \@@_map_hangul:Nnnw
      #1 {#2#3} { V ; T }
  }
\cs_new_eq:NN \@@_map_hangul_V:Nnn \@@_map_hangul_LV:Nnn
\cs_new:Npn \@@_map_hangul_LVT:Nnn #1#2#3
  {
    \@@_map_hangul:Nnnw
      #1 {#2#3} { T }
  }
\cs_new_eq:NN \@@_map_hangul_T:Nnn \@@_map_hangul_LVT:Nnn
%    \end{macrocode}
%  The Regional Indicator rule means looking ahead and dealing with the
%  case where there are two in a row. So we use a look ahead to pick them
%  off. As there is only one range the values are hard-coded.
%    \begin{macrocode}
\cs_new:Npn \@@_map_Regional_Indicator:Nnn #1#2#3
  {
    % #1 = mapping function, #2 = material collected so far, #3 = the
    % Regional Indicator codepoint.  Flush #2, then look at the next item.
    \@@_map_output:Nn #1 {#2}
    \@@_map_lookahead:NnNw #1 {#3} \@@_map_Regional_Indicator_aux:Nnn
  }
\cs_new:Npn \@@_map_Regional_Indicator_aux:Nnn #1#2#3
  {
    % #1 = mapping function, #2 = first Regional Indicator, #3 = the
    % codepoint after it.  Outside "1F1E6--"1F1FF: keep #2 collected and
    % put #3 back in the input.  Inside: collect the two together.
    \bool_lazy_or:nnTF
      { \@@_codepoint_compare_p:nNn {#3} < { "1F1E6 } }
      { \@@_codepoint_compare_p:nNn {#3} > { "1F1FF } }
      {
        \@@_map_loop:Nnw #1 {#2} #3
      }
      { \@@_map_loop:Nnw #1 {#2#3} }
  }
%    \end{macrocode}
%   A generic look-ahead setup.
%    \begin{macrocode}
\cs_new:Npn \@@_map_lookahead:NnNw #1#2#3#4 \q_@@_recursion_stop
  {
    % #1 = mapping function, #2 = collected material, #3 = continuation
    % to call with the next codepoint, #4 = remaining input.  If the next
    % item is not an N-type token, return to the main loop with #2 still
    % collected.
    \tl_if_head_is_N_type:nTF {#4}
      { \@@_map_lookahead:NnNN #1 {#2} #3 }
      { \@@_map_loop:Nnw #1 {#2} }
    #4 \q_@@_recursion_stop
  }
\cs_new:Npn \@@_map_lookahead:NnNN #1#2#3#4
  {
    % #4 = next token.  At the end marker just output #2.
    \@@_if_q_recursion_tail_stop_do:Nn #4 { #1 {#2} }
    % A control sequence: output #2 and restart the main loop.  Any other
    % token: convert it and call #3 with #1, #2 and the codepoint.  In
    % both cases #4 is placed after the selected branch.
    \token_if_cs:NTF #4
      {
        #1 {#2}
        \@@_map_loop:Nnw #1 { }
      }
      { \@@_codepoint_process:nN { #3 #1 {#2} } }
        #4
  }
%    \end{macrocode}
%   For the end of the process.
%    \begin{macrocode}
\cs_new:Npn \@@_map_output:Nn #1#2
  {
    % Hand #2 to the mapping function #1, unless #2 is blank, in which
    % case nothing at all is left in the input.
    \tl_if_blank:nTF {#2}
      { }
      { #1 {#2} }
  }
% Jump to the break point tagged with \text_map_break:, inserting no
% further code.
\cs_new:Npn \text_map_break:
  { \prg_map_break:Nn \text_map_break: { } }
% As above, but the argument following \text_map_break:n becomes the
% second argument of \prg_map_break:Nn.
\cs_new:Npn \text_map_break:n
  { \prg_map_break:Nn \text_map_break: }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}{\text_map_inline:nn}
%   The standard non-expandable inline version.
%    \begin{macrocode}
\cs_new_protected:Npn \text_map_inline:nn #1#2
  {
    % #1 = text to map over, #2 = inline code in which ##1 stands for the
    % current item.  The counter \g__kernel_prg_map_int gives each level
    % of mapping its own auxiliary function name.
    \int_gincr:N \g__kernel_prg_map_int
    % Define the auxiliary for this level globally, with #2 as its body.
    \cs_gset_protected:cpn
      { @@_map_ \int_use:N \g__kernel_prg_map_int :w } ##1 {#2}
    \exp_args:Nnc \text_map_function:nN {#1}
      { @@_map_ \int_use:N \g__kernel_prg_map_int :w }
    % Restore the counter at the break point that follows the mapping.
    \prg_break_point:Nn \text_map_break:
      { \int_gdecr:N \g__kernel_prg_map_int }
  }
%    \end{macrocode}
% \end{macro}
%
%    \begin{macrocode}
%</package>
%    \end{macrocode}
%
% \end{implementation}
%
% \PrintIndex