% \iffalse meta-comment
%
%% File: tagpdf-mc-luacode.dtx
%
% Copyright (C) 2019-2025 Ulrike Fischer
%
% It may be distributed and/or modified under the conditions of the
% LaTeX Project Public License (LPPL), either version 1.3c of this
% license or (at your option) any later version. The latest version
% of this license is in the file
%
%    https://www.latex-project.org/lppl.txt
%
% This file is part of the "tagpdf bundle" (The Work in LPPL)
% and all files in that bundle must be distributed together.
%
% -----------------------------------------------------------------------
%
% The development version of the bundle can be found at
%
%    https://github.com/latex3/tagpdf
%
% for those people who are interested.
%<*driver>
\DocumentMetadata{}
\documentclass{l3doc}
\usepackage{array,booktabs,caption}
\hypersetup{pdfauthor=Ulrike Fischer,
 pdftitle=tagpdf-mc module (tagpdf)}
\begin{document}
  \DocInput{\jobname.dtx}
\end{document}
%</driver>
% \fi
% \title{^^A
%   The \pkg{tagpdf-mc-luacode} module\\ Code related to Marked Content (mc-chunks), luamode-specific ^^A
%   \\ Part of the tagpdf package
% }
%
% \author{^^A
%  Ulrike Fischer\thanks
%    {^^A
%      E-mail:
%        \href{mailto:fischer@troubleshooting-tex.de}
%          {fischer@troubleshooting-tex.de}^^A
%    }^^A
% }
%
% \date{Version 0.99l, released 2025-01-12}
% \maketitle
% \begin{implementation}
% The code is split into three parts: code shared by all engines,
% code specific to luamode and code not used by luamode.
%
% \section{Marked content code -- luamode code}
% luamode uses attributes to mark mc-chunks. The two attributes used are
% defined in the backend file. The backend also loads the lua file, as it can contain
% functions needed elsewhere.
% The attributes for mc are global (between 0.6 and 0.81 they were local but this
% was reverted).
% The attributes are set up only in lua, and one should use
% the lua functions to set and get them.\\
% |g_@@_mc_type_attr|: the value represents the type\\
% |g_@@_mc_cnt_attr|: will hold the |\c@g_@@_MCID_abs_int| value
%
% Handling attributes needs a different system to number the page-wise mcid's:
% a |\tagmcbegin ... \tagmcend| pair no longer surrounds exactly one mc chunk:
% it can be split at page breaks.
% We know the included mcid(s) only after the ship out.
% So for the |struct -> mcid| mapping we
% need to record |struct -> mc-cnt| (in |\g_@@_mc_parenttree_prop| and/or a lua table)
% and at shipout |mc-cnt-> {mcid, mcid, ...}|
% and when building the trees connect both.
%
% Key definitions are overwritten for luatex to store that data in lua-tables.
% The data for the mc are in |ltx.@@.mc[absnum]|.
% The fields of the table are:\\
% tag : the type (a string)\\
% raw : more properties (string)\\
% label: a string. \\
% artifact: the presence indicates an artifact, the value (string) is the type.\\
% kids: an array of tables\\
% |{1={kid=num1,page=pagenum1}, 2={kid=num2,page=pagenum2},...}|,\\
% this describes the chunks the mc has been split to by the traversing code\\
% parent: the number of the structure it is in. Needed to build the parent tree.
%
% The package declarations follow. Each one lives in its own docstrip
% module, so the guards have to be closed again before the next one opens.
%    \begin{macrocode}
%<@@=tag>
%<*luamode>
\ProvidesExplPackage {tagpdf-mc-code-lua} {2025-01-12} {0.99l}
  {tagpdf - mc code only for the luamode }
%</luamode>
%<*debug>
\ProvidesExplPackage {tagpdf-debug-lua} {2025-01-12} {0.99l}
  {part of tagpdf - debugging code related to marking chunks - lua mode}
%</debug>
%    \end{macrocode}
% The main function which wanders through the shipout box to inject the literals.
% If the new callback is there, it is used.
%    \begin{macrocode}
%<*luamode>
\hook_gput_code:nnn{begindocument}{tagpdf/mc}
  {
    % Part 1: space character handling, only done if
    % \g__tag_active_space_bool is true.
    \bool_if:NT\g__tag_active_space_bool
      {
        % If the callback type pre_shipout_filter is known, register a
        % function there (description "tagpdf") which calls
        % space_chars_shipout on the box and returns true. If
        % declare_callback_rule is available too, an ordering rule between
        % the "luaotfload.dvi" and the "tagpdf" entry is declared.
        \lua_now:e
          {
            if~luatexbase.callbacktypes.pre_shipout_filter~then~
              luatexbase.add_to_callback("pre_shipout_filter", function(TAGBOX)~
              ltx.@@.func.space_chars_shipout(TAGBOX)~return~true~
              end, "tagpdf")~
              if~luatexbase.declare_callback_rule~then~
                luatexbase.declare_callback_rule("pre_shipout_filter", "luaotfload.dvi", "after", "tagpdf")
              end~
            end
          }
        % Fallback selection. If the callback type exists, Lua calls
        % token.get_next(); this is expected to remove the \@secondoftwo
        % directly after the closing brace, so that \@gobble throws the
        % following braced group away. Without the callback type
        % \@secondoftwo\@gobble keeps the braced group, and the function is
        % run from the shipout/before hook on the box ShipoutBox instead.
        % NOTE(review): relies on token.get_next() consuming exactly the next
        % token of the TeX input -- confirm against the LuaTeX manual.
        % Do not insert anything between the brace and \@secondoftwo.
        \lua_now:e
          {
            if~luatexbase.callbacktypes.pre_shipout_filter~then~
              token.get_next()~
            end
          }\@secondoftwo\@gobble
            {
              \hook_gput_code:nnn{shipout/before}{tagpdf/lua}
                {
                  \lua_now:e
                    { ltx.@@.func.space_chars_shipout (tex.box["ShipoutBox"]) }
                }
            }
      }
    % Part 2: marking of the mc-chunks, only done if
    % \g__tag_active_mc_bool is true. Same scheme as above with
    % mark_shipout as the function; no ordering rule is declared here.
    \bool_if:NT\g__tag_active_mc_bool
      {
        \lua_now:e
          {
            if~luatexbase.callbacktypes.pre_shipout_filter~then~
              luatexbase.add_to_callback("pre_shipout_filter", function(TAGBOX)~
              ltx.@@.func.mark_shipout(TAGBOX)~return~true~
              end, "tagpdf")~
            end
          }
        % Same callback-or-hook selection as in part 1.
        \lua_now:e
          {
            if~luatexbase.callbacktypes.pre_shipout_filter~then~
              token.get_next()~
            end
          }\@secondoftwo\@gobble
            {
              \hook_gput_code:nnn{shipout/before}{tagpdf/lua}
                {
                  \lua_now:e
                    { ltx.@@.func.mark_shipout (tex.box["ShipoutBox"]) }
                }
            }
      }
  }
%    \end{macrocode}
% \subsection{Commands}
% \begin{macro}{\@@_add_missing_mcs_to_stream:Nn}
% This command is used in the output routine by the ptagging code.
% It should do nothing in luamode: it takes two arguments and discards them.
% The public name |\tag_mc_add_missing_to_stream:Nn| is made a copy of it.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_add_missing_mcs_to_stream:Nn #1#2 {}
\cs_set_eq:NN \tag_mc_add_missing_to_stream:Nn \@@_add_missing_mcs_to_stream:Nn
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\tag_mc_new_stream:n}
% In luamode this command takes one argument and does nothing with it.
%    \begin{macrocode}
\cs_new_protected:Npn \tag_mc_new_stream:n #1 {}
%    \end{macrocode}
% \end{macro}
% \begin{macro}[pTF]{\@@_mc_if_in:,\tag_mc_if_in:}
% This tests if we are in an mc; for attributes
% this means to check against a number.
%    \begin{macrocode}
\prg_new_conditional:Nnn \@@_mc_if_in: {p,T,F,TF}
  {
    % The current value of the mc type attribute is printed back to TeX
    % (with the catcode table \c_document_cctab) and compared with
    % -2147483647. This is the value \@@_mc_lua_unset_mc_type_attr: writes,
    % so equality means "not in an mc".
    \int_compare:nNnTF
      { -2147483647 }
      =
      {\lua_now:e
         {
           tex.print(\int_use:N \c_document_cctab,tex.getattribute(luatexbase.attributes.g_@@_mc_type_attr))
         }
      }
      { \prg_return_false: }
      { \prg_return_true: }
  }

\prg_new_eq_conditional:NNn \tag_mc_if_in: \@@_mc_if_in: {p,T,F,TF}
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}
%   {
%     \@@_mc_lua_set_mc_type_attr:n
%     ,\@@_mc_lua_set_mc_type_attr:o
%     ,\@@_mc_lua_unset_mc_type_attr:
%   }
% This takes a tag name, and sets the attributes globally to the related number.
% The setter assigns to a scratch variable and is therefore not expandable,
% so it is declared as protected. The unset command consists only of
% |\lua_now:e| calls and writes the value $-2147483647$ to both attributes.
%    \begin{macrocode}
\cs_new_protected:Nn \@@_mc_lua_set_mc_type_attr:n % #1 is a tag name
  {
    %TODO ltx.@@.func.get_num_from("#1") seems not to return a suitable number??
    \tl_set:Ne\l_@@_tmpa_tl{\lua_now:e{ltx.@@.func.output_num_from ("#1")} }
    % type attribute: the number obtained for the tag name
    \lua_now:e
      {
        tex.setattribute
         (
          "global",
          luatexbase.attributes.g_@@_mc_type_attr,
          \l_@@_tmpa_tl
         )
      }
    % cnt attribute: the value of \@@_get_mc_abs_cnt:
    \lua_now:e
      {
        tex.setattribute
         (
          "global",
          luatexbase.attributes.g_@@_mc_cnt_attr,
          \@@_get_mc_abs_cnt:
         )
      }
  }

\cs_generate_variant:Nn\@@_mc_lua_set_mc_type_attr:n { o }

\cs_new:Nn \@@_mc_lua_unset_mc_type_attr:
  {
    \lua_now:e
      {
        tex.setattribute
         (
          "global",
          luatexbase.attributes.g_@@_mc_type_attr,
          -2147483647
         )
      }
    \lua_now:e
      {
        tex.setattribute
         (
          "global",
          luatexbase.attributes.g_@@_mc_cnt_attr,
          -2147483647
         )
      }
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}
%   {
%     \@@_mc_insert_mcid_kids:n
%     ,\@@_mc_insert_mcid_single_kids:n
%   }
% These commands will in the finish code replace the dummy for a mc by the real mcid kids.
% We need a variant for the case that it is the only kid, to get the array right.
% Both call the lua function |mc_insert_kids| with the mc number; the second
% argument is 0 for the normal and 1 for the single-kid variant.
%    \begin{macrocode}
\cs_new:Nn \@@_mc_insert_mcid_kids:n
  {
    \lua_now:e { ltx.@@.func.mc_insert_kids (#1,0) }
  }

\cs_new:Nn \@@_mc_insert_mcid_single_kids:n
  {
    \lua_now:e {ltx.@@.func.mc_insert_kids (#1,1) }
  }
%    \end{macrocode}
% \end{macro}
%
%\begin{macro}{\@@_mc_handle_stash:n,\@@_mc_handle_stash:e}
% This is the lua variant for the command to
% put an mcid absolute number in the
% current structure.
% The definition is shared between the luamode and the debug module: the
% luamode module creates the command, the debug module redefines it.
%    \begin{macrocode}
%</luamode>
%<*luamode|debug>
%<luamode>\cs_new_protected:Npn \@@_mc_handle_stash:n #1 %1 mcidnum
%<debug>\cs_set_protected:Npn \@@_mc_handle_stash:n #1 %1 mcidnum
  {
    \@@_check_mc_used:n { #1 }
    % The item is stored unexpanded in the kids sequence of the current
    % structure.
    \seq_gput_right:cn % Don't fill a lua table due to the command in the item,
                       % so use the kernel command
      { g_@@_struct_kids_\g_@@_struct_stack_current_tl _seq }
      {
        \@@_mc_insert_mcid_kids:n {#1}%
      }
%<debug>    \seq_gput_right:cn % Don't fill a lua table due to the command in the item,
%<debug>                       % so use the kernel command
%<debug>      { g_@@_struct_debug_kids_\g_@@_struct_stack_current_tl _seq }
%<debug>      {
%<debug>        MC~#1%
%<debug>      }
    % Record the relation structure <-> mc twice: in lua and in the
    % parenttree prop (key: mc number, value: current structure).
    \lua_now:e
      {
        ltx.@@.func.store_struct_mcabs
          (
            \g_@@_struct_stack_current_tl,#1
          )
      }
    \prop_gput:Nee
      \g_@@_mc_parenttree_prop
      { #1 }
      { \g_@@_struct_stack_current_tl }
  }
%</luamode|debug>
%<*luamode>
\cs_generate_variant:Nn \@@_mc_handle_stash:n { e }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\tag_mc_begin:n}
% This is the lua version of the user command.
% We currently don't check if there is nesting as it doesn't matter so
% much in lua.
%    \begin{macrocode}
\cs_set_protected:Nn \tag_mc_begin:n
  {
    \@@_check_if_active_mc:T
      {
        % The group keeps the local key settings of this chunk local;
        % it is closed again at the end of this command.
        \group_begin:
        %\@@_check_mc_if_nested:
        \bool_gset_true:N \g_@@_in_mc_bool
        \bool_set_false:N\l_@@_mc_artifact_bool
        \tl_clear:N \l_@@_mc_key_properties_tl
        \int_gincr:N \c@g_@@_MCID_abs_int
%    \end{macrocode}
% set the default tag to the structure:
%    \begin{macrocode}
        \tl_set_eq:NN \l_@@_mc_key_tag_tl \g_@@_struct_tag_tl
        \tl_gset_eq:NN\g_@@_mc_key_tag_tl \g_@@_struct_tag_tl
        \lua_now:e
          {
            ltx.@@.func.store_mc_data(\@@_get_mc_abs_cnt:,"tag","\g_@@_struct_tag_tl")
          }
        % the label is reset to empty before the user keys are applied
        \keys_set:nn { @@ / mc }{ label={}, #1 }
        %check that a tag or artifact has been used
        \@@_check_mc_tag:N \l_@@_mc_key_tag_tl
        %set the attributes:
        \@@_mc_lua_set_mc_type_attr:o { \l_@@_mc_key_tag_tl }
        \bool_if:NF \l_@@_mc_artifact_bool
          {
            % store the absolute num name in a label:
            \tl_if_empty:NF \l_@@_mc_key_label_tl
              {
                \exp_args:NV
                  \@@_mc_handle_mc_label:e \l_@@_mc_key_label_tl
              }
            % if not stashed record the absolute number
            \bool_if:NF \l_@@_mc_key_stash_bool
              {
                \exp_args:NV\@@_struct_get_parentrole:nNN
                  \g_@@_struct_stack_current_tl
                  \l_@@_get_parent_tmpa_tl
                  \l_@@_get_parent_tmpb_tl
                \@@_check_parent_child:VVnnN
                  \l_@@_get_parent_tmpa_tl
                  \l_@@_get_parent_tmpb_tl
                  {MC}{}
                  \l_@@_parent_child_check_tl
                % a negative check result triggers the warning
                \int_compare:nNnT {\l_@@_parent_child_check_tl}<{0}
                  {
                    \prop_get:cnN
                      { g_@@_struct_ \g_@@_struct_stack_current_tl _prop}
                      {S}
                      \l_@@_tmpa_tl
                    % Spaces are ignored in expl3 code, so every space in
                    % the message text has to be written as ~.
                    \msg_warning:nneee
                      { tag }
                      {role-parent-child}
                      { \l_@@_get_parent_tmpa_tl/\l_@@_get_parent_tmpb_tl }
                      { MC~(real~content) }
                      { not~allowed~
                        (struct~\g_@@_struct_stack_current_tl,~\l_@@_tmpa_tl)
                      }
                  }
                \@@_mc_handle_stash:e { \@@_get_mc_abs_cnt: }
              }
          }
        \group_end:
      }
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\tag_mc_end:}
%
% TODO: check how the use command must be guarded.
%    \begin{macrocode}
\cs_set_protected:Nn \tag_mc_end:
  {
    \@@_check_if_active_mc:T
      {
        %\@@_check_mc_if_open:
        \bool_gset_false:N \g_@@_in_mc_bool
        \bool_set_false:N\l_@@_mc_artifact_bool
        % both attributes are set back to the "unset" value
        \@@_mc_lua_unset_mc_type_attr:
        % the local and the global copy of the tag are emptied
        \tl_clear:N  \l_@@_mc_key_tag_tl
        \tl_gclear:N \g_@@_mc_key_tag_tl
      }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\tag_mc_reset_box:N}
% This allows resetting the mc-attributes in a box. In base and generic mode it should do
% nothing. The current values of the two mc attributes are read and passed,
% together with the box, to the lua function |update_mc_attributes|.
%    \begin{macrocode}
\cs_set_protected:Npn \tag_mc_reset_box:N #1
  {
    \lua_now:e
      {
        local~mctype=tex.getattribute(luatexbase.attributes.g_@@_mc_type_attr)~
        local~mccnt=tex.getattribute(luatexbase.attributes.g_@@_mc_cnt_attr)~
        ltx.@@.func.update_mc_attributes(tex.getbox(\int_use:N #1),mccnt,mctype)
      }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{ \@@_get_data_mc_tag: }
% The command to retrieve the current mc tag.
% TODO: Perhaps this should use the attribute instead.
%    \begin{macrocode}
\cs_new:Npn \@@_get_data_mc_tag: { \g_@@_mc_key_tag_tl }
%    \end{macrocode}
% \end{macro}
% \subsection{Key definitions}
% \begin{macro}
%   {
%     tag (mc-key),
%     raw (mc-key),
%     alt (mc-key),
%     actualtext (mc-key),
%     label (mc-key),
%     artifact (mc-key)
%   }
% TODO: check conversion, check if local/global setting is right.
%    \begin{macrocode}
\keys_define:nn { @@ / mc }
  {
    % Every key stores its value with store_mc_data in the lua table entry
    % of the current mc (number from \@@_get_mc_abs_cnt:).
    tag .code:n = %
      {
        \tl_set:Ne   \l_@@_mc_key_tag_tl { #1 }
        \tl_gset:Ne  \g_@@_mc_key_tag_tl { #1 }
        \lua_now:e
          {
            ltx.@@.func.store_mc_data(\@@_get_mc_abs_cnt:,"tag","#1")
          }
      },
    raw .code:n =
      {
        \tl_put_right:Ne \l_@@_mc_key_properties_tl { #1 }
        \lua_now:e
          {
            ltx.@@.func.store_mc_data(\@@_get_mc_abs_cnt:,"raw","#1")
          }
      },
    alt .code:n = % Alt property
      {
        % an empty value is ignored, otherwise it is converted to utf16/hex
        \tl_if_empty:oF{#1}
          {
            \str_set_convert:Noon
              \l_@@_tmpa_str
              { #1 }
              { default }
              { utf16/hex }
            \tl_put_right:Nn \l_@@_mc_key_properties_tl { /Alt~< }
            \tl_put_right:No \l_@@_mc_key_properties_tl { \l_@@_tmpa_str>~ }
            \lua_now:e
              {
                ltx.@@.func.store_mc_data
                  (
                    \@@_get_mc_abs_cnt:,"alt","/Alt~<\str_use:N \l_@@_tmpa_str>"
                  )
              }
          }
      },
    alttext .meta:n = {alt=#1},
    actualtext .code:n = % ActualText property
      {
        \tl_if_empty:oF{#1}
          {
            \str_set_convert:Noon
              \l_@@_tmpa_str
              { #1 }
              { default }
              { utf16/hex }
            % The properties list must get /ActualText here, the same name
            % that is stored in the lua table below (not /Alt).
            \tl_put_right:Nn \l_@@_mc_key_properties_tl { /ActualText~< }
            \tl_put_right:No \l_@@_mc_key_properties_tl { \l_@@_tmpa_str>~ }
            \lua_now:e
              {
                ltx.@@.func.store_mc_data
                  (
                    \@@_get_mc_abs_cnt:,
                    "actualtext",
                    "/ActualText~<\str_use:N \l_@@_tmpa_str>"
                  )
              }
          }
      },
    label .code:n =
      {
        \tl_set:Nn\l_@@_mc_key_label_tl { #1 }
        \lua_now:e
          {
            ltx.@@.func.store_mc_data
              (
                \@@_get_mc_abs_cnt:,"label","#1"
              )
          }
      },
    __artifact-store .code:n =
      {
        \lua_now:e
          {
            ltx.@@.func.store_mc_data
              (
                \@@_get_mc_abs_cnt:,"artifact","#1"
              )
          }
      },
    artifact .code:n =
      {
        % First set the bool, the type and the tag, then store the
        % (expanded) type in the lua table.
        \exp_args:Nne
          \keys_set:nn
            { @@ / mc}
            { __artifact-bool, __artifact-type=#1, tag=Artifact }
        \exp_args:Nne
          \keys_set:nn
            { @@ / mc }
            { __artifact-store=\l_@@_mc_artifact_type_tl }
      },
    artifact .default:n = { notype }
  }
%</luamode>
%    \end{macrocode}
% \end{macro}
%
% \end{implementation}
% \PrintIndex