Conditionally replacing sequences of characters

The code became a tad long. Much longer than I initially thought it would be.

Your conditions aren't too clear, I think. What I implemented (in crappy pseudocode) is:

procedure cur_char // (a, b, c, or d)
  if next_char is lowercase_vowel
    print replacement (cur_char) // replaces ABCD by PQRS
  else
    if next_char is uppercase_X
      override_replacement // changes replacement of ABC to LMN
    end
    print replacement (cur_char) // possibly overridden because of X
    print "x"
  end
end

which is, in an oversimplified way, more or less what the code does.

With that code, the following substitutions occur: AaBeCiDoAu becomes PaQeRiSoPu, ABCDA becomes PxQxRxSxPx, and AXBXCXDXAX becomes LxXMxXNxXSxXLxX.

I used expl3 for the job, otherwise the code would be much longer. I defined a macro \niranjan_define_active:Nn which you should use, for example, like this:

Click to copy

\niranjan_define_active:Nn A { \__niranjan_process:NN A P }

Another pair of macros, \activateall and \deactivateall, controls when the replacement behaviour should happen. Ideally you want this replacement active only inside a group, to avoid chaos.

The code can be extended to handle other types of substitutions if you want. The macro \__niranjan_define_charset_conditional:Nn allows you to define a conditional function that checks whether the next character belongs to a given set of characters.

However, the code only works for characters with category code 11 (letters). I didn't do any verification whatsoever for other category codes.

I must say I doubt this code will simply work for the Devanagari script. But, as you wanted an example for Latin characters, here it is:

Click to copy

\documentclass{article}
\usepackage{expl3}
\ExplSyntaxOn
% Main code
% \__niranjan_process:NN <current char> <replacement char>
% Entry point executed by each active letter.  Both arguments are saved
% as implicit character tokens, the registered letters are switched back
% to ordinary letters so that the token coming next is read as a plain
% letter, and that token is then peeked at before carrying on in
% \__niranjan_process_aux:.
\cs_new_protected:Nn \__niranjan_process:NN
  {
    \niranjan_deactivate_all:
    \cs_set_eq:NN \l__niranjan_replacement_char #2
    \cs_set_eq:NN \l__niranjan_curr_char #1
    \peek_after:Nw \__niranjan_process_aux:
  }
% Second half of \__niranjan_process:NN; runs once \l_peek_token holds
% the token that follows the letter being replaced.
%   * lower-case vowel next: print the replacement only;
%   * anything else: print the replacement followed by a literal x,
%     after an upper-case X has had the chance to swap the replacement.
% In both cases the peeked token is then handled by
% \__niranjan_rescan_token:w.
\cs_new_protected:Nn \__niranjan_process_aux:
  {
    \__niranjan_if_lower_vowel:NTF \l_peek_token
      { \l__niranjan_replacement_char }
      {
        \__niranjan_if_upper_X:NT \l_peek_token
          { \__niranjan_followed_by_X: }
        \l__niranjan_replacement_char
        x
      }
    % Shared tail of both branches.
    \__niranjan_rescan_token:w
  }
% Called when the letter being processed is followed by an upper-case X.
% If that letter is one of A, B or C, its character is extracted from
% the saved implicit token and passed to \__niranjan_replace_char:n,
% which overrides the replacement; any other letter is left alone.
\cs_new_protected:Nn \__niranjan_followed_by_X:
  {
    \__niranjan_if_upper_ABC:NTF \l__niranjan_curr_char
      {
        \exp_args:Nf \__niranjan_replace_char:n
          { \__niranjan_get_char:N \l__niranjan_curr_char }
      }
      { }
  }
% \__niranjan_replace_char:n {<char>}
% Overrides \l__niranjan_replacement_char for the given character:
% A -> L, B -> M, C -> N; any other input is used as its own replacement.
%
% The function performs an assignment (\cs_set_eq:NN), so it is not
% expandable and is therefore declared protected.
%
% How it works: \exp_last_unbraced:NNf grabs the single token
% \str_case:nnF as its third argument and f-expands from there, so the
% selected letter ends up right after
% \cs_set_eq:NN \l__niranjan_replacement_char.
\cs_new_protected:Npn \__niranjan_replace_char:n #1
  {
    \exp_last_unbraced:NNf \cs_set_eq:NN \l__niranjan_replacement_char
    \str_case:nnF {#1}
      {
        { A } { L }
        { B } { M }
        { C } { N }
      }
      {#1}
  }
% Final step after the replacement has been printed: look at the token
% that follows in the input.  If \peek_N_type:TF reports an N-type token,
% \__niranjan_rescan_token:Nw takes over and grabs that token; otherwise
% the registered letters are simply made active again.
\cs_new_protected:Npn \__niranjan_rescan_token:w
  {
    \peek_N_type:TF
      { \__niranjan_rescan_token:Nw }
      { \niranjan_activate_all: }
  }
% \__niranjan_rescan_token:Nw <token>
% #1 is the token following the replacement; it was read while the
% registered letters were deactivated (see \__niranjan_process:NN).
% The letters are made active again first, and #1 is then passed through
% \tl_rescan:nn with an empty setup, i.e. re-read under the category
% codes now in force -- presumably so that a registered letter coming
% next is turned into its active version and processed in turn.
\cs_new_protected:Npn \__niranjan_rescan_token:Nw #1
  {
    \niranjan_activate_all:
    \tl_rescan:nn { } {#1}
  }
% Checking for following charset
% Scratch macro used once, on the \use:x line below, to define the
% helpers that inspect the meaning of a token.  Its argument #1 is the
% string produced by \tl_to_str:n for the text the-letter-space, which
% serves as a delimiter in the parameter texts of the helpers.
%
% Helpers defined:
%   \__niranjan_define_charset_conditional:Nn <conditional> {<chars>}
%       creates a protected conditional (T, F and TF forms) taking one
%       token; see the comments inside.
%   \__niranjan_if_charset:wn
%       worker of those conditionals.
%   \__niranjan_get_char:N <token>
%       expandable; leaves the first token that follows the delimiter
%       string in the meaning of <token>.
\cs_set:Npn \__niranjan_tmp:w #1
  {
    \cs_new_protected:Npn \__niranjan_define_charset_conditional:Nn ##1 ##2
      {
        \prg_new_protected_conditional:Npnn ##1 ####1 { T, F, TF }
          {
            % The meaning of the tested token is expanded in place and
            % followed by the delimiter string and \q_nil as a sentinel,
            % so the delimited argument below always finds a match.
            \exp_last_unbraced:No \__niranjan_if_charset:wn
              \token_to_meaning:N ####1 #1 \q_nil \q_stop {##2}
          }
      }
    % Arguments: text before the first delimiter string, the token right
    % after it, the rest up to \q_stop, and finally the character set.
    \cs_new_protected:Npn \__niranjan_if_charset:wn ##1 #1 ##2##3 \q_stop ##4
      {
        % \q_nil here means only the sentinel matched, i.e. the meaning
        % did not contain the delimiter string: the test is false.
        \quark_if_nil:NTF ##2
          { \prg_return_false: }
          {
            % Otherwise the test is true exactly when the character
            % occurs in the given set.
            \str_if_in:nnTF {##4} {##2}
              { \prg_return_true: }
              { \prg_return_false: }
          }
      }
    \cs_new:Npn \__niranjan_get_char:N ##1
      { \exp_last_unbraced:No \__niranjan_get_char:w \token_to_meaning:N ##1 }
    \cs_new:Npn \__niranjan_get_char:w #1 ##1 { ##1 }
  }
% Call the scratch macro with the stringified delimiter; \use:x expands
% \tl_to_str:n while \exp_not:N keeps the scratch macro itself unexpanded.
\use:x { \exp_not:N \__niranjan_tmp:w { \tl_to_str:n { the~letter~ } } }
% The three character-set tests used by the main code: lower-case vowel,
% upper-case X, and one of the letters A, B, C.  Each call creates the
% T, F and TF forms of the named conditional.
\__niranjan_define_charset_conditional:Nn \__niranjan_if_lower_vowel:N { aeiou }
\__niranjan_define_charset_conditional:Nn \__niranjan_if_upper_X:N { X }
\__niranjan_define_charset_conditional:Nn \__niranjan_if_upper_ABC:N { ABC }
% Setting active chars
% Global list of every character registered with
% \niranjan_define_active:Nn; it is mapped over when the replacement
% behaviour is switched on or off.
\tl_new:N \g__niranjan_chars_tl
% \niranjan_define_active:Nn <char> {<code>}
% Registers <char>: <code> is stored in the protected macro
% \__niranjan_active_letter_<char>:, the active version of <char> is made
% equal to that macro, and <char> is appended to the global list.
% The category code of <char> is not changed here.
%
% All three steps are global, so the registration stays consistent even
% when this function is used inside a group (with a local active
% definition the character would remain in the global list while its
% active meaning is lost at the end of the group).
\cs_new_protected:Npn \niranjan_define_active:Nn #1#2
  {
    \cs_gset_protected:cpn { __niranjan_active_letter_#1: } {#2}
    \char_gset_active_eq:Nc #1 { __niranjan_active_letter_#1: }
    \tl_gput_right:Nn \g__niranjan_chars_tl { #1 }
  }
% Switch the replacement behaviour on: every character stored in
% \g__niranjan_chars_tl gets the category code of an active character.
\cs_new_protected:Npn \niranjan_activate_all:
  {
    \tl_map_inline:Nn \g__niranjan_chars_tl
      { \char_set_catcode_active:N ##1 }
  }
% Switch it off again: the same characters become ordinary letters.
\cs_new_protected:Npn \niranjan_deactivate_all:
  {
    \tl_map_inline:Nn \g__niranjan_chars_tl
      { \char_set_catcode_letter:N ##1 }
  }
% Document-level names for the two switches.
\cs_new_eq:NN \activateall \niranjan_activate_all:
\cs_new_eq:NN \deactivateall \niranjan_deactivate_all:
% Register the four letters handled in this example.  Each active letter
% calls \__niranjan_process:NN with itself and its default replacement
% (A -> P, B -> Q, C -> R, D -> S).
\niranjan_define_active:Nn A { \__niranjan_process:NN A P }
\niranjan_define_active:Nn B { \__niranjan_process:NN B Q }
\niranjan_define_active:Nn C { \__niranjan_process:NN C R }
\niranjan_define_active:Nn D { \__niranjan_process:NN D S }
\ExplSyntaxOff
\begin{document}

\ttfamily

% Each sample is typeset twice: first unchanged, then again between
% \activateall and \deactivateall so that the replacements apply.

% Letters followed by lower-case vowels.
AaBeCiDoAu ->
\activateall
AaBeCiDoAu
\deactivateall

% Letters followed by something other than a lower-case vowel or X.
ABCDA ->
\activateall
ABCDA
\deactivateall

% Letters followed by an upper-case X.
AXBXCXDXAX ->
\activateall
AXBXCXDXAX
\deactivateall

\end{document}

Conditionally replacing sequences of characters

Tags:

Macros

Characters

Conditionals

Related

Recent Posts