How could LaTeX replace the tokens <= by the command \leq?

I assume that the replacements should be done in math mode only. Then the starting characters can be made active via the special value "8000 for \mathcode. The characters behave as usual in text mode, but they become special in math mode.

The following example document provides parsers for the following shorthands:

<< : \ll (latexsym/amsmath)
<> : \neq
<= : \leq
<== : \Leftarrow
<=> : \Leftrightarrow
<-- : \leftarrow
<-> : \leftrightarrow
>> : \gg (latexsym/amsmath)
>= : \geq
--> : \rightarrow
-+ : \mp
+- : \pm
... : \dots (amsmath)
== : \equiv
=. : \doteq
==> : \Rightarrow
=( : \subseteq (latexsym/amsmath)
=) : \supseteq (latexsym/amsmath)
=[ : \sqsubseteq (latexsym/amsmath)
=] : \sqsupseteq (latexsym/amsmath)

Example file:

\documentclass{article}

%\usepackage{latexsym}
% * because of \gg, \ll, \subseteq, \supseteq, \sqsubseteq, \sqsupseteq
% * not needed if amsmath is loaded

\usepackage{amsmath}% because of \dots

\makeatletter
% \msh@ifnextchar{<token>}{<true>}{<false>}
% Peeks at the next token of the input (without removing it) and
% continues with <true> if it has the same meaning as <token>,
% otherwise with <false>.
% LaTeX's \@ifnextchar gobbles spaces, therefore
% \msh@ifnextchar is defined that keeps spaces
\newcommand*{\msh@ifnextchar}[3]{%
  % Store the pending test together with all three arguments, because
  % \futurelet continues with exactly one token.
  \def\msh@temp{\msh@@ifnextchar{#1}{#2}{#3}}%
  % Let \msh@token be the upcoming token, then run \msh@temp.
  \futurelet\msh@token\msh@temp
}
% \msh@@ifnextchar{<token>}: compares the token recorded in \msh@token
% with <token> and selects one of the two brace groups that follow.
\newcommand*{\msh@@ifnextchar}[1]{%
  \ifx\msh@token#1%
    % \expandafter finishes the conditional before the selector
    % grabs its two arguments.
    \expandafter\@firstoftwo
  \else
    \expandafter\@secondoftwo
  \fi
}

% <<
% <>
% <=
% <==
% <=>
% <--
% <->
% >>
% >=
% -->
% -+
% +-
% ...
% ==
% =.
% ==>
% =(
% =)
% =[
% =]

% Commands that take the original meanings of the special characters.
% Each \msh@code@<name> is a \mathchardef token holding the \mathcode
% the character has at this point of the preamble. The parsers use
% these tokens to typeset the plain character, and
% \resetmathshorthands uses them to restore the \mathcode values.
\mathchardef\msh@code@less=\mathcode`\<\relax
\mathchardef\msh@code@greater=\mathcode`\>\relax
\mathchardef\msh@code@minus=\mathcode`\-\relax
\mathchardef\msh@code@plus=\mathcode`\+\relax
\mathchardef\msh@code@equal=\mathcode`\=\relax
\mathchardef\msh@code@dot=\mathcode`\.\relax

% Macro \resetmathshorthands resets the original meaning of the
% special characters by resetting their \mathcode values.
% It is built with \edef: \number`\<char> is expanded to the decimal
% character code at definition time, whereas the \mathchardef tokens
% \msh@code@... are not expandable and stay in the definition as the
% values to assign.
\@ifdefinable{\resetmathshorthands}{%
  \edef\resetmathshorthands{%
    \mathcode\number`\<=\msh@code@less
    \mathcode\number`\>=\msh@code@greater
    \mathcode\number`\-=\msh@code@minus
    \mathcode\number`\+=\msh@code@plus
    \mathcode\number`\.=\msh@code@dot
    \mathcode\number`\==\msh@code@equal
  }%
}

% Macro \setmathshorthands activates and defines the special
% characters: it sets their \mathcode to "8000 (the character then
% acts like an active character in math mode) and lets the active
% version of each character be the matching parser macro.
% The definition is made inside a group in which the characters have
% category code "active", so that the characters written after \let
% below are active character tokens. \xdef makes the definition
% global, therefore it survives the \endgroup.
\begingroup
  \catcode`\<=\active
  \catcode`\>=\active
  \catcode`\-=\active
  \catcode`\+=\active
  \catcode`\.=\active
  \catcode`\==\active
  % Inside \xdef the active "=" has to expand to an ordinary "=",
  % because it is used as the equals sign of the \mathcode
  % assignments.
  \edef={\string=}%
  \@ifdefinable{\setmathshorthands}{%
    \xdef\setmathshorthands{%
      \mathcode\number`\<="8000 %
      \mathcode\number`\>="8000 %
      \mathcode\number`\-="8000 %
      \mathcode\number`\+="8000 %
      \mathcode\number`\.="8000 %
      \mathcode\number`\=="8000 %
      % \noexpand keeps the active characters and the parser names
      % unexpanded, so the \let assignments are carried out when
      % \setmathshorthands is used.
      \let\noexpand<\noexpand\msh@less
      \let\noexpand>\noexpand\msh@greater
      \let\noexpand-\noexpand\msh@minus
      \let\noexpand+\noexpand\msh@plus
      \let\noexpand.\noexpand\msh@dot
      \let\noexpand=\noexpand\msh@equal
    }%
  }%
\endgroup

% The parsers for the math shorthands follow:

% <<
% <>
% <=
% <==
% <=>
% <--
% <->
% Parser that runs in place of "<" in math mode. It decides by the
% token that follows:
%   "="  -> the "=" is removed, \msh@less@equal continues
%   "-"  -> the "-" is removed, \msh@less@minus continues
%   "<"  -> \ll
%   ">"  -> \neq
%   else -> ordinary "<" (\msh@code@less)
% The four look-ahead tests exclude each other, so the order in which
% they are made does not influence the result.
\newcommand*{\msh@less}{%
  \msh@ifnextchar={%
    % \@gobble is expanded first and takes the "=" away before
    % \msh@less@equal looks at what follows.
    \expandafter\msh@less@equal\@gobble
  }{%
    \msh@ifnextchar-{%
      \expandafter\msh@less@minus\@gobble
    }{%
      \msh@ifnextchar<{%
        % Typeset the symbol, then drop the second character.
        \ll\@gobble
      }{%
        \msh@ifnextchar>{%
          \neq\@gobble
        }{%
          \msh@code@less
        }%
      }%
    }%
  }%
}
% Continuation after "<=" (both characters are already removed):
%   ">"  -> \Leftrightarrow   (<=>)
%   "="  -> \Leftarrow        (<==)
%   else -> \leq              (<=)
% The two tests exclude each other, so their order is free.
\newcommand*{\msh@less@equal}{%
  \msh@ifnextchar>{%
    \Leftrightarrow\@gobble
  }{%
    \msh@ifnextchar={%
      \Leftarrow\@gobble
    }{%
      \leq
    }%
  }%
}
% Continuation after "<-" (both characters are already removed):
%   ">"  -> \leftrightarrow   (<->)
%   "-"  -> \leftarrow        (<--)
%   else -> no shorthand; the two characters are typeset with their
%           saved ordinary meanings
\newcommand*{\msh@less@minus}{%
  \msh@ifnextchar>{%
    \leftrightarrow\@gobble
  }{%
    \msh@ifnextchar-{%
      \leftarrow\@gobble
    }{%
      \msh@code@less\msh@code@minus
    }%
  }%
}

% >>
% >=
% Parser that runs in place of ">" in math mode:
%   "="  -> \geq   (>=)
%   ">"  -> \gg    (>>)
%   else -> ordinary ">" (\msh@code@greater)
% In both shorthand cases \@gobble removes the second character.
\newcommand*{\msh@greater}{%
  \msh@ifnextchar={%
    \geq\@gobble
  }{%
    \msh@ifnextchar>{%
      \gg\@gobble
    }{%
      \msh@code@greater
    }%
  }%
}

% -->
% -+
% Parser that runs in place of "-" in math mode:
%   "+"  -> \mp                                        (-+)
%   "-"  -> the "-" is removed, \msh@minus@minus continues (--...)
%   else -> ordinary "-" (\msh@code@minus)
\newcommand*{\msh@minus}{%
  \msh@ifnextchar+{%
    \mp\@gobble
  }{%
    \msh@ifnextchar-{%
      % \@gobble is expanded first and takes the second "-" away.
      \expandafter\msh@minus@minus\@gobble
    }{%
      \msh@code@minus
    }%
  }%
}
% Continuation after "--" (both characters are already removed):
% a following ">" completes "-->" and gives \rightarrow; otherwise
% the two minus signs are typeset with their ordinary meaning.
\newcommand*{\msh@minus@minus}{%
  \msh@ifnextchar>{\rightarrow\@gobble}{\msh@code@minus\msh@code@minus}%
}

% +-
% Parser that runs in place of "+" in math mode: "+-" gives \pm (the
% "-" is removed by \@gobble), anything else an ordinary "+".
\newcommand*{\msh@plus}{%
  \msh@ifnextchar-{\pm\@gobble}{\msh@code@plus}%
}

% ...
% Parser that runs in place of "." in math mode: if a second dot
% follows, it is removed and \msh@dot@dot continues; otherwise an
% ordinary "." is typeset.
\newcommand*{\msh@dot}{%
  \msh@ifnextchar.{\expandafter\msh@dot@dot\@gobble}{\msh@code@dot}%
}
% Continuation after ".." (both dots are already removed): if a
% third dot follows, it is removed and \msh@dot@dot@dot continues;
% otherwise the two dots are typeset as ordinary dots.
\newcommand*{\msh@dot@dot}{%
  \msh@ifnextchar.{\expandafter\msh@dot@dot@dot\@gobble}%
                  {\msh@code@dot\msh@code@dot}%
}
% Final step for "...": all three dots have been removed from the
% input, \dots is typeset in their place.
\newcommand*{\msh@dot@dot@dot}{%
  % remove space after "...", because a space would
  % disturb \dots' auto-positioning feature.
  % \romannumeral-`\x is expanded before \dots runs: while TeX looks
  % for the end of the number it takes away a following space, and
  % the negative number itself expands to nothing.
  \expandafter\dots\romannumeral-`\x
}

% ==
% =.
% ==>
% =(
% =)
% =[
% =]
% Parser that runs in place of "=" in math mode. It decides by the
% token that follows:
%   "]"  -> \sqsupseteq   (=])
%   "["  -> \sqsubseteq   (=[)
%   ")"  -> \supseteq     (=))
%   "("  -> \subseteq     (=()
%   "."  -> \doteq        (=.)
%   "="  -> the "=" is removed, \msh@equal@equal continues (==...)
%   else -> ordinary "=" (\msh@code@equal)
% The tests exclude each other, so their order does not influence
% the result. \@gobble removes the second character of a shorthand.
\newcommand*{\msh@equal}{%
  \msh@ifnextchar]{%
    \sqsupseteq\@gobble
  }{%
    \msh@ifnextchar[{%
      \sqsubseteq\@gobble
    }{%
      \msh@ifnextchar){%
        \supseteq\@gobble
      }{%
        \msh@ifnextchar({%
          \subseteq\@gobble
        }{%
          \msh@ifnextchar.{%
            \doteq\@gobble
          }{%
            \msh@ifnextchar={%
              \expandafter\msh@equal@equal\@gobble
            }{%
              \msh@code@equal
            }%
          }%
        }%
      }%
    }%
  }%
}
% Continuation after "==" (both characters are already removed):
% a following ">" completes "==>" and gives \Rightarrow; otherwise
% "==" stands for \equiv.
\newcommand*{\msh@equal@equal}{%
  \msh@ifnextchar>{\Rightarrow\@gobble}{\equiv}%
}
\makeatother

% Activate math shorthands in the math modes.
% \setmathshorthands is appended to the token registers instead of
% being assigned to them: a plain \everymath{...} would throw away
% whatever other packages or the document preamble have already
% stored in \everymath and \everydisplay.
\everymath\expandafter{\the\everymath\setmathshorthands}
\everydisplay\expandafter{\the\everydisplay\setmathshorthands}

\begin{document}
\centering
% \test{<formula>} typesets <formula> twice, once as inline math and
% once as display math, so that the shorthands are exercised in both
% math modes.
\newcommand*{\test}[1]{%
  $#1$%
  \[#1\]%
}
% Relations starting with "<" and ">", next to the plain characters.
\test{a << b < c <= d >= e > f >> g}
% "<>" and the shorthands starting with "=".
\test{a <> b = c =. d == e}
% Arrows.
\test{a <== b <-- c <-> d <=> e --> f ==> g}
% "+-" and "-+", next to plain signs that must stay untouched.
\test{a +- b = -(-a -+ +b)}
% "..." followed by a comma and by a plus sign.
\test{a, ..., z <> a + ...+ z}
% Subset and superset shorthands.
\test{a =( b =) c =[ e =] f}
\end{document}

Result

Remarks:

  • Macro \msh@ifnextchar looks at the next token. In contrast to LaTeX's \@ifnextchar, it does not gobble spaces. This is important, for example, for a + -b (a - b), which is different from a +- b (a ± b).

  • ... are replaced by \dots of package amsmath, because it has an auto-detection feature. The vertical position of the dots depends on the next token. For example, in a comma separated list, \dots become \ldots; if the next token is a +, then \cdots is used.

    Spaces are gobbled after a command token like \dots, but not after other characters like .... Therefore \msh@dot@dot@dot removes a following space before calling \dots. Otherwise \dots would see the space and become \ldots, even if the token after the space is a +.

  • The suggested _C for \subseteq looks too ambiguous to me, because it looks like a normal subscript C. Also, there is no good ASCII letter to use in a shorthand for \supseteq. Therefore I have implemented the shorthands =(, =) and the pair =[, =] for the square forms.

    If round or square parentheses follow the equal sign, then the shorthand replacement can be prevented by a space, e.g. a = (b + c).


The following is taken partially from Define a command so that it is only active within the document environment:

enter image description here

\documentclass{article}
\makeatletter
% Turn "<=" into \leq, in math mode only.
%
% "<" is not given the category code "active": that would change "<"
% everywhere in the document body, break numeric comparisons such as
% \ifnum\x<3 and run \leq in text mode. Instead "<" gets the special
% \mathcode "8000, which makes TeX treat it like an active character
% in math mode only; elsewhere it stays an ordinary character.
\AtBeginDocument{%
  % Keep the regular math meaning of "<". The active definition must
  % not use "<" itself, because that would call the definition again.
  \mathchardef\lt@mathchar=\mathcode`\<\relax
  \mathcode`\<="8000\relax
  % Define the active "<" without touching its category code:
  % \lowercase turns the active "~" into an active "<".
  \begingroup\lccode`\~=`\<
  \lowercase{\endgroup\def~{\@ifnextchar={\leq\@gobble}{\lt@mathchar}}}%
}
\makeatother
\begin{document}
$2x <= 4x - 2 \leq y$
\end{document}

But using an editor's search-and-replace seems just as appropriate.


This is an interesting problem, which can be solved by more compact macros than in the accepted answer:

% \isnextchar<token>{<true>}{<false>}
% Looks at the next token of the input without removing it and runs
% <true> if it has the same meaning as <token>, otherwise <false>.
% Both branches are stored in token registers inside a group. Each
% stored branch starts with \endgroup, so the group is closed (and
% \toks0, \toks1, \tmp and \next get their former values back) before
% the chosen branch itself runs.
\long\def\isnextchar#1#2#3{\begingroup\toks0={\endgroup#2}\toks1={\endgroup#3}%
   \let\tmp=#1\futurelet\next\isnextcharA
}
% Emits \toks0 if \tmp and \next agree, otherwise \toks1: the
% conditional supplies the register number and \space ends the number.
\def\isnextcharA{\the\toks\ifx\tmp\next0\else1\fi\space}

% \skipnext{<code>}<token>: removes <token> (the character that has
% just been recognized in the input) and continues with <code>.
\def\skipnext#1#2{#1}    
% \trynext{<chars>}<replacement>{<fallback>}
% Checks whether the input continues with <chars>. If it does, the
% characters are removed and <replacement> is run. If it does not,
% <fallback> is run, followed by the characters removed so far.
\def\trynext#1{\trynextA#1\relax\relax}
% Worker of \trynext. Parameters:
%   #1 character to test now, #2 characters to test after it,
%   #3 characters already matched and removed from the input,
%   #4 <replacement>, #5 <fallback>.
\def\trynextA#1#2\relax#3\relax#4#5{%
   % An empty #2 means that #1 is the last character of the shorthand:
   % a match then leads to <replacement>. Otherwise a match moves #1
   % to the list of removed characters and goes on with #2.
   \ifx\relax#2\relax \def\next{\isnextchar#1{\skipnext{#4}}{#5#3}}\else
      \def\next{\isnextchar#1{\skipnext{\trynextA#2\relax#3#1\relax#4{#5}}}{#5#3}}\fi
   \next
}
% \mspecdefA<saved mathchar><active char><rest> : <replacement>
% Adds one shorthand to the definition of <active char>. <rest> is
% the shorthand without its first character.
\def\mspecdefA#1#2#3 : #4{\ifx#2\undefined
   % First shorthand for this character: the fallback is the saved
   % math character #1.
   \def#2{\trynext{#3}#4{#1}}\else
   % Further shorthand: the present definition becomes the fallback
   % of the new test, so shorthands declared later are tested first.
   \toks0={\trynext{#3}#4}\toks1=\expandafter{#2}%
   \edef#2{\the\toks0{\the\toks1}}\fi
}
% \mspecdef <shorthand> : <replacement>
% Declares <shorthand> as a math-mode shorthand for <replacement>.
% Only the first character of the shorthand is read as #1 here; the
% remainder of the declaration is picked up by \mspecdefA.
\def\mspecdef#1{%
   % Save the original \mathcode of the character once, in the
   % control sequence named "m:<char>".
   \expandafter\ifx\csname m:#1\endcsname\relax
      \expandafter\mathchardef\csname m:#1\endcsname=\mathcode`#1
   \fi
   % "8000 lets the character act like an active character in math mode.
   \mathcode`#1="8000 
   % \lowercase converts the active ~ into the active version of #1.
   \begingroup \lccode`~=`#1 
   \lowercase{\endgroup\expandafter\mspecdefA\csname m:#1\endcsname~}%
}

% Shorthand declarations.
% A shorthand that extends another one (e.g. <== extends <=) has to be
% declared after the shorter one: \mspecdefA puts each new test in
% front of the existing ones, so the longer shorthand is tried first.
% Plus before minus gives the plus-minus sign \pm, minus before plus
% gives \mp; the closing bracket in =] gives the superset \sqsupseteq.
\mspecdef << : \ll
\mspecdef <> : \neq
\mspecdef <= : \leq
\mspecdef <== : \Leftarrow
\mspecdef <=> : \Leftrightarrow
\mspecdef <-- : \leftarrow
\mspecdef <-> : \leftrightarrow
\mspecdef >> : \gg
\mspecdef >= : \geq
\mspecdef --> : \rightarrow
\mspecdef -+ : \mp
\mspecdef +- : \pm
\mspecdef ... : \dots
\mspecdef == : \equiv
\mspecdef =. : \doteq
\mspecdef ==> : \Rightarrow
\mspecdef =( : \subseteq
\mspecdef =) : \supseteq
\mspecdef =[ : \sqsubseteq
\mspecdef =] : \sqsupseteq

test:

% One display per group of shorthands; every line also contains plain
% characters that must be typeset unchanged.
$$ a << b < c <= d >= e > f >> g $$
$$ a <> b = c =. d == e $$
$$ a <== b <-- c <-> d <=> e --> f ==> g $$
$$ a +- b = -(-a -+ +b) $$
$$ a, ..., z <> a + ...+ z $$
$$ a =( b =) c =[ e =] f $$

\bye

The result is the same as in the accepted answer.

Edit: How does it work? When we do

\mspecdef ax : \U    \mspecdef axy : \V    \mspecdef abcd : \W 

then the a character is set as math-active (i.e. its \mathcode is "8000) and it is defined as

\def a{\trynext{bcd}\W{\trynext{xy}\V{\trynext{x}\U{normal a}}}}

This macro tests whether the following string is bcd (by calling \isnextchar repeatedly). If it is, the rest of the macro is skipped and \W is processed. Otherwise the next part of the macro is processed, which means that xy is tested. If that fails, x is tested, and if that fails too, the normal a is printed.

We can do this only with TeX macros at primitive level without any non-TeX tools like lua code, without any obscure solutions like expl3.