How to use LaTeX for sequence alignment

Here's an implementation with a handy syntax, using expl3.

\documentclass{article}
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage[french]{babel}

\usepackage{xparse}

% Fallback only: \providecommand leaves an existing \textbar definition untouched.
\providecommand{\textbar}{$|$}

\ExplSyntaxOn
% \sequence[<key=value list>]{<row 1> \\ <row 2> \\ <row 3>}
% User-level entry point. The optional keys are set inside a group, so
% they only affect the alignment produced by this call.
\NewDocumentCommand \sequence { O{} m }
  {
    \group_begin:
      \keys_set:nn { egreg/sequence } {#1}
      \egreg_sequence:n {#2}
    \group_end:
  }
% \sequencesetup{<key=value list>}
% Sets keys of the egreg/sequence family without opening a group, so the
% new values remain in force after the command has finished.
\NewDocumentCommand \sequencesetup { m }
  { \keys_set:nn { egreg/sequence } {#1} }

% Keys understood by \sequence and \sequencesetup. Each key stores its
% value in a token list variable; sfont and lfont have no initial value.
\keys_define:nn { egreg/sequence }
  {
    % label printed in parentheses after the first row
    top    .tl_set:N  = \l_egreg_sequence_top_tl,
    top    .initial:n = {Reference~sequence},
    % label printed in parentheses after the third row
    bottom .tl_set:N  = \l_egreg_sequence_bot_tl,
    bottom .initial:n = {Query~sequence},
    % width of the box that holds each character
    width  .tl_set:N  = \l_egreg_sequence_wd_tl,
    width  .initial:n = 0.75em,
    % font commands applied to the sequence characters
    sfont  .tl_set:N  = \l_egreg_sequence_sfont_tl,
    % font commands applied to the labels
    lfont  .tl_set:N  = \l_egreg_sequence_lfont_tl,
  }

% Holds the rows of the alignment being typeset, one item per
% "\\"-separated chunk of the input.
\seq_new:N \l__egreg_sequence_rows_seq

% The f-type variant is used by the row helper below; generate it before
% the code that refers to it.
\cs_generate_variant:Nn \tl_map_function:nN { f }

% \__egreg_sequence_cells:n {<row number>}
% Typesets one row of the alignment: applies the sequence font (key sfont)
% and passes every item of the requested row to \egreg_sequence_item:n.
\cs_new_protected:Nn \__egreg_sequence_cells:n
 {
  \tl_use:N \l_egreg_sequence_sfont_tl
  \tl_map_function:fN
   { \seq_item:Nn \l__egreg_sequence_rows_seq { #1 } }
   \egreg_sequence_item:n
 }

% \__egreg_sequence_label:N <tl var>
% Typesets a row label in parentheses, using the label font (key lfont).
\cs_new_protected:Nn \__egreg_sequence_label:N
 {
  \tl_use:N \l_egreg_sequence_lfont_tl
  ( #1 )
 }

% \egreg_sequence:n {<row 1> \\ <row 2> \\ <row 3>}
% Splits the argument at "\\" and sets the first three rows in a
% two-column tabular: the characters on the left, the label on the right.
% Rows 1 and 3 get the labels from the keys top and bottom; row 2 (the
% match line) has no label.
\cs_new_protected:Nn \egreg_sequence:n
 {
  \seq_set_split:Nnn \l__egreg_sequence_rows_seq { \\ } { #1 }
  \begin{tabular}{@{}ll@{}}
  \__egreg_sequence_cells:n { 1 } &
   \__egreg_sequence_label:N \l_egreg_sequence_top_tl \\
  \__egreg_sequence_cells:n { 2 } &
   \\
  \__egreg_sequence_cells:n { 3 } &
   \__egreg_sequence_label:N \l_egreg_sequence_bot_tl \\
  \end{tabular}
 }

% \egreg_sequence_item:n {<character>}
% Sets one alignment column in a box whose width comes from the width key:
%   .          -> the box is left empty
%   |          -> \textbar
%   otherwise  -> the character itself
\cs_new_protected:Nn \egreg_sequence_item:n
  {
    \makebox[\l_egreg_sequence_wd_tl]
      {
        \token_if_eq_charcode:NNTF #1 .
          { }
          { \token_if_eq_charcode:NNTF #1 | { \textbar } { #1 } }
      }
  }
\ExplSyntaxOff

\begin{document}

\section*{Standard}

% No options given: the initial key values apply (default labels, width 0.75em).
\sequence{
  AGCATT--ATATTCTAAATTT \\
  ..|||||||||||||x||| \\
  ..CATTAGATA--CTTAA
}

\section*{With options}

% Keys given in [...] are set inside a group by \sequence, so they affect
% this alignment only.
\sequence[
  top=Séquence de référence,
  bottom=Séquence de requête,
  sfont=\ttfamily,
  width=0.5em,
]{
  AGCATT--ATATTCTAAATTT \\
  ..|||||||||||||x||| \\
  ..CATTAGATA--CTTAA
}

\section*{With setup}

% \sequencesetup sets the keys without a group, so the values stay in force
% for the alignments that follow.
\sequencesetup{
  top=Séquence de référence,
  bottom=Séquence de requête,
  sfont=\ttfamily,
  width=0.5em,
}

% A per-call option can still be added on top of the values set above.
\sequence[lfont=\bfseries]{
  AGCATT--ATATTCTAAATTT \\
  ..|||||||||||||x||| \\
  ..CATTAGATA--CTTAA
}

\end{document}

The input is split at the \\ markers; then the three lines are typeset with each character in a box (of customizable width). The labels at the end are added to the top and bottom lines (also customizable). The fonts for the sequence (key sfont) and for the labels (key lfont) are independent.

enter image description here


enter image description here

\documentclass{article}

% \zz starts a new paragraph and hands the rest of the row to \zzz.
% \newcommand* is used instead of \def so that an accidental clash with an
% existing macro is reported rather than silently overwriting it.
\newcommand*{\zz}{\par\zzz}
% \zzz<token>: typeset one column of the row, then call itself on the
% next token.
%   !      ends the row; \hfill is inserted so the text that follows is
%          pushed to the right
%   .      empty column
%   |      set as $|$
%   other  set unchanged
% Every column is a box .75em wide, so the rows line up.
\newcommand*{\zzz}[1]{%
  \ifx!#1%
    \hfill
  \else
    \makebox[.75em]{\ifx.#1\else\ifx|#1$|$\else#1\fi\fi}%
    \expandafter\zzz % skip over \fi before the next token is read
  \fi
}

\begin{document}


% Each \zz line is one row: the characters up to `!' become fixed-width
% columns, and whatever follows `!' is ordinary text placed after an \hfill.
\zz AGCATT--ATATTCTAAATTT ! (Reference sequence)
\zz ..|||||||||||||x|||   !
\zz ..CATTAGATA--CTTAA    ! (Query sequence)


\end{document}

Alternatively, perhaps simply use a verbatim environment?

enter image description here

\documentclass{article}
\begin{document}
% The columns are lined up by the literal spaces typed below; this
% presumably relies on verbatim reproducing them in a fixed-width font.
\begin{verbatim}
AGCATT--ATATTCTAAATTT (Reference sequence)
  |||||||||||||x|||
  CATTAGATA--CTTAA    (Query sequence)
\end{verbatim}
\end{document}