How to use LaTeX for sequence alignment
Here's an implementation with a handy syntax, using expl3.
% Example document for the \sequence command defined below.
\documentclass{article}
% T1 font encoding and UTF-8 input: the labels used later contain accented letters.
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage[french]{babel}
% xparse supplies \NewDocumentCommand (part of the LaTeX kernel since 2020).
\usepackage{xparse}
% Fallback: defines \textbar only if it is not already defined.
\providecommand{\textbar}{$|$}
% Start of the expl3 code section (closed by \ExplSyntaxOff further down).
\ExplSyntaxOn
% \sequence[<keys>]{<row 1> \\ <row 2> \\ <row 3>}
% User command. The optional keys are set inside a group, so they only
% affect this one alignment; the rows are then passed to \egreg_sequence:n.
\NewDocumentCommand \sequence { O{} m }
  {
    \group_begin:
      \keys_set:nn { egreg/sequence } { #1 }
      \egreg_sequence:n { #2 }
    \group_end:
  }
% \sequencesetup{<keys>}
% Sets keys of the egreg/sequence family without opening a group of its own,
% so the settings stay in force for the \sequence calls that follow.
\NewDocumentCommand \sequencesetup { m }
  { \keys_set:nn { egreg/sequence } { #1 } }
% Keys accepted by \sequence and \sequencesetup.
% Each key stores its value in a token list variable; sfont and lfont have
% no initial value set here.
\keys_define:nn { egreg/sequence }
  {
    % Label printed after the first row.
    top    .tl_set:N  = \l_egreg_sequence_top_tl,
    top    .initial:n = {Reference~sequence},
    % Label printed after the third row.
    bottom .tl_set:N  = \l_egreg_sequence_bot_tl,
    bottom .initial:n = {Query~sequence},
    % Width of the box that holds each character.
    width  .tl_set:N  = \l_egreg_sequence_wd_tl,
    width  .initial:n = 0.75em,
    % Font declarations for the sequence rows and for the labels.
    sfont  .tl_set:N  = \l_egreg_sequence_sfont_tl,
    lfont  .tl_set:N  = \l_egreg_sequence_lfont_tl,
  }
% Holds the rows of the alignment currently being typeset.
\seq_new:N \l__egreg_sequence_rows_seq
% \egreg_sequence:n {<rows separated by \\>}
% Splits the argument at \\ and typesets items 1, 2 and 3 of the result as
% the rows of a two-column tabular. The left column maps
% \egreg_sequence_item:n over every token of the row; the right column
% carries the parenthesised label (rows 1 and 3 only).
% The :fN variant of \tl_map_function:nN is generated further down the file.
\cs_new_protected:Npn \egreg_sequence:n #1
  {
    \seq_set_split:Nnn \l__egreg_sequence_rows_seq { \\ } { #1 }
    \begin{tabular}{@{}ll@{}}
      % Row 1, followed by the "top" label.
      \tl_use:N \l_egreg_sequence_sfont_tl
      \tl_map_function:fN
        { \seq_item:Nn \l__egreg_sequence_rows_seq { 1 } }
        \egreg_sequence_item:n
      &
      \tl_use:N \l_egreg_sequence_lfont_tl
      (\l_egreg_sequence_top_tl)
      \\
      % Row 2, with an empty label cell.
      \tl_use:N \l_egreg_sequence_sfont_tl
      \tl_map_function:fN
        { \seq_item:Nn \l__egreg_sequence_rows_seq { 2 } }
        \egreg_sequence_item:n
      &
      \\
      % Row 3, followed by the "bottom" label.
      \tl_use:N \l_egreg_sequence_sfont_tl
      \tl_map_function:fN
        { \seq_item:Nn \l__egreg_sequence_rows_seq { 3 } }
        \egreg_sequence_item:n
      &
      \tl_use:N \l_egreg_sequence_lfont_tl
      (\l_egreg_sequence_bot_tl)
      \\
    \end{tabular}
  }
% Create \tl_map_function:fN, the variant used in \egreg_sequence:n, where the
% first argument is the row obtained from \seq_item:Nn.
\cs_generate_variant:Nn \tl_map_function:nN { f }
% \egreg_sequence_item:n {<item>}
% Typesets one item of a row in a box of width \l_egreg_sequence_wd_tl:
%   |  is printed as \textbar,
%   .  prints nothing (an empty box, i.e. a blank position),
%   anything else is printed as is.
% The comparison is done on the string form of the whole item with
% \str_case:nnF, so a braced multi-token item such as {\textbf{A}} falls
% through to the last branch and is typeset unchanged. A token-by-token
% test (\token_if_eq_charcode:NN...) would only accept a single token here.
\cs_new_protected:Npn \egreg_sequence_item:n #1
  {
    \makebox[\l_egreg_sequence_wd_tl]
      {
        \str_case:nnF { #1 }
          {
            { | } { \textbar }
            { . } { }
          }
          { #1 }
      }
  }
% End of the expl3 code section.
\ExplSyntaxOff
\begin{document}
% Example 1: no options, so the initial key values apply.
\section*{Standard}
\sequence{
AGCATT--ATATTCTAAATTT \\
..|||||||||||||x||| \\
..CATTAGATA--CTTAA
}
% Example 2: keys given in the optional argument of \sequence
% (set inside a group by \sequence).
\section*{With options}
\sequence[
top=Séquence de référence,
bottom=Séquence de requête,
sfont=\ttfamily,
width=0.5em,
]{
AGCATT--ATATTCTAAATTT \\
..|||||||||||||x||| \\
..CATTAGATA--CTTAA
}
% Example 3: keys set beforehand with \sequencesetup, plus lfont
% given in the optional argument of this \sequence call.
\section*{With setup}
\sequencesetup{
top=Séquence de référence,
bottom=Séquence de requête,
sfont=\ttfamily,
width=0.5em,
}
\sequence[lfont=\bfseries]{
AGCATT--ATATTCTAAATTT \\
..|||||||||||||x||| \\
..CATTAGATA--CTTAA
}
\end{document}
The input is split at the \\ markers; the three lines are then typeset with each character in its own box (of customizable width). The labels added at the end of the top and bottom lines are also customizable. The fonts for the sequence (key sfont) and for the labels (key lfont) are independent.
\documentclass{article}
% \zz starts a new paragraph and hands the rest of the line to \zzz.
% Defined with \newcommand* instead of \def, so a clash with an existing
% macro is reported instead of silently overwriting it.
\newcommand*{\zz}{\par\zzz}
% \zzz<token>: processes the line one token at a time.
%   !  ends the loop and inserts \hfill, so the text after it is pushed right;
%   .  gives an empty box (blank position);
%   |  is printed as a math-mode bar;
%   anything else is printed as is.
% Every character sits in a box of width .75em; \zzz then calls itself
% (after closing the conditional) to read the next token.
\newcommand*{\zzz}[1]{%
  \ifx!#1%
    \hfill
  \else
    \makebox[.75em]{\ifx.#1\else\ifx|#1$|$\else#1\fi\fi}%
    \expandafter\zzz
  \fi
}
\begin{document}
\zz AGCATT--ATATTCTAAATTT ! (Reference sequence)
\zz ..|||||||||||||x||| !
\zz ..CATTAGATA--CTTAA ! (Query sequence)
\end{document}
Perhaps simply use the verbatim environment?
\documentclass{article}
\begin{document}
% Spaces are significant inside verbatim: the match line and the query are
% shifted two columns to the right so they line up under the reference
% (the same offset the ".." placeholders give in the macro-based versions),
% and the labels are padded to start in the same column.
\begin{verbatim}
AGCATT--ATATTCTAAATTT (Reference sequence)
  |||||||||||||x|||
  CATTAGATA--CTTAA    (Query sequence)
\end{verbatim}
\end{document}