Read strings from file that contains the special character #

If only the file myfile2.txt has to be made readable, one solution is to change the category code of # locally while that file is read, as follows:

\documentclass{article}
\usepackage{xstring}
\usepackage{filecontents}

% Sample input without special characters: it is read below without any
% catcode change.
\begin{filecontents*}{myfile1.txt}
mean: 0.26068087303584447 lablab
\end{filecontents*}

% Sample input containing the special character #: the original note reports
% errors when a file like this is read without changing the catcode of #.
\begin{filecontents*}{myfile2.txt}
# mean: 0.26068087303584447 lablab
\end{filecontents*}

\newread\myread
% \readmean{<file>}
%   Reads the first line of <file> into \mystr and stores the text found
%   between "mean: " and " lablab" in \mymean (via xstring's \StrBetween).
%   The line is tokenized with the catcodes in force at the point of call, so
%   a file containing # must be read while # has a harmless catcode.
%   If <file> cannot be opened, an error is raised and nothing is read.
%   Line ends inside the definition are protected with % so that the macro
%   adds no stray spaces when it is used in the middle of a paragraph.
\newcommand{\readmean}[1]{%
  \openin\myread=#1 % the space before this comment terminates the file name
  \ifeof\myread
    % \openin fails silently; without this check \read would ask the
    % terminal for input instead of reading the file.
    \PackageError{readmean}{Cannot open file `#1'}%
      {Check that the file exists and that its name is spelled correctly.}%
  \else
    \read\myread to \mystr
    \StrBetween{\mystr}{mean: }{ lablab}[\mymean]%
    \closein\myread
  \fi
}

\begin{document}

% Plain file: the default catcodes are fine.
\readmean{myfile1.txt}
mean1 : \mymean

% File containing #: read it inside a group in which # is a letter, so the
% catcode change ends together with the group.
\begingroup
\catcode`\#=11\relax
\readmean{myfile2.txt}
mean2 : \mymean
\endgroup


\end{document}

enter image description here

A general macro set for extracting the metadata from the typical file you showed.

\documentclass{article}
\usepackage{xparse}

\ExplSyntaxOn

% \getmetadata{<file name>}
% Document-level command: hands its single mandatory argument to the
% code-level function \agh_getmetadata:n.
\NewDocumentCommand \getmetadata { m }
 { \agh_getmetadata:n {#1} }

% Input stream used for reading the metadata file
\ior_new:N \g_agh_input_ior
% A hash mark as produced by \token_to_str:N, used to test lines read as strings
\tl_const:Nx \c_agh_hashmark_tl { \token_to_str:N # }
% Holds the line currently being parsed; declared here because
% \agh_readline:n assigns to it
\tl_new:N \l_agh_current_line_tl

% Variants needed by the functions below
\cs_generate_variant:Nn \tl_if_in:nnTF { nV }
\cs_generate_variant:Nn \tl_replace_once:Nnn { Nx }

% \agh_getmetadata:n {<file name>}
% Opens <file name>, passes every line containing a hash mark to
% \agh_readline:n, stops reading at the first line without one, and then
% closes the stream so that it is not left open between calls.
\cs_new_protected:Npn \agh_getmetadata:n #1
 {
  \ior_open:Nn \g_agh_input_ior { #1 }
  \ior_str_map_inline:Nn \g_agh_input_ior
   { % read only lines containing #
    \tl_if_in:nVTF { ##1 } \c_agh_hashmark_tl
     {
      \agh_readline:n { ##1 }
     }
     { % first line without #: stop reading
      \ior_map_break:
     }
   }
  % release the stream, also when the loop was left through \ior_map_break:
  \ior_close:N \g_agh_input_ior
 }

% \__agh_key_to_macro:nN {<key>} <macro>
% In the current line, replaces the first occurrence of <key> (taken as a
% string) by <macro>.
\cs_new_protected:Npn \__agh_key_to_macro:nN #1#2
 {
  \tl_replace_once:Nxn \l_agh_current_line_tl { \tl_to_str:n {#1} } {#2}
 }

% \agh_readline:n {<line>}
% Stores <line> followed by one space, turns the keys `mean:', `std:' and
% `args:' into their parsing macros and then runs the result inside a box.
\cs_new_protected:Npn \agh_readline:n #1
 {
  % the trailing space delimits a value that sits at the very end of the line
  \tl_set:Nn \l_agh_current_line_tl { #1 ~ }
  \__agh_key_to_macro:nN { mean: } \agh_mean:w
  \__agh_key_to_macro:nN { std: }  \agh_std:w
  \__agh_key_to_macro:nN { args: } \agh_args:w
  % typeset the line into a scratch box: the parsing macros do their work
  % there and all the other characters are ignored
  \hbox_set:Nn \l_tmpa_box { \l_agh_current_line_tl }
 }

% \agh_mean:w <value><space>
% Grabs the text up to the next space (spaces in front of the value are
% skipped because the first argument is undelimited) and stores it globally
% in \plotmean.
% Defined as protected because it performs an assignment and so must not be
% run inside an expansion context.
\cs_new_protected:Npn \agh_mean:w #1#2 ~
 {
  \cs_gset:Npn \plotmean { #1#2 }
 }

% \agh_std:w <value><space>
% Grabs the text up to the next space (spaces in front of the value are
% skipped because the first argument is undelimited) and stores it globally
% in \plotstd.
% Defined as protected because it performs an assignment and so must not be
% run inside an expansion context.
\cs_new_protected:Npn \agh_std:w #1#2 ~
 {
  \cs_gset:Npn \plotstd { #1#2 }
 }

% \agh_args:w <ignored>(<alpha>,<beta>)<space>
% Discards everything up to the opening parenthesis, stores the text before
% the first comma globally in \plotalpha and the text between the comma and
% the closing parenthesis (spaces after the comma are skipped) in \plotbeta.
% The closing parenthesis must be followed by a space.
% Defined as protected because it performs assignments and so must not be
% run inside an expansion context.
\cs_new_protected:Npn \agh_args:w #1(#2,#3#4) ~
 {
  \cs_gset:Npn \plotalpha { #2 }
  \cs_gset:Npn \plotbeta  { #3#4 }
 }

\ExplSyntaxOff

\begin{document}

% Parse the metadata lines of the file; this defines \plotmean, \plotstd,
% \plotalpha and \plotbeta for the keys that are found.
\getmetadata{agh.txt}

% Print the extracted values, one per line.
\noindent Mean is \plotmean\\
Std is \plotstd\\
Alpha is \plotalpha\\
Beta is \plotbeta

\end{document}

enter image description here

A slightly different approach uses regular expression substitution.

\documentclass{article}
\usepackage{xparse}

\ExplSyntaxOn

% \getmetadata{<file name>}
% Document-level command: hands its single mandatory argument to the
% code-level function \agh_getmetadata:n.
\NewDocumentCommand \getmetadata { m }
 { \agh_getmetadata:n {#1} }

% Variables
\tl_new:N  \l_agh_metadata_tl % receives the lines of the file that contain a hash mark
\tl_new:N  \l_agh_temp_tl     % working copy modified by the regex replacements
\seq_new:N \l_agh_args_seq    % items obtained by splitting the args at commas
\ior_new:N \g_agh_input_ior   % stream used for reading the file
% A hash mark as produced by \token_to_str:N, used to test lines read as strings
\tl_const:Nx \c_agh_hashmark_tl { \token_to_str:N # }

% Variants
\cs_generate_variant:Nn \tl_replace_once:Nnn { Nx }
\cs_generate_variant:Nn \tl_if_in:nnT { nV }

% \agh_getmetadata:n {<file name>}
% Collects every line of <file name> that contains a hash mark in
% \l_agh_metadata_tl, closes the stream, and then runs the three searches
% that set \plotmean, \plotstd, \plotargs, \plotalpha and \plotbeta.
\cs_new_protected:Npn \agh_getmetadata:n #1
 {
  \tl_clear:N \l_agh_metadata_tl
  \ior_open:Nn \g_agh_input_ior { #1 }
  \ior_str_map_inline:Nn \g_agh_input_ior
   { % read only lines containing #
    \tl_if_in:nVT { ##1 } \c_agh_hashmark_tl
     {
      % the space after each line keeps a value at the end of one line
      % from fusing with the first characters of the next line
      \tl_put_right:Nn \l_agh_metadata_tl { ##1 ~ }
     }
   }
  % release the stream so that it is not left open between calls
  \ior_close:N \g_agh_input_ior
  \agh_search_mean:
  \agh_search_std:
  \agh_search_args:
 }

% \agh_search_mean:
% Sets \plotmean globally to the number that follows the first `mean:' in
% \l_agh_metadata_tl, or to `Not found' when there is no such number.
% The number may carry a sign and an optional exponent (e.g. 2.6e-05), so
% values written in scientific notation are not truncated at the `e'.
\cs_new_protected:Npn \agh_search_mean:
 {
  \tl_gset:Nn \plotmean { Not~found }
  % work on a copy: the replacement overwrites the whole token list
  \tl_set_eq:NN \l_agh_temp_tl \l_agh_metadata_tl
  \regex_replace_once:nnNT
   {
    \A .*? mean\: \s*?
    ( (\+|\-)? [0-9]* \.? [0-9]+ ( [eE] (\+|\-)? [0-9]+ )? )
    .* \Z
   }
   { \1 }
   \l_agh_temp_tl
   { \tl_gset_eq:NN \plotmean \l_agh_temp_tl }
 }

% \agh_search_std:
% Sets \plotstd globally to the number that follows the first `std:' in
% \l_agh_metadata_tl, or to `Not found' when there is no such number.
% The number may carry a sign and an optional exponent (e.g. 2.6e-05), so
% values written in scientific notation are not truncated at the `e'.
\cs_new_protected:Npn \agh_search_std:
 {
  \tl_gset:Nn \plotstd { Not~found }
  % work on a copy: the replacement overwrites the whole token list
  \tl_set_eq:NN \l_agh_temp_tl \l_agh_metadata_tl
  \regex_replace_once:nnNT
   {
    \A .*? std\: \s*?
    ( (\+|\-)? [0-9]* \.? [0-9]+ ( [eE] (\+|\-)? [0-9]+ )? )
    .* \Z
   }
   { \1 }
   \l_agh_temp_tl
   { \tl_gset_eq:NN \plotstd \l_agh_temp_tl }
 }

% \agh_search_args:
% Resets \plotargs, \plotalpha and \plotbeta to `Not found', then looks for
% the first parenthesized list after `args:' in \l_agh_metadata_tl. On
% success \plotargs receives the list in parentheses and \agh_split_args:
% is called to split it.
\cs_new_protected:Npn \agh_search_args:
 {
  % fallback text for all three results
  \tl_map_inline:nn { \plotargs \plotalpha \plotbeta }
   { \tl_gset:Nn ##1 { Not~found } }
  % work on a copy: the replacement overwrites the whole token list
  \tl_set_eq:NN \l_agh_temp_tl \l_agh_metadata_tl
  \regex_replace_once:nnNT
   {\A .*? args\: \s*? \( ( [^\)]* ) \) .* \Z }
   { \1 }
   \l_agh_temp_tl
   {
    \tl_gset:Nx \plotargs { (\l_agh_temp_tl) }
    \agh_split_args:
   }
 }

% \agh_split_args:
% Splits the comma-separated list held in \l_agh_temp_tl and stores its
% first and second items in \plotalpha and \plotbeta.
% The assignments are global, like those to \plotargs and to the
% `Not found' fallbacks in \agh_search_args:, so that all results survive
% together when \getmetadata is called inside a group.
\cs_new_protected:Npn \agh_split_args:
 {
  \seq_set_split:NnV \l_agh_args_seq { , } \l_agh_temp_tl
  \tl_gset:Nx \plotalpha { \seq_item:Nn \l_agh_args_seq { 1 } }
  \tl_gset:Nx \plotbeta  { \seq_item:Nn \l_agh_args_seq { 2 } }
 }
\ExplSyntaxOff

\begin{document}

% Collect the metadata lines of the file and run the three searches; this
% sets \plotmean, \plotstd, \plotargs, \plotalpha and \plotbeta.
\getmetadata{agh.txt}

% Print the extracted values, one per line.
\noindent Mean is \plotmean\\
Std is \plotstd\\
Args is \plotargs\\
Alpha is \plotalpha\\
Beta is \plotbeta

\end{document}

It is assumed that each piece of metadata appears only once (with these macros, the first appearance wins if there are several). If data are missing, \plotmean and so on will expand to “Not found”.

Every file read will overwrite the macros' values.

(For releases before TeX Live 2017, you will need \usepackage{l3tl-analysis} and \usepackage{l3regex} in addition to \usepackage{xparse} for this to work.)

I used the answer given by @Przemysław Scherwentke. However, I changed it slightly to avoid having to write \catcode`\#=11 around every call of \readmean. The following is my adaptation of @Przemysław's answer.

\documentclass{article}
\usepackage{xstring}
\usepackage{filecontents}

% Sample input without special characters.
\begin{filecontents*}{myfile1.txt}
mean: 0.26068087303584447 lablab
\end{filecontents*}

% Sample input whose line contains the special character #.
\begin{filecontents*}{myfile2.txt}
# mean: 0.26068087303584447 lablab
\end{filecontents*}

\newread\myread
% \readmean{<file>}
%   Reads the first line of <file> with # made a letter, extracts the text
%   between "mean: " and " lablab" and stores it globally in \mymean.
%   The catcode change and the scratch macro \mystr are confined to a group,
%   so callers need no catcode setup of their own.
%   If <file> cannot be opened, an error is raised and nothing is read.
%   Line ends inside the definition are protected with % so that the macro
%   adds no stray spaces when it is used in the middle of a paragraph.
\newcommand{\readmean}[1]{%
  \openin\myread=#1 % the space before this comment terminates the file name
  \ifeof\myread
    % \openin fails silently; without this check \read would ask the
    % terminal for input instead of reading the file.
    \PackageError{readmean}{Cannot open file `#1'}%
      {Check that the file exists and that its name is spelled correctly.}%
  \else
    \begingroup
      \catcode`\#=11 % the space before this comment terminates the number
      \read\myread to \mystr
      \StrBetween{\mystr}{mean: }{ lablab}[\mystr]%
      % \mystr is lost when the group ends, so export the result globally
      \xdef\mymean{\mystr}%
    \endgroup
    \closein\myread
  \fi
}

\begin{document}

% File without special characters.
\readmean{myfile1.txt}%
mean1 : \mymean

% File containing #: no catcode change is needed at the call site, because
% \readmean makes it internally.
\readmean{myfile2.txt}%
mean2 : \mymean

\end{document}

Read strings from file that contains the special character #

Tags:

Macros

Filesystem Access

Catcodes

Related

Recent Posts