Extract all strings between two strings
Here is a solution using RegEx. Don't forget to include the following using statement.
using System.Text.RegularExpressions
It will correctly return only text between the start and end strings given.
Will not be returned:
akslakhflkshdflhksdf
Will be returned:
FIRSTSTRING
SECONDSTRING
THIRDSTRING
It uses the regular expression pattern [start string].+?[end string]
The start and end strings are escaped in case they contain regular expression special characters.
private static List<string> ExtractFromString(string source, string start, string end)
{
var results = new List<string>();
string pattern = string.Format(
"{0}({1}){2}",
Regex.Escape(start),
".+?",
Regex.Escape(end));
foreach (Match m in Regex.Matches(source, pattern))
{
results.Add(m.Groups[1].Value);
}
return results;
}
You could make that into an extension method of String like this:
public static class StringExtensionMethods
{
public static List<string> EverythingBetween(this string source, string start, string end)
{
var results = new List<string>();
string pattern = string.Format(
"{0}({1}){2}",
Regex.Escape(start),
".+?",
Regex.Escape(end));
foreach (Match m in Regex.Matches(source, pattern))
{
results.Add(m.Groups[1].Value);
}
return results;
}
}
Usage:
string source = "A1FIRSTSTRINGA2A1SECONDSTRINGA2akslakhflkshdflhksdfA1THIRDSTRINGA2";
string start = "A1";
string end = "A2";
List<string> results = source.EverythingBetween(start, end);
private static List<string> ExtractFromBody(string body, string start, string end)
{
List<string> matched = new List<string>();
int indexStart = 0;
int indexEnd = 0;
bool exit = false;
while (!exit)
{
indexStart = body.IndexOf(start);
if (indexStart != -1)
{
indexEnd = indexStart + body.Substring(indexStart).IndexOf(end);
matched.Add(body.Substring(indexStart + start.Length, indexEnd - indexStart - start.Length));
body = body.Substring(indexEnd + end.Length);
}
else
{
exit = true;
}
}
return matched;
}