Parse Wordpress like Shortcode
Here's a utility class that we used on our project
It will match all shortcodes in a string (including html) and it will output an associative array including their name
, attributes
and content
final class Parser {
// Regex101 reference: https://regex101.com/r/pJ7lO1
const SHORTOCODE_REGEXP = "/(?P<shortcode>(?:(?:\\s?\\[))(?P<name>[\\w\\-]{3,})(?:\\s(?P<attrs>[\\w\\d,\\s=\\\"\\'\\-\\+\\#\\%\\!\\~\\`\\&\\.\\s\\:\\/\\?\\|]+))?(?:\\])(?:(?P<content>[\\w\\d\\,\\!\\@\\#\\$\\%\\^\\&\\*\\(\\\\)\\s\\=\\\"\\'\\-\\+\\&\\.\\s\\:\\/\\?\\|\\<\\>]+)(?:\\[\\/[\\w\\-\\_]+\\]))?)/u";
// Regex101 reference: https://regex101.com/r/sZ7wP0
const ATTRIBUTE_REGEXP = "/(?<name>\\S+)=[\"']?(?P<value>(?:.(?![\"']?\\s+(?:\\S+)=|[>\"']))+.)[\"']?/u";
public static function parse_shortcodes($text) {
preg_match_all(self::SHORTOCODE_REGEXP, $text, $matches, PREG_SET_ORDER);
$shortcodes = array();
foreach ($matches as $i => $value) {
$shortcodes[$i]['shortcode'] = $value['shortcode'];
$shortcodes[$i]['name'] = $value['name'];
if (isset($value['attrs'])) {
$attrs = self::parse_attrs($value['attrs']);
$shortcodes[$i]['attrs'] = $attrs;
}
if (isset($value['content'])) {
$shortcodes[$i]['content'] = $value['content'];
}
}
return $shortcodes;
}
private static function parse_attrs($attrs) {
preg_match_all(self::ATTRIBUTE_REGEXP, $attrs, $matches, PREG_SET_ORDER);
$attributes = array();
foreach ($matches as $i => $value) {
$key = $value['name'];
$attributes[$i][$key] = $value['value'];
}
return $attributes;
}
}
print_r(Parser::parse_shortcodes('[include file="header.html"]'));
Output:
Array
(
[0] => Array
(
[shortcode] => [include file="header.html"]
[name] => include
[attrs] => Array
(
[0] => Array
(
[file] => header.html
)
)
)
)
I also needed this functionality in my PHP framework. This is what I've written, it works pretty well. It works with anonymous functions, which I really like (it's a bit like the callback functions in JavaScript).
<?php
//The content which should be parsed
$content = '<p>Hello, my name is John an my age is [calc-age day="4" month="10" year="1991"].</p>';
$content .= '<p>Hello, my name is Carol an my age is [calc-age day="26" month="11" year="1996"].</p>';
//The array with all the shortcode handlers. This is just a regular associative array with anonymous functions as values. A very cool new feature in PHP, just like callbacks in JavaScript or delegates in C#.
$shortcodes = array(
"calc-age" => function($data){
$content = "";
//Calculate the age
if(isset($data["day"], $data["month"], $data["year"])){
$age = date("Y") - $data["year"];
if(date("m") < $data["month"]){
$age--;
}
if(date("m") == $data["month"] && date("d") < $data["day"]){
$age--;
}
$content = $age;
}
return $content;
}
);
//http://stackoverflow.com/questions/18196159/regex-extract-variables-from-shortcode
function handleShortcodes($content, $shortcodes){
//Loop through all shortcodes
foreach($shortcodes as $key => $function){
$dat = array();
preg_match_all("/\[".$key." (.+?)\]/", $content, $dat);
if(count($dat) > 0 && $dat[0] != array() && isset($dat[1])){
$i = 0;
$actual_string = $dat[0];
foreach($dat[1] as $temp){
$temp = explode(" ", $temp);
$params = array();
foreach ($temp as $d){
list($opt, $val) = explode("=", $d);
$params[$opt] = trim($val, '"');
}
$content = str_replace($actual_string[$i], $function($params), $content);
$i++;
}
}
}
return $content;
}
echo handleShortcodes($content, $shortcodes);
?>
The result:
Hello, my name is John an my age is 22.
Hello, my name is Carol an my age is 17.
Using this function
$code = '[include file="header.html"]';
$innerCode = GetBetween($code, '[', ']');
$innerCodeParts = explode(' ', $innerCode);
$command = $innerCodeParts[0];
$attributeAndValue = $innerCodeParts[1];
$attributeParts = explode('=', $attributeAndValue);
$attribute = $attributeParts[0];
$attributeValue = str_replace('"', '', $attributeParts[1]);
echo $command . ' ' . $attribute . '=' . $attributeValue;
//this will result in include file=header.html
$command will be "include"
$attribute will be "file"
$attributeValue will be "header.html"