子解析

子解析

游标不一定要到达字符串的末尾才算成功。也就是说,它不一定要匹配整个字符串。

subparse 总是返回一个 Match 对象

method subparse(
    $target, 
    :$rule = 'TOP', 
    Capture() :$args = \(),  
    Mu :$actions = Mu, 
    *%opt
)

Grammar

grammar RepeatChar {
    token start($character) { $character+ }
}

解析

say RepeatChar.subparse(
    'bbbabb', 
    :rule('start'), 
    :args(\('b'))
);          # 「bbb」

say RepeatChar.parse(
    'bbbabb', 
    :rule('start'), 
    :args(\('b'))
);          # Nil

say RepeatChar.subparse(
    'bbbabb', 
    :rule('start'), 
    :args(\('a'))
);          # <failed match>

say RepeatChar.subparse(
    'bbbabb', 
    :rule('start'), 
    :args(\('a')), 
    :pos(3)
);          # 「a」

解析结构化文本

数据样例

[28/04/2015 12:32] Title1

content line 1
content line 2
content line 3
content line 4
content line 5

[28/04/2015 12:16] Title2

content line 6
content line 7

[27/04/2015 17:30] ​Title3

content line 8
content line 9
content line 10

Grammar

grammar StructedText {
    token TOP { ^ <entry>+ $ }
    token entry {
        <head> \s*   # 每一项有一个标题
        <line>+ \s*  # 每个标题下面有很多行
    }
    
    token head     { '[' <datetime> ']' \s+ <title> }
    token datetime {  <filedate> \s+  <filetime> }
    token filedate { [\d+]+ % '/' }
    token filetime { [\d+]+ % ':' }
    token title    { \N+          }
    token line  {
        [
		    <!head>       # 前面不是 head 标题
            .             # 点号匹配换行符
        ]+
    }
}

Action

class StructedText::Actions {
    method line    ($/) { $/.make: ~$/                            }
    method filedate($/) { $/.make: ~$/.subst(rx/<[:/]>/, '-', :g) }
    method head    ($/) { $/.make: ~$/.subst(rx/<[:/]>/, '-', :g) }
    method entry   ($/) { make $<head>.ast => $<line>».made;      }
    method TOP     ($/) { $/.make: $<entry>».ast;                 }    
}

解析

my $actions = StructedText::Actions.new;
my $parsed = StructedText.parsefile('sample.txt', :$actions).made;
if $parsed {
    for @$parsed -> $e {
        my $filename = ~$e.key.match(/'[' <( <-[\[\]]>+ )> ']'/)  ~ ".txt";
        my $fh = open $filename, :w; 
        $fh.say: ~$e.key;
        for $e.value -> $v {
            $fh.say: $v;
        }
        $fh.close;
        say "生成文件 $filename ";       
    }
}

计算器

数据样例

x = 40 + 2;
print x;

y = x - (5/2);
print y;

z = 1 + y * x;
print z;

print 14 - 16/3 + x;

目录结构如下:

.
├── Lang
│   ├── Actions.pm6
│   └── Grammar.pm6
├── data
│   ├── calc.lang

Grammar

Grammar.pm6 的内容如下:

unit grammar Lang::Grammar;

rule TOP {
    ^ <statements> $
}

rule statements {
    <statement>+ %% ';'
}

rule statement {
    | <assignment>
    | <printout>
}

rule assignment {
    <identifier> '=' <expression>
}

rule printout {
    'print' <expression>
}

rule expression {
    | <term>+ %% $<op>=(['+'|'-'])
    | <group>
}

rule term {
    <factor>+  %% $<op>=(['*'|'/'])
}

rule factor {
    | <identifier>
    | <value>
    | <group>
}

rule group {
    '(' <expression> ')'
}

token identifier {
    (<:alpha>+)
}

token value {
    (
    | \d+['.' \d+]?
    | '.' \d+
    )
}

Action

Actions.pm6 的内容如下:

unit class Lang::Actions;

has %.var;

method assignment($/) {
    %!var{$<identifier>} = $<expression>.ast;
}

method printout($/) {
    say $<expression>.ast;
}

method expression($/) {
    if $<group> {
        $/.make: $<group>.ast
    }
    else {
        my $result = $<term>[0].ast;

        if $<op> {
            my @ops = $<op>.map(~*);
            my @vals = $<term>[1..*].map(*.ast);

            for 0..@ops.elems - 1 -> $c {
                if @ops[$c] eq '+' {
                    $result += @vals[$c];
                }
                else {
                    $result -= @vals[$c];
                }
            }
        }

        $/.make: $result;
    }
}

method term($/) {
    my $result = $<factor>[0].ast;

    if $<op> {
        my @ops = $<op>.map(~*);
        my @vals = $<factor>[1..*].map(*.ast);

        for 0..@ops.elems - 1 -> $c {
            if @ops[$c] eq '*' {
                $result *= @vals[$c];
            }
            else {
                $result /= @vals[$c];
            }
        }
    }

    $/.make: $result;
}

method factor($/) {
    if $<identifier> {
        $/.make: %!var{~$<identifier>} // 0
    }
    elsif $<value> {
        $/.make: $<value>.ast
    }
    elsif $<group> {
        $/.make: $<group>.ast
    }
}

method group($/) {
    $/.make: $<expression>.ast
}

method identifier($/) {
    $/.make: ~$0
}

method value($/) {
    $/.make: +$0
}

解析

use lib '.';
use Lang::Grammar;
use Lang::Actions;

my $parsed = Lang::Grammar.parsefile(@*ARGS[0] // 'data/calc.lang', :actions(Lang::Actions.new()));
say $parsed;

解析 JSON

数据样例

{
    "country": "Austria",
    "cities": [ "Wien", "Salzburg", "Innsbruck" ],
    "population": 8353243
 }

Grammar

 grammar JSON::Tiny::Grammar {
     rule TOP      { ^[ <object> | <array> ]$ }
     rule object   { '{' ~ '}' <pairlist>     }
     rule pairlist { <pair>* % [ \, ]         }
     rule pair     { <string> ':' <value>     }
     rule array    { '[' ~ ']' [ <value>* % [ \, ] ] }

    proto token value { <...> };

    token value:sym<number> {
        '-'?
        [ 0 | <[1..9]> <[0..9]>* ]
        [ \. <[0..9]>+ ]?
        [ <[eE]> [\+|\-]? <[0..9]>+ ]?
    }

    token value:sym<true>   { <sym>    };
    token value:sym<false>  { <sym>    };
    token value:sym<null>   { <sym>    };
    token value:sym<object> { <object> };
    token value:sym<array>  { <array>  };
    token value:sym<string> { <string> }

    token string {
        \" ~ \" [ <str> | \\ <str_escape> ]*
    }

    token str {
        [
            <!before \t>
            <!before \n>
            <!before \\>
            <!before \">
            .
        ]+
        # <-["\\\t\n]>+
    }

    token str_escape {
        <["\\/bfnrt]> | u <xdigit>**4
    }
 }

解析

 # test it:
 my $tester = '{
     "country": "Austria",
     "cities": [ "Wien", "Salzburg", "Innsbruck" ],
     "population": 8353243
 }';

 if JSON::Tiny::Grammar.parse($tester) {
     say "It's valid JSON";
 } else {
     # TODO: error reporting
     say "Not quite...";
 }

检测 CSV 是否有效

数据样例

Year,Make,Model,Length
1997,Ford,E350,2.34
2000,Mercury,Cougar,2.38

Grammar

grammar CSV {
    token TOP { [ <line> \n? ]+ }

    token line {
        ^^            # Beginning of a line
        <value>* % \, # Any number of <value>s with commas in `between` them
        $$            # End of a line
    }

    token value {
        [
        | <-[",\n]>     # Anything not a double quote, comma or newline
        | <quoted-text> # Or some quoted text
        ]*              # Any number of times
    }
    token quoted-text {
        \"
        [
        | <-["\\]> # Anything not a " or \
        | '\"'     # Or \", an escaped quotation mark
        ]*         # Any number of times
        \"
    }
}

解析

say "Valid CSV file!" if CSV.parse( q:to/EOCSV/ );
    Year,Make,Model,Length
    1997,Ford,E350,2.34
    2000,Mercury,Cougar,2.38
    EOCSV

解析带 Action 的纸牌游戏

数据样例

a♥ a♥ 7♦ 8♣ j♥
a♥ 7♥ 7♦ 8♣ j♥; 10♥ j♥ q♥ k♥ a♦

Grammar

grammar CardGame {
 
    rule TOP { ^ <deal> $ }
 
    rule deal {
       :my %*PLAYED = ();
       <hand>+ % ';'
    }
 
    rule hand { [ <card> ]**5 }
    token card {<face><suit>}
 
    proto token suit {*}
    token suit:sym<♥>  {<sym>}
    token suit:sym<♦>  {<sym>}
    token suit:sym<♣>  {<sym>}
    token suit:sym<♠>  {<sym>}
 
    token face {:i <[2..9]> | 10 | j | q | k | a }
}

Action

class CardGame::Actions {
    method card($/) {
       my $card = $/.lc;
       say "Hey, there's an extra $card"
           if %*PLAYED{$card}++;
   }
}

解析

my $a = CardGame::Actions.new;
say CardGame.parse("a♥ a♥ 7♦ 8♣ j♥", :actions($a));
# "Hey there's an extra a♥"
say CardGame.parse("a♥ 7♥ 7♦ 8♣ j♥; 10♥ j♥ q♥ k♥ a♦",
                   :actions($a));
# "Hey there's an extra j♥"

解析纸牌游戏

数据样例

2♥ 5♥ 7♦ 8♣ 9♠
2♥ a♥ 7♦ 8♣ j♥

Grammar

grammar CardGame {
 
    rule TOP { ^ <deal> $ }
 
    rule deal {
        <hand>+ % ';'
    }
 
    rule hand { [ <card> ]**5 }
    token card {<face><suit>}
 
    proto token suit {*}
    token suit:sym<♥>  {<sym>}
    token suit:sym<♦>  {<sym>}
    token suit:sym<♣>  {<sym>}
    token suit:sym<♠>  {<sym>}
 
    token face {:i <[2..9]> | 10 | j | q | k | a }
}

解析

say CardGame.parse("2♥ 5♥ 7♦ 8♣ 9♠");
say CardGame.parse("2♥ a♥ 7♦ 8♣ j♥");

Alaways Succeed Assertion

数据样例

255 435 777
123 456 789
098 764 125

Grammar

grammar Digifier {
    rule TOP {
        [ <.succ> <digit>+ ]+
    }
    token succ   { <?> }
    token digit { <[0..9]> }
}

Action

class Devanagari {
    has @!numbers;
    method digit ($/) { @!numbers.tail ~= <零 一 二 三 四 五 六 七 八 九>[$/] }
    method succ  ($)  { @!numbers.push: ''     }
    method TOP   ($/) { make @!numbers[^(*-1)] }
}

解析

say Digifier.parse('255 435 777', actions => Devanagari.new).made;
# OUTPUT: 二五五 四三五 七七七

解析校名

数据样本

[Wang, Zhiguo; Zhao, Zhiguo] Hangzhou Normal Univ, Ctr Cognit & Brain Disorders, Hangzhou, Zhejiang, Peoples R China; [Wang, Zhiguo; Theeuwes, Jan] Vrije Univ Amsterdam, Dept Cognit Psychol, Amsterdam, Netherlands

Grammar

grammar University {
    token TOP             { ^ <university> $             }
    token university      { [ <bracket> <info> ]+ % '; ' }
    token bracket         { '[' <studentname>  '] '      }
    token studentname     { <stdname=.info>+ % '; '      }
    token info            { <field>+ % ', '              }
    token field           { <-[,\]\[;\n]>+               }
}

grammar MyUniversity  is University {
    token university      { <info>+ % '; ' }
}

Action

class MyUniversityAction {
    ...
}

解析

my $parsed = University::Grammar.parse($string);

for @($parsed<university><info>) -> $u {
    say $u<field>[0];
}

解析键值对儿

数据样例

version=6.d
backend=MoarVM
disto=Rakudo Star

Grammar

grammar KeyValuePairs {
    token TOP {
        [<pair> \v+]*
    }

    token pair {
        <key=.identifier> '=' <value=.identifier>
    }

    token identifier {
        \w+
    }
}

Action

class KeyValuePairsActions {
    method pair      ($/) {
        $/.make: $<key>.made => $<value>.made
    }
    method identifier($/) {
        # subroutine `make` is the same as calling .make on $/
        make ~$/
    }

    method TOP ($match) {
        # can use any variable name for parameter, not just $/
        $match.make: $match<pair>».made
    }
}

解析数学表达式

数据样例

3 + 4 - 5
3 * 4 * 5
4 + 5 * (1 + 3)

Grammar

grammar MathExpression {
    token TOP    { <sum>              } 
    rule sum     { <product>+ % '+'   } 
    rule product { <term>+    % '*'   } 
    rule term    { <number> | <group> } 
    rule group   { '(' <sum> ')'      } 
    token number { \d+                }
}

Action

class MathEvalAction {
    method TOP($/) {
        make $<sum>.made;
    }
    method sum($/) {
        make [+] $<product>».made;
    }
    method product($/) {
        make [*] $<term>».made;
    }
    method term($/) {
        make $/.values[0].made;
    }
    method group($/) {
        make $<sum>.made;
    }
    method number($/) {
        make $/.Int;
    }
}

解析

my $match = MathExpression.parse(
    '4 + 5 * (1 + 3)',
    actions => MathEvalAction.new,
);

say $match.made; # Output: 24
say $match.raku;

解析变量名

数据样例

@array
%hash
$sum

Grammar

grammar VariableNames {

    token variable {
        <sigil> <name>
    }
	
    token sigil {
        '$' | '@' | '&' | '%' | '::'
    }
	
	# [ ... ] are non-capturing groups
	token name {
        <identifier> 
        [ '::' <identifier> ] * 
    }
	# 标识符以字母开头
    token identifier {
        <alpha> \w+
    }
}

匹配

my $match = VariableNames.parse("@array",:rule('variable'));
say $match;

继承

# we inherit from the original grammar...
grammar VARIABLENAMES is VariableNames {
    
    # ... and override that parsing rule that we want to change
    token identifier {
        # char classes are <[ ... ]> in Perl 6
        <[A..Z]> <[A..Z0..9_]>* 
    }
}

匹配

my $test = VARIABLENAMES.parse("%A_HASH_TABLE",:rule('variable'));
say $test;

继承

grammar LackMoney is VariableNames {
    token sigil {
        '¢' | '@' | '&' | '%' | '::'
    }
}

匹配

# 继承以后, 带¢的变量能够解析, 带$的变量解析不了了
my $money = LackMoney.parse('$i_m_not_dollor',:rule('variable'));
say so $money; # false

解析括号对儿之间的数据

数据样本

[Lue, Fan]
[Lou, Man-Li]
[Tian, Mijie; Zhou, Lin; Zou, Xiao; Zheng, Qiaoji; Luo, Lingling; Jiang, Na; Lin, Dunmin]

Grrammar

grammar PairBracket {
    token TOP {
        ^ <line>+ $
    }

    token line {
        \[
        <student>+ % <semicolon>
        \]
        \n                   # 换行 \n 是最容易被忽略的地方
    }

    token student {
       <myname>+ % <comma>   # 分隔符也可以是一个 subrule
    }

    token myname {
        <[A..Za..z-]>+       # 字符类的写法 <[...]>
    }

    token comma {
        ',' \s+              # 逗号
    }

    token semicolon {
        ';' \s+
    }

}

Action

class PairBracketAction {
    ...
}

提取数据

my $parse = Lines.parsefile('test.txt');
say $parse<line>;

解析 INI 文件

INI 数据

access=user
;; more details of user below
[person]
name=john doe
address=555 Canndy Lane

Grammar

grammar IniFile {
    token key     { \w+ }
    token value   { <!before \s> <-[\n;]>+ <!after \s> }
    token pair    { <key> \h* '=' \h* <value> \n+      }
    token header  { '[' <-[ \[ \] \n ]>+ ']' \n+       }
    token comment { ';' \N*\n+                         }
    token block   { [<pair> | <comment>]*              }
    token section { <header> <block>                   }
    token TOP     { <block> <section>*                 }
}

Action

class IniFile::Action  {
    method key($/)     { make $/.Str }
    method value($/)   { make $/.Str }
    method headse($/)  { make $/.Str }
    method pair($/)    { make $<key>.made => $<value>.made }
    method block($/)   { make $<pair>.map({ .made }).hash  }
    method section($/) { make $<header>.made => $<block>.made }
    method TOP($/) {
        make {
            _ => $<block>.made,
            $<section>.map: { .made },
        }
    }
}

提取信息

sub parse-ini(Str $input) {
    my $m = IniFile.parse( $input, :actions(IniFile::Action) );
    unless $m {
        die "the input is not a valid INI file";
    }
    return $m.made;
}

sub MAIN() {
    say parse-ini($example);
}

解析天气预报数据

天气观测数据

Name= Jan Mayen 
Country= NORWAY 
Lat=   70.9 
Long=    8.7 
Height= 10 
Start year= 1921 
End year= 2009 
Obs: 
1921 -4.4 -7.1 -6.8 -4.3 -0.8  2.2  4.7  5.8  2.7 -2.0 -2.1 -4.0  
1922 -0.9 -1.7 -6.2 -3.7 -1.6  2.9  4.8  6.3  2.7 -0.2 -3.8 -2.6  
2008 -2.8 -2.7 -4.6 -1.8  1.1  3.3  6.1  6.9  5.8  1.2 -3.5 -0.8  
2009 -2.3 -5.3 -3.2 -1.6  2.0  2.9  6.7  7.2  3.8  0.6 -0.3 -1.3 

Grammar

grammar StationDataParser { 
    token TOP          { ^ <keyval>+ <observations> $             } 
    token keyval       { $<key>=[<-[=]>+] '=' \h* $<val>=[\N+] \n } 
    token observations { 'Obs:' \h* \n <observation>+             } 
    token observation  { $<year>=[\d+] \h* <temp>+ %% [\h*] \n    } 
    token temp         { '-'? \d+ \. \d+                          } 
}

StationData 对象

class StationData { 
    has $.name; 
    has $.country; 
    has @.data; 
     
    submethod BUILD(:%info (:Name($!name), :Country($!country), *%), :@!data) { 
    } 
}

Action

class StationDataActions { 
    method TOP($/) { 
        make StationData.new( 
            info => $<keyval>.map(*.ast).hash, 
            data => $<observations>.ast 
        ); 
    } 
	
    method keyval($/) { 
        make ~$<key> => ~$<val>; 
    } 
    method observations($/) { 
        make $<observation>.map(*.ast).grep(*.value.none <= -99); 
    } 
    method observation($/) { 
        make +$<year> => $<temp>.map(*.Num); 
    } 
}

解析

say StationDataParser.parse( q:to/EOCSV/, :actions(StationDataActions) ).ast

解析行程数据

行程数据

Russia
    Vladivostok : 43.131621,131.923828 : 4
    Ulan Ude : 51.841624,107.608101 : 2
    Saint Petersburg : 59.939977,30.315785 : 10
Norway
    Oslo : 59.914289,10.738739 : 2
    Bergen : 60.388533,5.331856 : 4
Ukraine
    Kiev : 50.456001,30.50384 : 3
Switzerland
    Wengen : 46.608265,7.922065 : 3
    Bern : 46.949076,7.448151 : 1

Grammar

grammar SalesExport {
    token TOP { ^ <country>+ $ }
    token country {
        <name> \n
        <destination>+
    }
    token destination {
        \s+ <name> \s+ ':' \s+
        <lat=.num> ',' <long=.num> \s+ ':' \s+
        <sales=.integer> \n
    }
    token name    { \w+ [ \s \w+ ]*   }
    token num     { '-'? \d+ [\.\d+]? }
    token integer { '-'? \d+          }
}

Action

class SalesExport::Action  {
	method destination($/) { make ~$<dname> => [$<sales>.map(*.Num+10),$<lat>.map(*.Num+90) ] }
    method country($/)     { make ~$<cname> => $<destination>>>.made            }
    method TOP($/)         { make $<country>>>.made                             }
}

提取信息

my $actions = SalesExport::Grammar::Actions.new;
my $grammar_action = SalesExport::Grammar.parse($string, :actions($actions)).made;
#say $grammar_action.Str;
# 获取所有国家的名字
for @$grammar_action -> $p {
    say "$p.key()";
}
say '-' x 45;
# 获取所有目的地
for @$grammar_action -> $p {
    for $p.value() -> $d {
	    for @$d -> $n {
		    say $n.key();
		}
	}
}
say '-' x 45;
# 获取出售的票数
for @$grammar_action -> $p {
    print "$p.key()\t";
    for $p.value() -> $d {
	    my $count;
	    for @$d -> $n {
		    $count += $n.value()[0];
		}
	say $count;
	}
}

say '-' x 45;
# 获取经度 lat
for @$grammar_action -> $p {
    for $p.value() -> $d {
	    for @$d -> $n {
		    say $n.value()[1];
		}
	}
}

欢乐谷

img

滴水湖

img

上海

img

阿拉丁

img