mirror of
https://github.com/RubyMetric/chsrc
synced 2025-07-20 23:26:47 +08:00
Rewrite Parser consistent from bottom to top
This commit is contained in:
parent
4e5003f36e
commit
c4d0d1eb0e
@ -5,7 +5,7 @@
|
|||||||
# File Authors : Aoran Zeng <ccmywish@qq.com>
|
# File Authors : Aoran Zeng <ccmywish@qq.com>
|
||||||
# Contributors : Nul None <nul@none.org>
|
# Contributors : Nul None <nul@none.org>
|
||||||
# Created On : <2025-07-12>
|
# Created On : <2025-07-12>
|
||||||
# Last Modified : <2025-07-15>
|
# Last Modified : <2025-07-16>
|
||||||
#
|
#
|
||||||
# rawstr4c.md parsing
|
# rawstr4c.md parsing
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
@ -100,37 +100,78 @@ my class Config {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#|( 仅存在两个域:)
|
|
||||||
|
|
||||||
1. Global dom
|
#| 表示一个 section
|
||||||
2. Section dom
|
class Section {
|
||||||
|
|
||||||
我们要求,在 Global dom 里,只存在配置,不存在 code block. 而 code block 只能在 Section dom 中存在。
|
has Str $.title;
|
||||||
|
has Int $.level;
|
||||||
|
has Config $.config;
|
||||||
|
has Str $.codeblock is rw;
|
||||||
|
has Section $.parent is rw;
|
||||||
|
has Section @.children;
|
||||||
|
|
||||||
因此,Parser 解析完毕后将包含:
|
|
||||||
- input-file
|
|
||||||
- global-config
|
|
||||||
- sections (多个 section)
|
|
||||||
|
|
||||||
一个 section 是 Hash,其包含:
|
method new($title, $level, $config = Config.new(), Section $parent?) {
|
||||||
- title
|
self.bless(:$title, :level($level), :config($config), :$parent, :children([]));
|
||||||
- level
|
}
|
||||||
- raw-string
|
|
||||||
- config
|
method add-child($child-section) {
|
||||||
|
$child-section.parent = self;
|
||||||
|
@.children.push: $child-section;
|
||||||
|
}
|
||||||
|
|
||||||
|
method has-children() {
|
||||||
|
return @.children.elems > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
# 递归获取所有后代section(深度优先遍历)
|
||||||
|
method get-all-descendants() {
|
||||||
|
my @descendants;
|
||||||
|
for @.children -> $child {
|
||||||
|
@descendants.push: $child;
|
||||||
|
@descendants.append: $child.get-all-descendants();
|
||||||
|
}
|
||||||
|
return @descendants;
|
||||||
|
}
|
||||||
|
|
||||||
|
# 获取section的路径(从根到当前节点)
|
||||||
|
method get-hierarchical-path() {
|
||||||
|
my @path;
|
||||||
|
my $current = self;
|
||||||
|
while $current {
|
||||||
|
@path.unshift: $current.title;
|
||||||
|
$current = $current.parent;
|
||||||
|
}
|
||||||
|
return @path.join(" > ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#|()
|
||||||
|
所有内容都是 section:
|
||||||
|
|
||||||
|
- level 0: root section 无标题
|
||||||
|
- level 1: # 一级标题
|
||||||
|
- level 2: ## 二级标题
|
||||||
|
- ...
|
||||||
)
|
)
|
||||||
class Parser {
|
class Parser {
|
||||||
has IO::Path $.input-file is rw;
|
has IO::Path $.input-file is rw;
|
||||||
has Config $.global-config;
|
#| 所有sections的扁平数组,已经是深度遍历的了
|
||||||
has Hash @.sections;
|
has Section @.sections;
|
||||||
|
|
||||||
method new($input-file) {
|
method new($input-file) {
|
||||||
self.bless(
|
self.bless(
|
||||||
:$input-file,
|
:$input-file,
|
||||||
global-config => Config.new(),
|
|
||||||
sections => []
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# 获取根section(level 0)
|
||||||
|
method root-section() {
|
||||||
|
return @.sections.first({ $_.level == 0 });
|
||||||
|
}
|
||||||
|
|
||||||
# 配置项所在行 -> 解析为配置项
|
# 配置项所在行 -> 解析为配置项
|
||||||
method parse-config-item-line($line, $section-config) {
|
method parse-config-item-line($line, $section-config) {
|
||||||
# 语法: - key = `value`
|
# 语法: - key = `value`
|
||||||
@ -143,109 +184,90 @@ class Parser {
|
|||||||
return False;
|
return False;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
method parse() {
|
method parse() {
|
||||||
my $content = $.input-file.slurp;
|
my $content = $.input-file.slurp;
|
||||||
my @lines = $content.lines;
|
my @lines = $content.lines;
|
||||||
|
|
||||||
my $current-section;
|
my $current-section;
|
||||||
my $current-section-config = Config.new();
|
my $in-codeblock = False;
|
||||||
my $in-global = True;
|
|
||||||
my $in-code-block = False;
|
|
||||||
|
|
||||||
# 在代码块中的 raw string
|
# 在代码块中的 raw string
|
||||||
my $rawstr = "";
|
my $rawstr = "";
|
||||||
|
|
||||||
|
# 无论有没有具体的 root 信息 (比如所处理的文件第一行就是标题),
|
||||||
|
# 都创建一个 root section (level 0)
|
||||||
|
my $root-config = Config.new();
|
||||||
|
$current-section = Section.new("", 0, $root-config);
|
||||||
|
@.sections.push: $current-section;
|
||||||
|
|
||||||
# 开始遍历
|
# 开始遍历
|
||||||
|
my $line-count = 0;
|
||||||
for @lines -> $line {
|
for @lines -> $line {
|
||||||
|
$line-count++;
|
||||||
|
|
||||||
# Step1: 记录层次 level
|
# Step1: 处理标题,这里最重要,因为是判断上一个 section 结束的标志
|
||||||
#
|
if !$in-codeblock && $line ~~ /^ '#' ('#'*) \s* (.+) / {
|
||||||
# @note 我们要避免,在代码块中也有 # 字符,比如在代码块里写的是 shell 脚本
|
|
||||||
if !$in-code-block && $line ~~ /^ '#' ('#'*) \s* (.+) / {
|
|
||||||
my $level = 1 + $0.chars;
|
my $level = 1 + $0.chars;
|
||||||
my $title = ~$1;
|
my $title = ~$1;
|
||||||
|
|
||||||
# 只有匹配到下一个标题时,才说明前一个 section 已经结束,此时才有机会存下来
|
# 保存当前section的codeblock
|
||||||
# Global dom 里是没有 raw string 的,所以被这里的条件排除掉了
|
$current-section.codeblock = $rawstr;
|
||||||
if $rawstr && $current-section && $current-section<title> {
|
|
||||||
$current-section<raw-string> = $rawstr;
|
|
||||||
$current-section<config> = $current-section-config;
|
|
||||||
@.sections.push: $current-section;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
# 准备创建一个新的 section
|
||||||
$rawstr = "";
|
$rawstr = "";
|
||||||
|
my $new-section = Section.new($title, $level, Config.new());
|
||||||
|
@.sections.push: $new-section;
|
||||||
|
|
||||||
if $level == 1 {
|
# 找到合适的父节点
|
||||||
$in-global = True;
|
my $parent = self.find-parent-section($level);
|
||||||
$current-section = {};
|
|
||||||
$current-section-config = Config.new();
|
if $parent {
|
||||||
} else {
|
$parent.add-child($new-section);
|
||||||
$in-global = False;
|
|
||||||
$current-section = {
|
|
||||||
title => $title,
|
|
||||||
level => $level,
|
|
||||||
};
|
|
||||||
$current-section-config = Config.new();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$current-section = $new-section;
|
||||||
next;
|
next;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Step2: 处理配置项
|
# Step2: 处理配置项 (如果该行不是配置项则下一行)
|
||||||
if $in-global {
|
if self.parse-config-item-line($line, $current-section.config) {
|
||||||
if self.parse-config-item-line($line, $.global-config) {
|
|
||||||
next;
|
next;
|
||||||
}
|
}
|
||||||
} elsif $current-section {
|
|
||||||
if self.parse-config-item-line($line, $current-section-config) {
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Step3: 开始处理raw string
|
# Step3: 开始处理 codeblock
|
||||||
if $line ~~ /^ '```' (.*)? / {
|
if $line ~~ /^ '```' (.*)? / {
|
||||||
if $in-code-block {
|
if $in-codeblock {
|
||||||
$in-code-block = False;
|
$in-codeblock = False;
|
||||||
} else {
|
} else {
|
||||||
$in-code-block = True;
|
$in-codeblock = True;
|
||||||
my $lang = ~($0 // '');
|
my $lang = ~($0 // '');
|
||||||
if $lang && $current-section && !$current-section-config.exist('language') {
|
if $lang && $current-section && !$current-section.config.exist('language') {
|
||||||
$current-section-config.set('language', $lang);
|
$current-section.config.set('language', $lang);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
next;
|
next;
|
||||||
}
|
}
|
||||||
|
|
||||||
# 代码块里的内容统统进来
|
# 代码块里的内容统统进来
|
||||||
if $in-code-block {
|
if $in-codeblock {
|
||||||
$rawstr ~= $line ~ "\n";
|
$rawstr ~= $line ~ "\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# 遍历结束, 这意味着文件已经阅读完毕,最后一个section还没存,现在存它
|
# 遍历结束,保存最后一个section的codeblock
|
||||||
if $rawstr && $current-section && $current-section<title> {
|
if $rawstr && $current-section {
|
||||||
$current-section<raw-string> = $rawstr;
|
$current-section.codeblock = $rawstr;
|
||||||
$current-section<config> = $current-section-config;
|
|
||||||
@.sections.push: $current-section;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# 输出 config, 包含 global config 以及 section config
|
method find-parent-section($new-level) {
|
||||||
method debug() {
|
# 从@.sections尾部向前找,找到第一个level小于new-level的section作为父节点
|
||||||
say "--- Global config ---";
|
for @.sections.reverse -> $section {
|
||||||
for $.global-config.keys.sort -> $item {
|
if $section.level < $new-level {
|
||||||
my $value = $.global-config.get($item);
|
return $section;
|
||||||
say "$item = {$value.as-string} (type: {$value.type})";
|
|
||||||
}
|
}
|
||||||
say "";
|
|
||||||
|
|
||||||
# 设置debug标志,后续在 Generator 中根据此信息输出 section config
|
|
||||||
$.global-config.set('debug', "true");
|
|
||||||
|
|
||||||
say "Found " ~ @.sections.elems ~ " sections:";
|
|
||||||
for @.sections -> $section {
|
|
||||||
say "Section: " ~ $section<title>;
|
|
||||||
}
|
}
|
||||||
say "";
|
return Nil; # 没有找到父节点,说明是 root section
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user