chsrc/tool/rawstr4c/lib/Parser.rakumod
2025-07-16 17:31:11 +08:00

345 lines
9.3 KiB
Raku
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# ---------------------------------------------------------------
# SPDX-License-Identifier: GPL-3.0-or-later
# ---------------------------------------------------------------
# File Name : Parser.rakumod
# File Authors : Aoran Zeng <ccmywish@qq.com>
# Contributors : Nul None <nul@none.org>
# Created On : <2025-07-12>
# Last Modified : <2025-07-16>
#
# rawstr4c.md parsing
# ---------------------------------------------------------------
unit module Parser;
#| Bool Boolean
my enum ConfigItem's-ValueType < RS4C-Nil RS4C-String RS4C-Mode RS4C-Bool>;
#|
class ConfigItem's-Value {
has ConfigItem's-ValueType $.type;
has Str $.raw-value;
has Any $.parsed-value;
#| $raw-text undefined
method new(Str $input-text) {
my $type;
my $parsed-value;
my $raw-value = $input-text;
# 明确区分空字符串和无值情况
# 这种情况不可能是用户写的(并没有nil这个字面量)
if ! $input-text.defined {
$type = RS4C-Nil;
$parsed-value = Nil;
$raw-value = "<internal-rs4c-nil>"; # 一个完全用不到的值,但是由于 $.raw-value 类型是字符串,所以必须随便给一个值
}
else {
# wrtd: 不要试图在这里利用 given when 统一处理未定义的值,因为会一直报错
given $input-text {
when /^ ':' (.+) $/ {
# 模式值 :mode
$type = RS4C-Mode;
$parsed-value = ~$0;
}
when /^ ('true'|'false'|'yes'|'no') $/ {
# 特殊字面量 - true/false/yes/no 都是 literal
$type = RS4C-Bool;
$parsed-value = ~$0 ~~ /^('true'|'yes')$/ ?? True !! False;
}
# 输入为空时被当做是字符串类型
default {
# 普通字符串
$type = RS4C-String;
$parsed-value = $input-text;
}
}
}
self.bless(:$type, :$raw-value, :$parsed-value);
}
# 获得适合调用者接受的值
method value() {
given $.type {
when RS4C-Nil | RS4C-String | RS4C-Bool | RS4C-Mode { return $.parsed-value; }
default { die "Unknown config value type: {$.type}"; }
}
}
# 这些函数防止开发者写错类型
method nil-value() {
return self.value if $.type == RS4C-Nil;
die "The config value type is not RS4C-Nil, but: {$.type}";
}
method string-value() {
return self.value if $.type == RS4C-String;
die "The config value type is not RS4C-String, but: {$.type}";
}
method bool-value() {
return self.value if $.type == RS4C-Bool;
die "The config value type is not RS4C-Bool, but: {$.type}";
}
method mode-value() {
return self.value if $.type == RS4C-Mode;
die "The config value type is not RS4C-Mode, but: {$.type}";
}
# 类型检查方法
method is-nil() { return $.type == RS4C-Nil; }
method is-mode() { return $.type == RS4C-Mode; }
method is-bool() { return $.type == RS4C-Bool; }
method is-string() { return $.type == RS4C-String; }
}
#| config items
my class ConfigBlock {
has %!items;
# 如果非要在程序内部中调用,而不是直接从 Markdown 文件中读取出来
# 一定要记得 $raw-value 用的是 rawstr4c 的语法!也就是说,这里一定是一个字符串
method set($k, $raw-value) {
%!items{$k} = ConfigItem's-Value.new($raw-value);
}
method get($k) {
return %!items{$k};
}
method exist($k) {
return %!items{$k}:exists;
}
# 配置项名称
# @danger: 把这个函数命名为 items会让我的机器蓝屏.....
method keys() {
return %!items.keys;
}
}
#| section
class Section {
has Str $.title;
has Int $.level;
has ConfigBlock $.configblock;
has Str $.codeblock is rw;
has Section $.parent is rw;
has Section @.children;
method new($title, $level) {
my $configblock = ConfigBlock.new();
# parent 和 codeblock 刻意不初始化
self.bless(:$title, :$level, :$configblock, :children([]));
}
method add-child($child-section) {
$child-section.parent = self;
@.children.push: $child-section;
}
method has-children() {
return @.children.elems > 0;
}
# 递归获取所有后代section深度优先遍历
method get-all-descendants() {
my @descendants;
for @.children -> $child {
@descendants.push: $child;
@descendants.append: $child.get-all-descendants();
}
return @descendants;
}
# 获取section的路径从根到当前节点
method get-hierarchical-path() {
my @path;
my $current = self;
while $current {
@path.unshift: $current.title;
$current = $current.parent;
}
return @path.join(" > ");
}
}
#|(
section:
- level 0: root section
- level 1: #
- level 2: ##
- ...
)
class Parser {
has Str $.input-file is rw;
#| sections
has Section @!sections;
#| $markdown-file markdown
method new($markdown-file) {
self.bless(:input-file($markdown-file));
}
# 获取根sectionlevel 0
method root-section() {
return @!sections.first({ $_.level == 0 });
}
# 配置项所在行 -> 解析为配置项
method parse-config-item-line($line, $section-config) {
# 语法: - key = `value`
if $line ~~ /^ '-' \s* (<[a..z\-]>+) \s* '=' \s* '`' (.+?) '`' / {
my $key = ~$0;
my $value = ~$1;
$section-config.set($key, $value);
return True;
}
return False;
}
method parse() {
my $content = $.input-file.IO.slurp;
my @lines = $content.lines;
my $current-section;
my $in-codeblock = False;
# 在代码块中的 raw string
my $rawstr = "";
# 无论有没有具体的 root 信息 (比如所处理的文件第一行就是标题)
# 都创建一个 root section (level 0)
$current-section = Section.new("", 0);
@!sections.push: $current-section;
# 开始遍历
my $line-count = 0;
for @lines -> $line {
$line-count++;
# Step1: 处理标题,这里最重要,因为是判断上一个 section 结束的标志
if !$in-codeblock && $line ~~ /^ '#' ('#'*) \s* (.+) / {
my $level = 1 + $0.chars;
my $title = ~$1;
# 保存当前section的codeblock
$current-section.codeblock = $rawstr;
# 准备创建一个新的 section
$rawstr = "";
my $new-section = Section.new($title, $level);
@!sections.push: $new-section;
# 找到合适的父节点
my $parent = self.find-parent-section($level);
if $parent {
$parent.add-child($new-section);
}
$current-section = $new-section;
next;
}
# Step2: 处理配置项 (如果该行不是配置项则下一行)
if self.parse-config-item-line($line, $current-section.configblock) {
next;
}
# Step3: 开始处理 codeblock
if $line ~~ /^ '```' (.*)? / {
if $in-codeblock {
$in-codeblock = False;
} else {
$in-codeblock = True;
my $lang = ~($0 // '');
if $lang && $current-section && !$current-section.configblock.exist('language') {
$current-section.configblock.set('language', $lang);
}
}
next;
}
# 代码块里的内容统统进来
if $in-codeblock {
$rawstr ~= $line ~ "\n";
}
}
# 遍历结束保存最后一个section的codeblock
if $rawstr && $current-section {
$current-section.codeblock = $rawstr;
}
}
method find-parent-section($new-level) {
# 从@!sections尾部向前找找到第一个level小于new-level的section作为父节点
for @!sections.reverse -> $section {
if $section.level < $new-level {
return $section;
}
}
return Nil; # 没有找到父节点,说明是 root section
}
# 调试方法扁平打印所有sections
method debug-print-sections-flatly() {
say "====== sections ======";
for @!sections.kv -> $i, $section {
my $title = $section.title || "(Root)";
my $has-config = $section.configblock.keys ?? "" !! "";
my $has-code = $section.codeblock ?? "" !! "";
say " [$i] Level {$section.level}: $title - $has-config, $has-code";
}
}
# 调试方法层级打印sections
method debug-print-sections-hierarchyly() {
say "====== hierarchy ======";
my $indent = 0;
# 嵌套的格式化函数
my sub format-section($section, $level) {
my $prefix = ' ' x $level;
my $title = $section.title // '(Root)';
return "{$prefix}- {$title} (level {$section.level})";
}
# 嵌套的递归打印函数
my sub print-section-tree($section, $level) {
say format-section($section, $level);
if $section.has-children {
for $section.children -> $child {
print-section-tree($child, $level + 1);
}
}
}
my $root = self.root-section();
print-section-tree($root, $indent);
}
# 调试方法:完整的调试信息打印
method debug-print-summary() {
self.debug-print-sections-flatly();
self.debug-print-sections-hierarchyly();
}
}