Basic implementation of rawstr4c

This commit is contained in:
Aoran Zeng 2025-07-12 19:16:08 +08:00
parent 747c29e85f
commit a54f704719
No known key found for this signature in database
GPG Key ID: 8F8BA8488E10ED98

View File

@ -7,6 +7,10 @@
# Last Modified : <2025-07-12> # Last Modified : <2025-07-12>
# #
# Generate raw strings for C programming language # Generate raw strings for C programming language
#
# ---------------------------------------------------------------
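# The file-wide settings are called the "global config"; all others are called "section config".
# Each part is called a "section". The input we process lives in code blocks, called "block" for short.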
# --------------------------------------------------------------- # ---------------------------------------------------------------
unless @*ARGS { unless @*ARGS {
@ -15,37 +19,20 @@ unless @*ARGS {
my $markdown-file = @*ARGS[0]; my $markdown-file = @*ARGS[0];
# 确保文件存在
unless $markdown-file.IO.e { unless $markdown-file.IO.e {
die "Error: File '$markdown-file' not found.\n"; die "Error: File '$markdown-file' not found.\n";
} }
# Scan the document for the global translate setting.
#
# $content: the full Markdown text.
#
# Looks for the first list item shaped like "- translate = (backtick):mode(backtick)",
# stores the mode (without its leading colon) under the <translation> key
# and announces it on standard output. Returns the hash, which is empty
# when no such line exists.
sub parse-config($content) {
    my %settings;
    for $content.lines {
        # Skip everything that is not the translate item; only the first hit counts.
        next unless $_ ~~ /^ '-' \s* 'translate' \s* '=' \s* '`' ':' (<[a..z\-]>+) '`' /;
        %settings<translation> = $0.Str;
        say "Global translation mode: " ~ %settings<translation>;
        last;
    }
    return %settings;
}
# 根据转换模式处理字符 # 根据转换模式处理字符
sub convert-char($char, $mode) { sub convert-char($char, $mode) {
my $byte = $char.encode('UTF-8')[0];
given $mode { given $mode {
when 'octal' { when 'oct' {
return "\\" ~ sprintf("%03o", $byte); my $bytes = $char.encode('UTF-8');
return $bytes.map({ "\\" ~ sprintf("%03o", $_) }).join('');
} }
when 'hex' { when 'hex' {
return "\\x" ~ sprintf("%02x", $byte); my $bytes = $char.encode('UTF-8');
return $bytes.map({ "\\x" ~ sprintf("%02x", $_) }).join('');
} }
when 'escape' { when 'escape' {
# 只转义必要的字符 # 只转义必要的字符
@ -66,24 +53,170 @@ sub convert-char($char, $mode) {
} }
} }
# Translate a whole string character by character.
#
# $content: the text to convert.
# $mode   : translation mode, passed through unchanged to convert-char.
#
# Returns the concatenation of convert-char's result for every character
# of $content (an empty string for empty input).
sub process-content($content, $mode) {
    return $content.comb.map({ convert-char($_, $mode) }).join;
}
# Build the C identifier for a section's generated string.
#
# $global-config : hash of global options; reads <prefix> (default "_rawstr4c")
#                  and <postfix> (default empty).
# $section-config: hash of the section's options; reads <name> and <language>.
# $title         : section heading; lower-cased and used when <name> is not set.
#
# Returns prefix, name and postfix joined by "_" (empty parts are left out).
# Every run of characters that cannot appear in an identifier is collapsed
# to a single "_", so headings or languages such as "foo-bar" or
# "objective-c" still yield a name a C compiler accepts.
sub generate-variable-name($global-config, $section-config, $title) {
    # Config values may still be wrapped in their Markdown backticks.
    my sub unquote($text) { $text.subst(/^'`'/, '').subst(/'`'$/, '') }

    my $prefix  = unquote($global-config<prefix> // "_rawstr4c");
    my $postfix = $global-config<postfix> // "";

    if $postfix {
        $postfix = $postfix.subst(/^':'/, '');
        # "use-language" is a placeholder for the section's language;
        # it disappears when the section has none.
        my $lang = unquote($section-config<language> // '').trim;
        $postfix = $postfix.subst('use-language', $lang ?? "in_$lang" !! '');
    }

    # An explicit name wins over the heading text.
    my $name = unquote($section-config<name> // $title.lc);

    my $var-name = $prefix;
    $var-name ~= "_" ~ $name    if $name;
    $var-name ~= "_" ~ $postfix if $postfix;

    # Whitespace, hyphens, punctuation etc. are not legal in a C identifier.
    return $var-name.subst(/\W+/, '_', :g);
}
#`( 真正的 main 流程开始
)
my $content = $markdown-file.IO.slurp; my $content = $markdown-file.IO.slurp;
my @lines = $content.lines;
my %config = parse-config($content); my %global-config;
my $translation-mode = %config<translation> // 'octal'; my @sections;
my $current-section;
my $in-global = True;
my $in-code-block = False; my $in-code-block = False;
for $content.lines -> $line { my $current-code = "";
if $line ~~ /^ '```' / {
# 遇到代码块的开始或结束标记 for @lines -> $line {
$in-code-block = !$in-code-block; # 检查标题级别 (注意,要确保不在代码块内)
next; # 跳过代码块标记行 if !$in-code-block && $line ~~ /^ '#' ('#'*) \s* (.+) / {
my $level = 1 + $0.chars;
my $title = ~$1;
# 如果之前有代码块,保存
if $current-code && $current-section && $current-section<title> {
$current-section<code> = $current-code;
@sections.push: $current-section;
}
# 重置代码块
$current-code = "";
# 一级标题后面是全局配置其他级别标题开始新的section
if $level == 1 {
$in-global = True;
$current-section = {};
} else {
$in-global = False;
$current-section = {
title => $title,
level => $level,
config => {},
};
}
next;
}
# 解析配置项
if $line ~~ /^ '-' \s* (<[a..z\-]>+) \s* '=' \s* '`' (.+?) '`' / {
my $key = ~$0;
my $value = ~$1;
if $in-global {
%global-config{$key} = $value;
} elsif $current-section {
$current-section<config>{$key} = $value;
}
next;
}
# 处理代码块,即真正想要转换的 raw string
# 检测代码块语言并记录
if $line ~~ /^ '```' (.*)? / {
if $in-code-block {
# 代码块结束
$in-code-block = False;
} else {
# 代码块开始,记录语言
$in-code-block = True;
my $lang = ~($0 // '');
if $lang && $current-section {
# 如果代码块指定了语言且当前section没有language配置则自动设置
unless $current-section<config><language> {
$current-section<config><language> = $lang;
}
}
}
next;
} }
if $in-code-block { if $in-code-block {
# 如果在代码块内部,处理每一行 $current-code ~= $line ~ "\n";
for $line.comb -> $char {
print convert-char($char, $translation-mode);
}
print convert-char("\n", $translation-mode); # 处理换行符
} }
} }
# Flush the section that was still being collected when the input ended.
# Only titled sections that actually gathered code are kept.
if $current-code and $current-section and $current-section<title> {
    $current-section<code> = $current-code;
    push @sections, $current-section;
}
# Parsing is finished: print a summary of what was found.
say "Global config:";
for %global-config.pairs {
    say " " ~ .key ~ " = " ~ .value;
}
say "";
say "Found " ~ @sections.elems ~ " sections:";
# One line per collected section, in document order.
.say for @sections.map({ "Section: " ~ .<title> });
say "";
# Emit the details and the generated C array definition for every section.
for @sections -> $section {
    say "=== Section: " ~ $section<title> ~ " ===";
    # Pick the translation mode: the section's own setting wins over the
    # global one, and octal is the fallback. The leading ':' is dropped.
    my $translate = $section<config><translate> // %global-config<translate> // ':oct';
    $translate = $translate.subst(/^':'/, '');
    # Derive the C variable name for this section.
    my $var-name = generate-variable-name(%global-config, $section<config>, $section<title>);
    say "Variable name: $var-name";
    say "Translation mode: $translate";
    # A section whose code fence names no language has no <language> entry;
    # default to an empty string so the interpolation below does not raise
    # an "uninitialized value" warning.
    my $language = $section<config><language> // '';
    say "Language: $language";
    if $section<code> {
        say '';
        say 'char ' ~ $var-name ~ '[] = "' ~ process-content($section<code>, $translate) ~ '";';
    }
    say "\n";
}