// CompilerDesignIFLR1/CompilerDesignIflr1/LR1Creator.cs
// 410 lines, 14 KiB, C#
// 2024-12-23 01:42:58 +08:00
using System;
using System.Collections.Generic;
using System.ComponentModel.DataAnnotations;
using System.Dynamic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace CompilerDesignIFlr1
{
internal class LR1Creator
{
// Name of the grammar's start symbol; assigned by the constructor.
internal string StartSymbol { get; init; }
// Closures keyed by an int; I0Creator and CalculateAllClosure add entries under closure.Index.
internal Dictionary<int, LR1Closure> Closures = [];
// Terminals
internal Dictionary<string, LR1Unit> TokenUnit = [];
// Non-terminals; each name maps to a list to account for alternation ("or")
internal Dictionary<string, List<LR1Unit>> GrammarUnit = [];
// FIRST sets
internal Dictionary<string, HashSet<string>> FirstGroup = [];
// Units keyed by the Id that SetGrammarIndex assigns to them.
internal Dictionary<int, LR1Unit> UnitIndex = [];
/// <summary>
/// Populates every table of this creator from <paramref name="reader"/>: registers the
/// terminals, expands the grammar rules, numbers the resulting units, computes the FIRST
/// sets, builds the initial closure and then all closures reachable from it. The grammar,
/// the FIRST sets and all closures are printed to the console along the way.
/// </summary>
/// <param name="reader">Source of the symbol, keyword, regex and rule definitions.</param>
internal LR1Creator(GrammarReader reader)
{
    // A key starting with "@top" carries the rule name after the marker and the
    // single character that follows it; every other key is used as-is.
    string RuleName(string ruleKey) => ruleKey.StartsWith("@top") ? ruleKey.Substring(5) : ruleKey;

    // Terminals: the end marker first, then the three token tables in a fixed order.
    TokenUnit.Add("End", new LR1Unit("End", "#"));
    foreach (var (tokenName, tokenValue) in reader.SymbolToken)
        TokenUnit.Add(tokenName, new LR1Unit(tokenName, tokenValue));
    foreach (var (tokenName, tokenValue) in reader.KeyToken)
        TokenUnit.Add(tokenName, new LR1Unit(tokenName, tokenValue));
    foreach (var (tokenName, tokenValue) in reader.RegexToken)
        TokenUnit.Add(tokenName, new LR1Unit(tokenName, tokenValue));

    // Pass 1: register every rule name with an empty list so that rule bodies can
    // refer to rules that are defined later.
    foreach (var (ruleKey, _) in reader.RegularToken)
    {
        if (!ruleKey.StartsWith("@skip"))
            GrammarUnit.Add(RuleName(ruleKey), []);
    }

    // Pass 2: expand each rule body; the "@top" rule also names the start symbol.
    string top = "";
    foreach (var (ruleKey, ruleBody) in reader.RegularToken)
    {
        if (ruleKey.StartsWith("@skip"))
            continue;

        string ruleName = RuleName(ruleKey);
        if (ruleKey.StartsWith("@top"))
            top = ruleName;
        GrammarUnitCreate(ruleBody, ruleName);
    }

    SetGrammarIndex();
    PrintGrammar();

    // FIRST sets for the terminals first, then for the non-terminals.
    foreach (var tokenName in TokenUnit.Keys)
        GetFirst(tokenName);
    foreach (var ruleName in GrammarUnit.Keys)
        GetFirst(ruleName);
    PrintFirst();

    StartSymbol = top;
    I0Creator();
    CalculateAllClosure();
    PrintClosure(Enumerable.Range(0, Closures.Count).ToArray());
}
/// <summary>
/// Numbers every unit stored in <c>GrammarUnit</c> consecutively from 0, in dictionary
/// and list order, and records each unit in <c>UnitIndex</c> under its new Id.
/// </summary>
internal void SetGrammarIndex()
{
    int nextId = 0;
    foreach (var unit in GrammarUnit.Values.SelectMany(units => units))
    {
        unit.Id = nextId;
        UnitIndex[unit.Id] = unit;
        nextId++;
    }
}
/// <summary>
/// Writes the closures stored under the given keys to the console, one
/// <c>Console.WriteLine</c> call per closure, in the order the keys are passed.
/// </summary>
/// <param name="index">Keys into <c>Closures</c>; a missing key makes the lookup throw.</param>
internal void PrintClosure(params int[] index)
{
    for (int i = 0; i < index.Length; i++)
    {
        Console.WriteLine(Closures[index[i]]);
    }
}
/// <summary>
/// Builds the initial closure: every unit of the start symbol is cloned, given "End" in
/// its <c>Prospect</c> and added to a new closure, which is then completed through
/// <c>AddMissingUnits</c> and <c>CalculateProspects</c> and stored in <c>Closures</c>
/// under its own <c>Index</c>.
/// </summary>
internal void I0Creator()
{
    var initial = new LR1Closure(this);

    GrammarUnit[StartSymbol].ForEach(production =>
    {
        var seeded = production.Clone();
        seeded.Prospect.Add("End");
        initial.Units.Add(seeded);
    });

    initial.AddMissingUnits();
    initial.CalculateProspects();
    Closures.Add(initial.Index, initial);
}
/// <summary>
/// Returns the FIRST set of <paramref name="root"/>, computing and caching it in
/// <c>FirstGroup</c> on first use.
/// </summary>
/// <remarks>
/// For a terminal the set is just the terminal unit's name. For a non-terminal it is the
/// union of the FIRST sets of the leading symbol of each of its productions. Productions
/// with no symbols contribute nothing, and only the first symbol of a production is ever
/// inspected (a leading symbol that can derive the empty string is not looked through).
/// The walk keeps a set of non-terminals already expanded, so left-recursive or mutually
/// recursive rules terminate instead of recursing without bound.
/// </remarks>
/// <param name="root">Name of a terminal or non-terminal.</param>
/// <returns>The cached set instance for <paramref name="root"/>.</returns>
/// <exception cref="Exception">
/// <paramref name="root"/>, or a leading symbol reached from it, is neither a known
/// non-terminal nor a known terminal.
/// </exception>
internal HashSet<string> GetFirst(string root)
{
    if (FirstGroup.TryGetValue(root, out var cached))
        return cached;

    // Non-terminals take precedence over terminals when a name is in both tables.
    if (!GrammarUnit.ContainsKey(root))
    {
        if (!TokenUnit.TryGetValue(root, out var token))
            throw new Exception("Grammar can't be recognized. " + root);

        // Cache and return the same instance so every caller sees one set per symbol.
        HashSet<string> single = [token.Name];
        FirstGroup.Add(root, single);
        return single;
    }

    HashSet<string> first = [];
    HashSet<string> expanded = [root];
    Collect(root);

    // Only the root is cached: sets of non-terminals met on the way may be incomplete
    // when they sit on a cycle, so they are computed by their own call later.
    FirstGroup.Add(root, first);
    return first;

    // Adds the contribution of every production of the non-terminal `symbol`.
    void Collect(string symbol)
    {
        foreach (var unit in GrammarUnit[symbol])
        {
            if (unit.Type == "Token")
            {
                Follow(unit.Name);
            }
            else if (unit.Type == "Grammar")
            {
                var leading = unit.Grammar.FirstOrDefault();
                if (leading != null)
                    Follow(leading);
            }
        }
    }

    // Merges the FIRST set of one leading symbol into the result.
    void Follow(string symbol)
    {
        if (FirstGroup.TryGetValue(symbol, out var known))
            first.UnionWith(known);               // already complete, reuse it
        else if (!GrammarUnit.ContainsKey(symbol))
            first.UnionWith(GetFirst(symbol));    // terminal, or throws for an unknown name
        else if (expanded.Add(symbol))
            Collect(symbol);                      // non-terminal not yet expanded in this walk
    }
}
/// <summary>
/// Walks <c>Closures</c> by consecutive key starting at 0, asking each closure for its
/// <c>NextClosures()</c> and storing every returned closure under its <c>Index</c>, until
/// the next key is absent.
/// </summary>
/// <exception cref="Exception">
/// The walk stopped although the last closure visited (or the initial state, when key 0 is
/// absent) reported new closures, i.e. the keys are not contiguous.
/// </exception>
internal void CalculateAllClosure()
{
    int cursor = 0;
    // Starts out true so that an empty table is reported as an error as well.
    bool lastProducedNew = true;

    while (Closures.TryGetValue(cursor, out var current))
    {
        List<LR1Closure> successors = current.NextClosures();
        lastProducedNew = successors.Count > 0;

        foreach (var successor in successors)
            Closures.Add(successor.Index, successor);

        cursor++;
    }

    if (lastProducedNew && !Closures.ContainsKey(cursor))
        throw new Exception("Really weird. This situation shouldn't be possible.");
}
/// <summary>
/// Expands one rule body into units stored in <c>GrammarUnit</c> and returns the name under
/// which they were stored.
/// </summary>
/// <remarks>
/// The body is a sequence of symbol names separated by spaces, with <c>|</c> between
/// alternatives, <c>( ... )</c> for groups and a trailing <c>*</c> or <c>+</c> for
/// repetition. Groups and repetitions become generated helper rules. Parentheses are not
/// matched across nesting levels: a group ends at the first <c>)</c>.
/// </remarks>
/// <param name="value">The rule body.</param>
/// <param name="name">
/// Rule name to store the result under; when empty a name is generated from
/// <paramref name="value"/>.
/// </param>
/// <returns>
/// The rule name, or, when the whole body is a single repetition, the name of the helper
/// rule generated for it.
/// </returns>
/// <exception cref="Exception">
/// A fragment is neither a known terminal, a known non-terminal nor a group, or a
/// parenthesis is left open.
/// </exception>
internal string GrammarUnitCreate(string value, string name = "")
{
    // Case 1: the whole body is one repetition, e.g. "(a b)*" or "x+".
    if (value.EndsWith('*') || value.EndsWith('+'))
    {
        // The body is one group only if its first ')' is the one right before the
        // operator; otherwise "(a) b (c)*" would be mistaken for a single group.
        bool wholeGroup = value.StartsWith('(') && value.IndexOf(')') == value.Length - 2;
        if (wholeGroup || value.IndexOf(' ') == -1)
        {
            string operand = wholeGroup
                ? value.Substring(1, value.Length - 3)
                : value.Substring(0, value.Length - 1);
            string generated = DeClosure(operand, value[^1]);
            if (name != "")
            {
                // A named rule becomes a single production that refers to the helper rule.
                GrammarUnit[name] = [new LR1Unit(name, [generated])];
            }
            return generated;
        }
    }

    // Case 2: general body. The name is derived from the body before the sentinel is added.
    string ruleName = name.Length == 0 ? NameGenerator(value) : name;
    List<List<string>> alternatives = [];
    List<string> sequence = [];
    string text = value + ' ';   // trailing space flushes the last fragment
    bool inParen = false;
    int start = 0;

    for (int i = 0; i < text.Length; i++)
    {
        char c = text[i];
        if (inParen && c != ')')
            continue;   // group content is handled by the recursive call

        switch (c)
        {
            case '(':
                start = i;
                inParen = true;
                break;

            case ')':
                inParen = false;
                break;

            case ' ':
            case '|':
                {
                    if (TryResolveFragment(text.Substring(start, i - start), out string symbol))
                        sequence.Add(symbol);
                    if (c == '|')
                    {
                        // Close the current alternative, even when it is empty.
                        alternatives.Add(sequence);
                        sequence = [];
                    }
                    start = i + 1;
                    break;
                }

            case '*':
            case '+':
                // The fragment including its operator becomes a helper rule of its own.
                sequence.Add(GrammarUnitCreate(text.Substring(start, i - start + 1)));
                start = i + 1;
                break;
        }
    }

    if (inParen)
        throw new Exception("Grammar can't be recognized. " + value);

    if (sequence.Count > 0)
        alternatives.Add(sequence);

    GrammarUnit[ruleName] = ToLR1Units(ruleName, alternatives);
    return ruleName;
}

// Maps one fragment of a rule body to a symbol name: a known terminal or non-terminal is
// used as-is, a "( ... )" group is expanded into a generated rule. Returns false for a
// blank fragment and throws for anything that cannot be resolved.
private bool TryResolveFragment(string fragment, out string symbol)
{
    string trimmed = fragment.Trim();
    if (trimmed.Length == 0)
    {
        symbol = "";
        return false;
    }

    if (GrammarUnit.ContainsKey(trimmed) || TokenUnit.ContainsKey(trimmed))
    {
        symbol = trimmed;
        return true;
    }

    if (trimmed.StartsWith('(') && trimmed.EndsWith(')'))
    {
        symbol = GrammarUnitCreate(trimmed.Substring(1, trimmed.Length - 2));
        return true;
    }

    throw new Exception("Grammar can't be recognized. " + fragment);
}
// Nested repetition is rejected outright with an error!
/// <summary>
/// Expands a repetition over <paramref name="value"/> into a generated, right-recursive
/// helper rule stored in <c>GrammarUnit</c>, and returns the helper rule's name.
/// </summary>
/// <remarks>
/// <paramref name="value"/> is split on <c>|</c> into alternatives and each alternative on
/// spaces into symbol names; runs of spaces are ignored for both operators. With
/// <c>*</c> the helper rule gets an empty production plus <c>alt name</c> for every
/// alternative; with <c>+</c> it gets <c>alt</c> and <c>alt name</c> for every alternative.
/// </remarks>
/// <param name="value">Operand of the repetition, without the operator and outer parentheses.</param>
/// <param name="type">The repetition operator, <c>'*'</c> or <c>'+'</c>.</param>
/// <returns>Name of the generated helper rule.</returns>
/// <exception cref="Exception">
/// The operand itself contains <c>*</c> or <c>+</c>, the operator is unknown, a symbol is
/// neither a known terminal nor a known non-terminal, or a <c>+</c> alternative is empty.
/// </exception>
internal string DeClosure(string value, char type)
{
    if (value.Contains('+') || value.Contains('*'))
        throw new Exception("Grammar too complex. " + value);

    string name = NameGenerator(value);

    if (type != '*' && type != '+')
        throw new Exception("Unknown type. " + type);

    List<List<string>> productions = [];
    if (type == '*')
        productions.Add([]);   // zero repetitions

    foreach (string alternative in value.Split('|'))
    {
        string[] symbols = alternative.Split(' ', StringSplitOptions.RemoveEmptyEntries);

        // "one or more" of nothing is meaningless; report it the way an empty symbol
        // name has always been reported.
        if (type == '+' && symbols.Length == 0)
            throw new Exception("Unknown grammar. ");

        foreach (string symbol in symbols)
        {
            if (!TokenUnit.ContainsKey(symbol) && !GrammarUnit.ContainsKey(symbol))
                throw new Exception("Unknown grammar. " + symbol);
        }

        if (type == '+')
            productions.Add([.. symbols]);       // exactly one occurrence
        productions.Add([.. symbols, name]);     // one occurrence followed by more
    }

    GrammarUnit[name] = ToLR1Units(name, productions);
    return name;
}
/// <summary>
/// Derives a rule name from the text of a rule body that is not yet used as a key of
/// <c>GrammarUnit</c>.
/// </summary>
/// <remarks>
/// The characters <c>( ) * |</c> are turned into spaces, the text is split on spaces, the
/// pieces are joined with <c>_</c>, and the first free suffix <c>_0</c> … <c>_8</c> is
/// appended.
/// </remarks>
/// <param name="key">Rule body text to derive the name from.</param>
/// <returns>The generated name.</returns>
/// <exception cref="Exception">All nine suffixes for the derived stem are already taken.</exception>
internal string NameGenerator(string key)
{
    string spaced = key;
    foreach (char meta in new[] { '(', ')', '*', '|' })
        spaced = spaced.Replace(meta, ' ');

    // As written, this replaces a single space with a single space and so leaves the text as it is.
    spaced = spaced.Replace(" ", " ");

    var pieces = spaced.Split(' ').Select(piece => piece.Trim());
    string stem = String.Join('_', pieces);

    int suffix = 0;
    while (suffix < 9)
    {
        string candidate = stem + '_' + suffix;
        if (!GrammarUnit.ContainsKey(candidate))
            return candidate;
        suffix++;
    }

    throw new Exception("Grammar too Complex:" + key);
}
/// <summary>
/// Creates one <c>LR1Unit</c> named <paramref name="name"/> for every symbol list in
/// <paramref name="grammars"/>, keeping their order.
/// </summary>
/// <param name="name">Name given to every created unit.</param>
/// <param name="grammars">Symbol lists, one per unit to create.</param>
/// <returns>A new list holding the created units.</returns>
internal List<LR1Unit> ToLR1Units(string name, List<List<string>> grammars)
{
    return grammars.ConvertAll(symbols => new LR1Unit(name, symbols));
}
/// <summary>
/// Prints the contents of <c>GrammarUnit</c> to the console: a heading, then for every
/// entry its key followed by one line per unit.
/// </summary>
private void PrintGrammar()
{
    Console.WriteLine("文法:");
    foreach (var entry in GrammarUnit)
    {
        Console.WriteLine("key: " + entry.Key);
        entry.Value.ForEach(unit => Console.WriteLine(" " + unit));
    }
}
/// <summary>
/// Prints the contents of <c>FirstGroup</c> to the console: a heading, then one line per
/// entry made of the key, a colon and the members of its set, each followed by a space.
/// </summary>
internal void PrintFirst()
{
    Console.WriteLine("First集");
    foreach (var entry in FirstGroup)
    {
        // Assemble the whole line first, then emit it in one call.
        var line = new StringBuilder($"{entry.Key}: ");
        foreach (var member in entry.Value)
            line.Append(member).Append(' ');
        Console.WriteLine(line.ToString());
    }
}
/// <summary>
/// Looks up the units registered under <paramref name="name"/>. Terminals are checked
/// first and yield a new one-element list; otherwise the stored list of the non-terminal
/// is returned.
/// </summary>
/// <param name="name">Name of a terminal or non-terminal.</param>
/// <returns>The units for <paramref name="name"/>.</returns>
/// <exception cref="Exception">No terminal or non-terminal has that name.</exception>
internal List<LR1Unit> GetUnits(string name)
{
    if (!TokenUnit.TryGetValue(name, out var token))
    {
        return GrammarUnit.TryGetValue(name, out var productions)
            ? productions
            : throw new Exception($"No Unit Called: {name}");
    }

    return [token];
}
}
}