2024-12-27 01:25:50 +08:00
|
|
|
|
namespace CompilerDesignIFlr1
|
2024-12-23 01:42:58 +08:00
|
|
|
|
{
|
|
|
|
|
internal class LR1Creator
|
|
|
|
|
{
|
|
|
|
|
/// <summary>
/// Name of the grammar's start symbol. The constructor takes it from the rule whose key
/// starts with "@top" (with the first five characters removed); it stays empty when no
/// such rule is present.
/// </summary>
internal string StartSymbol { get; init; }
|
|
|
|
|
/// <summary>All closures built so far, keyed by each closure's <c>Index</c>.</summary>
internal Dictionary<int, LR1Closure> Closures = [];
|
|
|
|
|
|
|
|
|
|
// Terminal symbols (tokens)
|
|
|
|
|
internal Dictionary<string, LR1Unit> TokenUnit = [];
|
|
|
|
|
|
|
|
|
|
// Non-terminal symbols; each maps to a list of productions to account for alternation ("or")
|
|
|
|
|
internal Dictionary<string, List<LR1Unit>> GrammarUnit = [];
|
|
|
|
|
|
|
|
|
|
// FIRST sets
|
|
|
|
|
internal Dictionary<string, HashSet<string>> FirstGroup = [];
|
|
|
|
|
/// <summary>Productions keyed by the <c>Id</c> assigned to them in <c>SetGrammarIndex</c>.</summary>
internal Dictionary<int, LR1Unit> UnitIndex = [];
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Builds the grammar tables from <paramref name="reader"/> and then runs the whole
/// pipeline: numbers the productions, computes the FIRST sets, creates the initial
/// closure and expands every reachable closure. Intermediate results are printed to the
/// console along the way.
/// </summary>
/// <param name="reader">Source of the symbol, keyword, regex and grammar rule tables.</param>
internal LR1Creator(GrammarReader reader)
{
    // Drops the first five characters of a key that starts with "@top"; other keys pass through.
    static string RuleName(string ruleKey) => ruleKey.StartsWith("@top") ? ruleKey.Substring(5) : ruleKey;

    // Terminals: the "End" marker first, then the three token tables in their original order.
    TokenUnit.Add("End", new LR1Unit("End", "#"));
    foreach (var (tokenName, definition) in reader.SymbolToken)
    {
        TokenUnit.Add(tokenName, new LR1Unit(tokenName, definition));
    }
    foreach (var (tokenName, definition) in reader.KeyToken)
    {
        TokenUnit.Add(tokenName, new LR1Unit(tokenName, definition));
    }
    foreach (var (tokenName, definition) in reader.RegexToken)
    {
        TokenUnit.Add(tokenName, new LR1Unit(tokenName, definition));
    }

    // First pass: register every rule name with an empty production list so that rule
    // bodies can refer to rules regardless of the order they appear in.
    foreach (var (ruleKey, _) in reader.RegularToken)
    {
        if (!ruleKey.StartsWith("@skip"))
        {
            GrammarUnit.Add(RuleName(ruleKey), []);
        }
    }

    // Second pass: parse the rule bodies; the "@top" rule also supplies the start symbol.
    string startSymbol = "";
    foreach (var (ruleKey, body) in reader.RegularToken)
    {
        if (ruleKey.StartsWith("@skip"))
        {
            continue;
        }

        string ruleName = RuleName(ruleKey);
        if (ruleKey.StartsWith("@top"))
        {
            startSymbol = ruleName;
        }
        GrammarUnitCreate(body, ruleName);
    }

    SetGrammarIndex();
    PrintGrammar();

    foreach (var terminal in TokenUnit.Keys)
    {
        GetFirst(terminal);
    }
    foreach (var nonTerminal in GrammarUnit.Keys)
    {
        GetFirst(nonTerminal);
    }
    PrintFirst();

    StartSymbol = startSymbol;
    I0Creator();
    CalculateAllClosure();

    int[] allClosureKeys = Enumerable.Range(0, Closures.Count).ToArray();
    PrintClosure(allClosureKeys);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Numbers every production in <c>GrammarUnit</c> consecutively from 0, in the
/// dictionary's enumeration order, and records each one in <c>UnitIndex</c> under its id.
/// </summary>
internal void SetGrammarIndex()
{
    int nextId = 0;
    foreach (var production in GrammarUnit.Values.SelectMany(productions => productions))
    {
        production.Id = nextId;
        nextId++;
        UnitIndex[production.Id] = production;
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>Writes the closures stored under the given keys to the console, in the order given.</summary>
/// <param name="index">Keys into <c>Closures</c>; each key must be present.</param>
internal void PrintClosure(params int[] index)
{
    for (int position = 0; position < index.Length; position++)
    {
        Console.WriteLine(Closures[index[position]]);
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Creates the initial closure: every production of <c>StartSymbol</c> is cloned, given
/// the "End" lookahead and added to a new closure, which is then completed
/// (<c>AddMissingUnits</c>, <c>CalculateProspects</c>) and stored in <c>Closures</c>
/// under its own <c>Index</c>.
/// </summary>
internal void I0Creator()
{
    var initial = new LR1Closure(this);

    foreach (var startProduction in GrammarUnit[StartSymbol])
    {
        var item = startProduction.Clone();
        item.Prospect.Add("End");
        initial.Units.Add(item);
    }

    initial.AddMissingUnits();
    initial.CalculateProspects();

    Closures.Add(initial.Index, initial);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Returns the FIRST set of <paramref name="root"/>, computing and caching it in
/// <c>FirstGroup</c> on first use.
/// </summary>
/// <remarks>
/// For a terminal the set is the terminal's own name. For a non-terminal it is the union
/// of the FIRST sets of the leading symbol of each of its productions. The sets are
/// solved by fixed-point iteration over all not-yet-cached non-terminals reachable through
/// leading symbols, so left-recursive rules (direct or mutual) terminate instead of
/// recursing without bound.
/// NOTE(review): as before, only the leading symbol of a production is considered;
/// productions that start with a non-terminal that can derive the empty string do not
/// contribute the symbols that follow it — confirm whether the consumers compensate.
/// </remarks>
/// <param name="root">Name of a terminal or non-terminal.</param>
/// <returns>The cached set for non-terminals; a fresh single-element set for a terminal computed for the first time.</returns>
/// <exception cref="Exception">A symbol is neither a known non-terminal nor a known terminal.</exception>
internal HashSet<string> GetFirst(string root)
{
    if (FirstGroup.TryGetValue(root, out var cached))
    {
        return cached;
    }

    if (!GrammarUnit.ContainsKey(root))
    {
        if (TokenUnit.TryGetValue(root, out var terminal))
        {
            FirstGroup.Add(root, [terminal.Name]);
            return [terminal.Name];
        }
        throw new Exception("Grammar can't be recognized. " + root);
    }

    // Phase 1: collect every uncached non-terminal reachable from root through leading
    // symbols, remembering the leading symbols of each one.
    Dictionary<string, HashSet<string>> pending = new() { [root] = [] };
    Dictionary<string, List<string>> leadingSymbols = [];
    Stack<string> worklist = new();
    worklist.Push(root);

    while (worklist.Count > 0)
    {
        string current = worklist.Pop();
        List<string> heads = [];

        foreach (var unit in GrammarUnit[current])
        {
            var head = unit.Type == "Token" ? unit.Name
                     : unit.Type == "Grammar" ? unit.Grammar.FirstOrDefault()
                     : null;

            // Empty productions contribute nothing; a self-reference adds nothing new.
            if (head == null || head == current)
            {
                continue;
            }

            heads.Add(head);
            if (!GrammarUnit.ContainsKey(head))
            {
                // Terminal: caches its set, or throws for an unknown symbol.
                GetFirst(head);
            }
            else if (!FirstGroup.ContainsKey(head) && !pending.ContainsKey(head))
            {
                pending.Add(head, []);
                worklist.Push(head);
            }
        }

        leadingSymbols.Add(current, heads);
    }

    // Phase 2: propagate until no pending set grows any more.
    bool changed;
    do
    {
        changed = false;
        foreach (var (name, heads) in leadingSymbols)
        {
            var target = pending[name];
            int before = target.Count;
            foreach (var head in heads)
            {
                target.UnionWith(pending.TryGetValue(head, out var partial) ? partial : FirstGroup[head]);
            }
            changed |= target.Count != before;
        }
    } while (changed);

    foreach (var (name, firstSet) in pending)
    {
        FirstGroup.Add(name, firstSet);
    }
    return pending[root];
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Expands closures in ascending key order starting at 0: each closure's
/// <c>NextClosures()</c> results are added to <c>Closures</c> under their own
/// <c>Index</c>, and processing stops at the first key that is not present.
/// </summary>
/// <exception cref="Exception">
/// The walk stopped although the last processed closure produced new closures, or no
/// closure with key 0 exists.
/// </exception>
internal void CalculateAllClosure()
{
    bool lastStepProducedNew = true;
    int cursor = 0;

    for (; Closures.TryGetValue(cursor, out var current); cursor++)
    {
        List<LR1Closure> successors = current.NextClosures();
        lastStepProducedNew = successors.Count > 0;
        foreach (var successor in successors)
        {
            Closures.Add(successor.Index, successor);
        }
    }

    if (lastStepProducedNew && !Closures.ContainsKey(cursor))
    {
        throw new Exception("Really weird. This situation shouldn't be possible.");
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Parses one rule body into productions and stores them in <c>GrammarUnit</c>.
/// </summary>
/// <remarks>
/// A body that is a single repeated operand (<c>(…)*</c>, <c>(…)+</c>, <c>x*</c> or
/// <c>x+</c>) is handed to <c>DeClosure</c>; when a name was supplied, that name gets a
/// single production referring to the generated non-terminal. Any other body is scanned
/// character by character: spaces separate symbols, <c>|</c> separates alternatives, a
/// parenthesised group or a repeated operand is parsed recursively into its own generated
/// non-terminal. Parentheses are not tracked across nesting levels: the first <c>)</c>
/// closes the group.
/// </remarks>
/// <param name="value">Rule body text.</param>
/// <param name="name">Rule name; when empty a name is generated from <paramref name="value"/>.</param>
/// <returns>The name of the non-terminal that represents <paramref name="value"/>.</returns>
/// <exception cref="Exception">A symbol is neither a known non-terminal nor a known terminal.</exception>
internal string GrammarUnitCreate(string value, string name = "")
{
    if (value.EndsWith('*') || value.EndsWith('+'))
    {
        bool parenthesised = value.StartsWith('(') && value[^2] == ')';
        if (parenthesised || !value.Contains(' '))
        {
            // Strip the repetition suffix and, if present, the surrounding parentheses.
            string operand = parenthesised ? value[1..^2] : value[..^1];
            string generated = DeClosure(operand, value[^1]);
            if (name != "")
            {
                GrammarUnit[name] = [new LR1Unit(name, [generated])];
            }
            return generated;
        }
    }

    if (name.Length == 0)
    {
        name = NameGenerator(value);
    }

    List<List<string>> alternatives = [];
    List<string> currentAlternative = [];
    // The trailing space guarantees the last symbol is flushed by the ' ' case.
    string text = value + ' ';
    bool insideParens = false;
    int start = 0;

    for (int i = 0; i < text.Length; i++)
    {
        char c = text[i];

        if (insideParens)
        {
            // Everything up to the closing parenthesis is skipped; the group is
            // consumed later as a whole, starting at the remembered '(' position.
            if (c == ')')
            {
                insideParens = false;
            }
            continue;
        }

        switch (c)
        {
            case '(':
                start = i;
                insideParens = true;
                break;

            case ' ':
            {
                string piece = text[start..i];
                start = i + 1;
                string symbol = piece.Trim();

                if (GrammarUnit.ContainsKey(symbol) || TokenUnit.ContainsKey(symbol))
                {
                    currentAlternative.Add(symbol);
                }
                else if (piece.StartsWith('(') && piece.EndsWith(')'))
                {
                    currentAlternative.Add(GrammarUnitCreate(piece[1..^1]));
                }
                else if (symbol.Length != 0)
                {
                    throw new Exception("Grammar can't be recognized. " + piece);
                }
                break;
            }

            case '|':
            {
                string piece = text[start..i];
                string symbol = piece.Trim();

                if (symbol.TrimEnd('|').Length != 0)
                {
                    if (GrammarUnit.ContainsKey(symbol) || TokenUnit.ContainsKey(symbol))
                    {
                        currentAlternative.Add(symbol);
                    }
                    else
                    {
                        throw new Exception("Grammar can't be recognized. " + piece);
                    }
                }

                alternatives.Add(currentAlternative);
                currentAlternative = [];
                start = i + 1;
                break;
            }

            case '*':
            case '+':
                // The operand together with its suffix becomes a generated non-terminal.
                currentAlternative.Add(GrammarUnitCreate(text[start..(i + 1)]));
                start = i + 1;
                break;
        }
    }

    if (currentAlternative.Count > 0)
    {
        alternatives.Add(currentAlternative);
    }

    GrammarUnit[name] = ToLR1Units(name, alternatives);
    return name;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Expands a repeated operand (<c>value*</c> or <c>value+</c>) into a freshly named
/// non-terminal with right-recursive productions and stores it in <c>GrammarUnit</c>.
/// </summary>
/// <remarks>
/// Nested repetition is not supported and is rejected outright. For every alternative
/// <c>α</c> of <paramref name="value"/> (alternatives are separated by <c>|</c>, symbols
/// by whitespace) the generated non-terminal <c>N</c> receives:
/// <list type="bullet">
/// <item><c>*</c>: <c>N ::= ε</c> (once) and <c>N ::= α N</c>;</item>
/// <item><c>+</c>: <c>N ::= α</c> and <c>N ::= α N</c>.</item>
/// </list>
/// Both repetition kinds tokenize alternatives the same way: surrounding whitespace and
/// repeated spaces are ignored. An empty alternative is skipped for <c>*</c> (the empty
/// production already covers it) and is an error for <c>+</c>.
/// </remarks>
/// <param name="value">The operand text without the repetition suffix or outer parentheses.</param>
/// <param name="type">Repetition kind: <c>'*'</c> or <c>'+'</c>.</param>
/// <returns>The generated non-terminal's name.</returns>
/// <exception cref="Exception">
/// <paramref name="value"/> itself contains <c>*</c> or <c>+</c>, <paramref name="type"/>
/// is not a repetition kind, or a symbol is neither a known terminal nor non-terminal.
/// </exception>
internal string DeClosure(string value, char type)
{
    if (value.Contains('+') || value.Contains('*'))
    {
        throw new Exception("Grammar too complex. " + value);
    }

    string name = NameGenerator(value);

    if (type != '*' && type != '+')
    {
        throw new Exception("Unknown type. " + type);
    }

    List<List<string>> productions = [];
    if (type == '*')
    {
        // Zero repetitions.
        productions.Add([]);
    }

    foreach (var alternative in value.Split('|'))
    {
        string[] symbols = alternative.Trim().Split(' ', StringSplitOptions.RemoveEmptyEntries);

        foreach (var symbol in symbols)
        {
            if (!TokenUnit.ContainsKey(symbol) && !GrammarUnit.ContainsKey(symbol))
            {
                throw new Exception("Unknown grammar. " + symbol);
            }
        }

        if (symbols.Length == 0)
        {
            if (type == '+')
            {
                throw new Exception("Unknown grammar. ");
            }
            // For '*' an empty alternative would only add the vacuous production N ::= N.
            continue;
        }

        if (type == '+')
        {
            // A single occurrence terminates the repetition.
            productions.Add([.. symbols]);
        }
        // Right recursion: one occurrence followed by further repetitions.
        productions.Add([.. symbols, name]);
    }

    GrammarUnit[name] = ToLR1Units(name, productions);
    return name;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Derives an unused non-terminal name from a rule fragment: the characters
/// <c>( ) * |</c> are turned into spaces, the text is split on spaces, the pieces are
/// joined with <c>_</c>, and the first suffix <c>_0</c> … <c>_8</c> that is not yet a key
/// of <c>GrammarUnit</c> is appended.
/// </summary>
/// <param name="key">The rule fragment the name is derived from.</param>
/// <returns>A name that is not currently a key of <c>GrammarUnit</c>.</returns>
/// <exception cref="Exception">All nine suffixed candidates are already taken.</exception>
internal string NameGenerator(string key)
{
    string flattened = key;
    foreach (char punctuation in "()*|")
    {
        flattened = flattened.Replace(punctuation, ' ');
    }
    // NOTE(review): as written this replaces a space with a space (no effect); it may
    // have been meant to collapse doubled spaces — confirm against the original file.
    flattened = flattened.Replace(" ", " ");

    var pieces = flattened.Split(' ').Select(piece => piece.Trim());
    string stem = String.Join('_', pieces);

    int suffix = 0;
    while (suffix < 9)
    {
        string candidate = stem + '_' + suffix;
        if (!GrammarUnit.ContainsKey(candidate))
        {
            return candidate;
        }
        suffix++;
    }

    throw new Exception("Grammar too Complex:" + key);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>Wraps each symbol sequence in a production (<c>LR1Unit</c>) named <paramref name="name"/>.</summary>
/// <param name="name">Name given to every created production.</param>
/// <param name="grammars">One symbol sequence per production.</param>
/// <returns>A new list with one production per entry of <paramref name="grammars"/>, in the same order.</returns>
internal List<LR1Unit> ToLR1Units(string name, List<List<string>> grammars)
{
    return grammars.ConvertAll(symbols => new LR1Unit(name, symbols));
}
|
|
|
|
|
|
|
|
|
|
/// <summary>Writes every non-terminal of <c>GrammarUnit</c> followed by its productions to the console.</summary>
private void PrintGrammar()
{
    Console.WriteLine("文法:");
    foreach (var (nonTerminal, productions) in GrammarUnit)
    {
        Console.WriteLine("key: " + nonTerminal);
        productions.ForEach(production => Console.WriteLine(" " + production));
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>Writes every cached FIRST set to the console, one symbol per line followed by its members.</summary>
internal void PrintFirst()
{
    Console.WriteLine("First集:");
    foreach (var (symbol, firstSet) in FirstGroup)
    {
        // Each member is followed by a single space, exactly as when written one by one.
        string members = string.Concat(firstSet.Select(member => $"{member} "));
        Console.WriteLine($"{symbol}: " + members);
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Looks up the units registered under <paramref name="name"/>: a terminal yields a new
/// single-element list, a non-terminal yields its stored production list. Terminals are
/// checked first.
/// </summary>
/// <param name="name">Name of a terminal or non-terminal.</param>
/// <exception cref="Exception">No terminal or non-terminal has this name.</exception>
internal List<LR1Unit> GetUnits(string name)
{
    if (TokenUnit.TryGetValue(name, out var terminal))
    {
        return [terminal];
    }

    return GrammarUnit.TryGetValue(name, out var productions)
        ? productions
        : throw new Exception($"No Unit Called: {name}");
}
|
|
|
|
|
}
|
|
|
|
|
}
|