CompilerDesignIFLR1/CompilerDesignIflr1/LR1Creator.cs

402 lines
14 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

namespace CompilerDesignIFlr1
{
internal class LR1Creator
{
// Name of the start nonterminal; the constructor takes it from the "@top" rule.
internal string StartSymbol { get; init; }
// Closures built so far, keyed by each closure's Index.
internal Dictionary<int, LR1Closure> Closures = [];
// Terminals, keyed by token name.
internal Dictionary<string, LR1Unit> TokenUnit = [];
// Nonterminals; each name maps to a list of units so that alternation ("or") can be represented.
internal Dictionary<string, List<LR1Unit>> GrammarUnit = [];
// FIRST sets, keyed by symbol name.
internal Dictionary<string, HashSet<string>> FirstGroup = [];
// Units from GrammarUnit, keyed by the Id assigned in SetGrammarIndex.
internal Dictionary<int, LR1Unit> UnitIndex = [];
// Builds the whole LR(1) data set from a parsed grammar description:
//   1. registers the "End" terminal plus every symbol/keyword/regex token,
//   2. pre-registers every rule name so rules can reference each other,
//   3. expands every rule body into units (rules whose key starts with "@skip" are ignored,
//      the rule whose key starts with "@top" names the start symbol),
//   4. numbers the units, computes FIRST sets, builds closure I0 and all reachable closures,
//      printing the grammar, the FIRST sets and the closures along the way.
internal LR1Creator(GrammarReader reader)
{
    // Drops the 5-character "@top" prefix (keyword plus separator) from a rule key, if present.
    static string StripTop(string ruleKey) => ruleKey.StartsWith("@top") ? ruleKey[5..] : ruleKey;

    TokenUnit.Add("End", new LR1Unit("End", "#"));
    foreach (var (tokenName, tokenValue) in reader.SymbolToken)
        TokenUnit.Add(tokenName, new LR1Unit(tokenName, tokenValue));
    foreach (var (tokenName, tokenValue) in reader.KeyToken)
        TokenUnit.Add(tokenName, new LR1Unit(tokenName, tokenValue));
    foreach (var (tokenName, tokenValue) in reader.RegexToken)
        TokenUnit.Add(tokenName, new LR1Unit(tokenName, tokenValue));

    // First pass: reserve an (empty) entry for every rule so forward references resolve.
    foreach (var (ruleKey, _) in reader.RegularToken)
    {
        if (!ruleKey.StartsWith("@skip"))
            GrammarUnit.Add(StripTop(ruleKey), []);
    }

    // Second pass: expand the rule bodies and remember which rule is the start rule.
    string top = "";
    foreach (var (ruleKey, ruleBody) in reader.RegularToken)
    {
        if (ruleKey.StartsWith("@skip"))
            continue;

        string ruleName = StripTop(ruleKey);
        if (ruleKey.StartsWith("@top"))
            top = ruleName;
        GrammarUnitCreate(ruleBody, ruleName);
    }

    SetGrammarIndex();
    PrintGrammar();

    foreach (var tokenName in TokenUnit.Keys)
        GetFirst(tokenName);
    foreach (var ruleName in GrammarUnit.Keys)
        GetFirst(ruleName);
    PrintFirst();

    StartSymbol = top;
    I0Creator();
    CalculateAllClosure();

    int[] allClosures = Enumerable.Range(0, Closures.Count).ToArray();
    PrintClosure(allClosures);
}
// Assigns consecutive ids (starting at 0) to every unit in GrammarUnit, in dictionary
// enumeration order, and records each unit in UnitIndex under its id.
internal void SetGrammarIndex()
{
    int nextId = 0;
    foreach (var unit in GrammarUnit.Values.SelectMany(units => units))
    {
        unit.Id = nextId;
        UnitIndex[unit.Id] = unit;
        nextId++;
    }
}
// Writes the closures with the given indices to the console, one per line, using each
// closure's own string representation.
internal void PrintClosure(params int[] index)
{
    for (int i = 0; i < index.Length; i++)
    {
        Console.WriteLine(Closures[index[i]]);
    }
}
// Builds the initial closure from the start symbol's units and registers it in Closures.
// Each start unit is cloned and gets "End" added to its Prospect collection
// (presumably the lookahead set — confirm against LR1Unit).
internal void I0Creator()
{
    var initial = new LR1Closure(this);

    foreach (var startUnit in GrammarUnit[StartSymbol])
    {
        var seed = startUnit.Clone();
        seed.Prospect.Add("End");
        initial.Units.Add(seed);
    }

    // Let the closure complete itself before its prospects are computed.
    initial.AddMissingUnits();
    initial.CalculateProspects();

    Closures.Add(initial.Index, initial);
}
// Returns the FIRST set of a symbol (terminal or nonterminal) and caches it in FirstGroup.
//
// root: name of a terminal (key of TokenUnit) or nonterminal (key of GrammarUnit).
// Returns: the set of terminal names that can start the symbol. For a terminal this is
//          the single name of its unit.
// Throws: Exception when root, or a symbol reached from it, is neither a known
//         terminal nor a known nonterminal.
//
// The set is collected by walking the "first symbol of each production" graph with a
// visited set, so left-recursive rules (A ::= A x) and mutually recursive rules terminate.
// The previous plain recursion never returned for such grammars and overflowed the stack.
// Only the result for root is cached: a partial result for a symbol in the middle of a
// cycle would be incomplete.
//
// NOTE(review): as before, empty productions contribute nothing and a leading symbol is
// never looked past, even if it can derive the empty string — confirm that consumers of
// FirstGroup do not need epsilon-aware FIRST sets.
internal HashSet<string> GetFirst(string root)
{
    if (FirstGroup.TryGetValue(root, out var cached))
        return cached;

    HashSet<string> first = [];
    HashSet<string> visited = [];
    Collect(root);

    FirstGroup.Add(root, first);
    return first;

    // Adds to `first` every terminal reachable from `symbol` through leading symbols.
    void Collect(string symbol)
    {
        // Already expanded on this walk: this is what breaks recursion cycles.
        if (!visited.Add(symbol))
            return;

        // Cached sets are complete, so they can be merged without expanding again.
        if (FirstGroup.TryGetValue(symbol, out var known))
        {
            first.UnionWith(known);
            return;
        }

        if (GrammarUnit.TryGetValue(symbol, out var units))
        {
            foreach (var unit in units)
            {
                if (unit.Type == "Token")
                    Collect(unit.Name);
                else if (unit.Type == "Grammar" && unit.Grammar.FirstOrDefault() is { } head)
                    Collect(head);
            }
        }
        else if (TokenUnit.TryGetValue(symbol, out var token))
        {
            first.Add(token.Name);
        }
        else
        {
            throw new Exception("Grammar can't be recognized. " + symbol);
        }
    }
}
// Expands closures in index order (0, 1, 2, ...) until an index is missing: each visited
// closure is asked for its successor closures, which are registered under their own Index.
// Throws if the walk stops although the last visited closure still produced successors
// (or if closure 0 does not exist), since that means the indices are not contiguous.
internal void CalculateAllClosure()
{
    int index = 0;
    bool producedNew = true;

    while (Closures.TryGetValue(index, out var current))
    {
        List<LR1Closure> successors = current.NextClosures();
        producedNew = successors.Count > 0;

        foreach (var successor in successors)
            Closures.Add(successor.Index, successor);

        index++;
    }

    if (producedNew && !Closures.ContainsKey(index))
        throw new Exception("Really weird. This situation shouldn't be possible.");
}
// Parses one rule body and registers the resulting productions in GrammarUnit.
//
// value: rule body made of symbol names separated by spaces, '|' between alternatives,
//        parenthesised groups, and the postfix operators '*' and '+'.
// name:  nonterminal to register the productions under; when empty a name is generated
//        with NameGenerator (used for anonymous groups).
// Returns: the name of the nonterminal that represents value. For a body that is a
//          single repetition this is the name generated by DeClosure, not `name`.
// Throws: Exception when a symbol is neither a known terminal nor a known nonterminal
//         (plus whatever DeClosure / NameGenerator throw).
//
// Fix over the previous version: a parenthesised group directly followed by '|'
// ("(a b)|c") used to be rejected, although the same group followed by a space was
// accepted. Both delimiters now resolve symbols through the same helper.
internal string GrammarUnitCreate(string value, string name = "")
{
    // Whole body is one repetition: "(...)*", "(...)+", "x*" or "x+".
    if (value.EndsWith('*') || value.EndsWith('+'))
    {
        bool wrapped = value.StartsWith('(') && value[^2] == ')';
        if (wrapped || value.IndexOf(' ') == -1)
        {
            string repeated = wrapped
                ? value.Substring(1, value.Length - 3)
                : value.Substring(0, value.Length - 1);
            string closureName = DeClosure(repeated, value[^1]);
            if (name != "")
            {
                // A named rule becomes a single production that delegates to the closure.
                GrammarUnit[name] = [new LR1Unit(name, [closureName])];
            }
            return closureName;
        }
    }

    // The name is generated before any nested group is expanded, from the untouched text.
    name = name.Length == 0 ? NameGenerator(value) : name;

    List<List<string>> alternatives = [];
    List<string> current = [];
    bool inParen = false;
    int start = 0;

    // Trailing space guarantees that the last symbol is flushed by the ' ' case.
    value += ' ';

    // Appends the symbol denoted by `raw` to the current alternative; blank text is ignored.
    void AddSymbol(string raw)
    {
        string symbol = raw.Trim();
        if (symbol.Length == 0)
            return;

        if (GrammarUnit.ContainsKey(symbol) || TokenUnit.ContainsKey(symbol))
            current.Add(symbol);
        else if (raw.StartsWith('(') && raw.EndsWith(')'))
            current.Add(GrammarUnitCreate(raw.Substring(1, raw.Length - 2)));
        else
            throw new Exception("Grammar can't be recognized. " + raw);
    }

    for (int i = 0; i < value.Length; i++)
    {
        char c = value[i];

        // Everything inside parentheses is skipped; the group is parsed recursively later.
        if (inParen && c != ')')
            continue;

        switch (c)
        {
            case '(':
                start = i;
                inParen = true;
                break;
            case ')':
                inParen = false;
                break;
            case ' ':
                AddSymbol(value.Substring(start, i - start));
                start = i + 1;
                break;
            case '|':
                AddSymbol(value.Substring(start, i - start));
                alternatives.Add(current);
                current = [];
                start = i + 1;
                break;
            case '*':
            case '+':
                // The operand (symbol or group) plus its operator becomes its own nonterminal.
                current.Add(GrammarUnitCreate(value.Substring(start, i - start + 1)));
                start = i + 1;
                break;
            default:
                break;
        }
    }

    if (current.Count > 0)
        alternatives.Add(current);

    GrammarUnit[name] = ToLR1Units(name, alternatives);
    return name;
}
// Expands a repetition ("x*" or "x+") into a right-recursive helper nonterminal and
// registers it in GrammarUnit. Nested repetitions are rejected with an error.
//
// value: the repeated expression without its operator; alternatives separated by '|',
//        symbols separated by spaces.
// type:  '*' (zero or more) or '+' (one or more).
// Returns: the generated name N of the helper nonterminal, where for each alternative s
//          '*' yields  N ::= (empty) | s N
//          '+' yields  N ::= s | s N
// Throws: Exception when value contains '*' or '+', when a symbol is unknown, when a
//         '+' alternative is empty, or when type is neither '*' nor '+'.
//
// Fixes over the previous version: both operators now tokenise alternatives the same
// way (surrounding whitespace trimmed, repeated spaces ignored) — '+' used to throw on
// a double space that '*' accepted — and a leftover debug print of the symbol length
// before the "Unknown grammar" error is gone.
// An empty alternative under '*' still yields the production N ::= N, as before.
internal string DeClosure(string value, char type)
{
    if (value.Contains('+') || value.Contains('*'))
        throw new Exception("Grammar too complex. " + value);

    string name = NameGenerator(value);
    if (type != '*' && type != '+')
        throw new Exception("Unknown type. " + type);

    List<List<string>> productions = [];
    if (type == '*')
        productions.Add([]); // zero repetitions

    foreach (var alternative in value.Split('|'))
    {
        List<string> symbols = [];
        foreach (var symbol in alternative.Trim().Split(' ', StringSplitOptions.RemoveEmptyEntries))
        {
            if (!TokenUnit.ContainsKey(symbol) && !GrammarUnit.ContainsKey(symbol))
                throw new Exception("Unknown grammar. " + symbol);
            symbols.Add(symbol);
        }

        if (type == '+')
        {
            // An empty alternative would make "one or more" match nothing; keep rejecting it
            // with the message the previous implementation produced for the empty symbol.
            if (symbols.Count == 0)
                throw new Exception("Unknown grammar. ");
            productions.Add([.. symbols]); // exactly one repetition
        }

        productions.Add([.. symbols, name]); // one repetition followed by more
    }

    GrammarUnit[name] = ToLR1Units(name, productions);
    return name;
}
// Derives a fresh nonterminal name from a piece of grammar text: '(', ')', '*' and '|'
// become spaces, the text is split on spaces, the pieces are joined with '_', and the
// first suffix "_0" .. "_8" not yet present in GrammarUnit is appended.
// Throws Exception when all nine suffixes are already taken.
internal string NameGenerator(string key)
{
    string flattened = key;
    foreach (char separator in "()*|")
        flattened = flattened.Replace(separator, ' ');

    // NOTE(review): as written both arguments are a single space, so this call changes
    // nothing; it may have been meant to collapse double spaces — confirm.
    flattened = flattened.Replace(" ", " ");

    var segments = flattened.Split(' ').Select(segment => segment.Trim());
    string stem = String.Join('_', segments);

    string candidate = Enumerable.Range(0, 9)
        .Select(suffix => stem + '_' + suffix)
        .FirstOrDefault(option => !GrammarUnit.ContainsKey(option));

    return candidate ?? throw new Exception("Grammar too Complex:" + key);
}
// Wraps every symbol list in `grammars` in an LR1Unit named `name`, preserving order.
internal List<LR1Unit> ToLR1Units(string name, List<List<string>> grammars)
{
    return grammars.ConvertAll(symbols => new LR1Unit(name, symbols));
}
// Dumps the grammar to the console: a header line, then for every nonterminal a
// "key: <name>" line followed by one line per unit.
private void PrintGrammar()
{
    Console.WriteLine("文法:");
    foreach (var (ruleName, units) in GrammarUnit)
    {
        Console.WriteLine($"key: {ruleName}");
        units.ForEach(unit => Console.WriteLine($" {unit}"));
    }
}
// Dumps the cached FIRST sets to the console: a header line, then one line per symbol
// in the form "<symbol>: <terminal> <terminal> ...".
internal void PrintFirst()
{
    Console.WriteLine("First集");
    foreach (var (symbol, terminals) in FirstGroup)
    {
        string members = string.Concat(terminals.Select(terminal => $"{terminal} "));
        Console.WriteLine($"{symbol}: " + members);
    }
}
// Looks up the units registered for a symbol. Terminals take precedence and are returned
// as a new single-element list; nonterminals return their stored list of units.
// Throws Exception when the name is neither a terminal nor a nonterminal.
internal List<LR1Unit> GetUnits(string name)
{
    if (!TokenUnit.TryGetValue(name, out var terminal))
    {
        if (!GrammarUnit.TryGetValue(name, out var productions))
            throw new Exception($"No Unit Called: {name}");
        return productions;
    }

    return [terminal];
}
}
}