410 lines
14 KiB
C#
410 lines
14 KiB
C#
using System;
|
||
using System.Collections.Generic;
|
||
using System.ComponentModel.DataAnnotations;
|
||
using System.Dynamic;
|
||
using System.Linq;
|
||
using System.Text;
|
||
using System.Threading.Tasks;
|
||
|
||
namespace CompilerDesignIFlr1
|
||
{
|
||
internal class LR1Creator
|
||
{
|
||
/// <summary>Name of the start rule; assigned by the constructor from the rule whose key starts with "@top".</summary>
internal string StartSymbol { get; init; }

/// <summary>Generated closures, keyed by each closure's <c>Index</c>.</summary>
internal Dictionary<int, LR1Closure> Closures = new();

/// <summary>Terminals, keyed by token name.</summary>
internal Dictionary<string, LR1Unit> TokenUnit = new();

/// <summary>Non-terminals; each name maps to one unit per alternative (the "or" case).</summary>
internal Dictionary<string, List<LR1Unit>> GrammarUnit = new();

/// <summary>FIRST sets, keyed by symbol name.</summary>
internal Dictionary<string, HashSet<string>> FirstGroup = new();

/// <summary>Grammar units keyed by the id assigned in <see cref="SetGrammarIndex"/>.</summary>
internal Dictionary<int, LR1Unit> UnitIndex = new();
|
||
|
||
/// <summary>
/// Builds all LR(1) data from a parsed grammar: registers the terminals, creates the
/// grammar units for every rule, numbers the productions, computes the FIRST sets,
/// builds the initial closure and then expands every reachable closure. The grammar,
/// the FIRST sets and the closures are printed to the console along the way.
/// </summary>
/// <param name="reader">
/// Grammar source. Its SymbolToken, KeyToken and RegexToken entries become terminals;
/// its RegularToken entries become rules.
/// </param>
/// <exception cref="KeyNotFoundException">No rule key starts with "@top".</exception>
/// <exception cref="Exception">A rule body cannot be parsed.</exception>
internal LR1Creator(GrammarReader reader)
{
    // "End" is the end-of-input terminal; I0Creator uses it as the initial lookahead.
    TokenUnit.Add("End", new LR1Unit("End", "#"));
    foreach (var (key, value) in reader.SymbolToken)
    {
        TokenUnit.Add(key, new LR1Unit(key, value));
    }
    foreach (var (key, value) in reader.KeyToken)
    {
        TokenUnit.Add(key, new LR1Unit(key, value));
    }
    foreach (var (key, value) in reader.RegexToken)
    {
        TokenUnit.Add(key, new LR1Unit(key, value));
    }

    // A "@top" key carries the rule name after its first five characters
    // (presumably "@top" plus one separator character).
    static bool IsSkip(string key) => key.StartsWith("@skip", StringComparison.Ordinal);
    static bool IsTop(string key) => key.StartsWith("@top", StringComparison.Ordinal);
    static string RuleName(string key) => IsTop(key) ? key.Substring(5) : key;

    // Pass 1: pre-register every rule name so that rule bodies may reference
    // rules that are defined later in the grammar.
    foreach (var (key, _) in reader.RegularToken)
    {
        if (IsSkip(key))
        {
            continue;
        }
        GrammarUnit.Add(RuleName(key), []);
    }

    // Pass 2: parse the rule bodies; remember the rule marked as start rule.
    string startSymbol = "";
    foreach (var (key, value) in reader.RegularToken)
    {
        if (IsSkip(key))
        {
            continue;
        }
        string ruleName = RuleName(key);
        if (IsTop(key))
        {
            startSymbol = ruleName;
        }
        GrammarUnitCreate(value, ruleName);
    }

    SetGrammarIndex();
    PrintGrammar();

    foreach (var symbol in TokenUnit.Keys)
    {
        GetFirst(symbol);
    }
    foreach (var symbol in GrammarUnit.Keys)
    {
        GetFirst(symbol);
    }
    PrintFirst();

    // Fail with a descriptive message instead of the bare dictionary lookup
    // failure that I0Creator would otherwise raise for a missing start rule.
    if (!GrammarUnit.ContainsKey(startSymbol))
    {
        throw new KeyNotFoundException(
            "Grammar has no start rule: exactly one rule key must start with \"@top\".");
    }

    StartSymbol = startSymbol;
    I0Creator();
    CalculateAllClosure();
    PrintClosure(Enumerable.Range(0, Closures.Count).ToArray());
}
|
||
|
||
/// <summary>
/// Numbers every unit in <see cref="GrammarUnit"/> consecutively from 0, in dictionary
/// enumeration order, and records each unit in <see cref="UnitIndex"/> under its id.
/// </summary>
internal void SetGrammarIndex()
{
    int nextId = 0;
    foreach (var production in GrammarUnit.Values.SelectMany(alternatives => alternatives))
    {
        production.Id = nextId++;
        UnitIndex[production.Id] = production;
    }
}
|
||
|
||
/// <summary>Writes the closures with the given indices to the console, in the order given.</summary>
/// <param name="index">Keys into <see cref="Closures"/>; each must exist.</param>
internal void PrintClosure(params int[] index)
{
    for (int position = 0; position < index.Length; position++)
    {
        Console.WriteLine(Closures[index[position]]);
    }
}
|
||
|
||
/// <summary>
/// Builds the initial closure: clones every production of <see cref="StartSymbol"/>,
/// gives each clone the lookahead "End", lets the closure add its missing units and
/// compute its prospects, then registers it in <see cref="Closures"/> under its Index.
/// </summary>
internal void I0Creator()
{
    var initial = new LR1Closure(this);

    foreach (var startProduction in GrammarUnit[StartSymbol])
    {
        var item = startProduction.Clone();
        item.Prospect.Add("End");
        initial.Units.Add(item);
    }

    // Order matters: the units are completed first, then the prospects are derived.
    initial.AddMissingUnits();
    initial.CalculateProspects();

    Closures.Add(initial.Index, initial);
}
|
||
|
||
/// <summary>
/// Returns the FIRST set of <paramref name="root"/>: the names of all terminals that can
/// be reached by repeatedly following the leading symbol of a production. The result is
/// cached in <see cref="FirstGroup"/>.
/// </summary>
/// <remarks>
/// Left-recursive and mutually recursive rules are handled with a visited set instead of
/// recursing without bound. Only the requested symbol is cached; symbols visited on the
/// way are computed when they are requested themselves.
/// NOTE(review): empty productions contribute nothing and a nullable leading symbol is
/// not skipped — confirm whether the closure code compensates for that.
/// </remarks>
/// <param name="root">Name of a non-terminal or terminal.</param>
/// <returns>The set of terminal names; for a non-terminal this is the cached instance.</returns>
/// <exception cref="Exception">A symbol is neither a known non-terminal nor a known terminal.</exception>
internal HashSet<string> GetFirst(string root)
{
    if (FirstGroup.TryGetValue(root, out var cached))
    {
        return cached;
    }

    // Non-terminals take precedence over terminals of the same name.
    if (!GrammarUnit.ContainsKey(root))
    {
        if (!TokenUnit.TryGetValue(root, out var token))
        {
            throw new Exception("Grammar can't be recognized. " + root);
        }
        FirstGroup.Add(root, [token.Name]);
        return [token.Name];
    }

    HashSet<string> first = [];
    CollectFirst(root, [], first);
    FirstGroup.Add(root, first);
    return first;
}

/// <summary>
/// Depth-first walk over leading symbols starting at <paramref name="symbol"/>, adding
/// every terminal name found to <paramref name="first"/>. <paramref name="visited"/>
/// stops the walk from entering the same symbol twice.
/// </summary>
private void CollectFirst(string symbol, HashSet<string> visited, HashSet<string> first)
{
    if (!visited.Add(symbol))
    {
        return;
    }

    // A cached set is already complete, so there is no need to expand the symbol again.
    if (FirstGroup.TryGetValue(symbol, out var known))
    {
        first.UnionWith(known);
        return;
    }

    if (GrammarUnit.TryGetValue(symbol, out var units))
    {
        foreach (var unit in units)
        {
            if (unit.Type == "Token")
            {
                CollectFirst(unit.Name, visited, first);
            }
            else if (unit.Type == "Grammar")
            {
                var lead = unit.Grammar.FirstOrDefault();
                if (lead != null)
                {
                    CollectFirst(lead, visited, first);
                }
            }
        }
    }
    else if (TokenUnit.TryGetValue(symbol, out var token))
    {
        first.Add(token.Name);
    }
    else
    {
        throw new Exception("Grammar can't be recognized. " + symbol);
    }
}
|
||
|
||
/// <summary>
/// Expands closures in index order starting at 0: each closure is asked for its
/// successor closures, which are registered in <see cref="Closures"/> under their Index.
/// The walk ends at the first index that has no closure.
/// </summary>
/// <exception cref="Exception">
/// The walk stopped although the last processed closure produced successors (the next
/// index is missing), or there is no closure with index 0 at all.
/// </exception>
internal void CalculateAllClosure()
{
    // Starts as true so that an empty Closures table is reported as an error too.
    bool lastProducedNew = true;
    int index = 0;

    while (Closures.TryGetValue(index, out var current))
    {
        List<LR1Closure> successors = current.NextClosures();
        lastProducedNew = successors.Count > 0;
        foreach (var successor in successors)
        {
            Closures.Add(successor.Index, successor);
        }
        index++;
    }

    if (lastProducedNew && !Closures.ContainsKey(index))
    {
        throw new Exception("Really weird. This situation shouldn't be possible.");
    }
}
|
||
|
||
/// <summary>
/// Parses a rule body and registers the resulting productions in <see cref="GrammarUnit"/>.
/// </summary>
/// <remarks>
/// If the whole body is one repetition ("x*", "x+", "(...)*" or "(...)+"), it is handed to
/// <see cref="DeClosure"/>; when a name was supplied, that name is registered as a single
/// production referring to the generated helper rule. Otherwise the body is scanned
/// character by character: spaces separate symbols, '|' separates alternatives, a
/// parenthesised group or a repetition becomes an anonymous helper rule.
/// </remarks>
/// <param name="value">The rule body.</param>
/// <param name="name">Rule name; when empty a name is generated from the body.</param>
/// <returns>
/// The name of the generated helper rule for a whole-body repetition, otherwise the
/// (given or generated) name under which the productions were stored.
/// </returns>
/// <exception cref="Exception">A symbol is neither a known rule nor a known token.</exception>
internal string GrammarUnitCreate(string value, string name = "")
{
    if (value.EndsWith('*') || value.EndsWith('+'))
    {
        bool wrapped = value.StartsWith('(') && value[^2] == ')';
        if (wrapped || value.IndexOf(' ') == -1)
        {
            // Strip the repetition marker and, if present, the surrounding parentheses.
            string inner = wrapped ? value[1..^2] : value[..^1];
            string helper = DeClosure(inner, value[^1]);
            if (name != "")
            {
                GrammarUnit[name] = [new LR1Unit(name, [helper])];
            }
            return helper;
        }
    }

    if (name.Length == 0)
    {
        name = NameGenerator(value);
    }

    List<List<string>> alternatives = [];
    List<string> sequence = [];
    bool insideGroup = false;
    int start = 0;

    // The trailing space flushes the last symbol through the ' ' branch.
    value = value + ' ';
    for (int pos = 0; pos < value.Length; pos++)
    {
        char ch = value[pos];

        // Everything inside parentheses is skipped until the closing one.
        if (insideGroup)
        {
            if (ch == ')')
            {
                insideGroup = false;
            }
            continue;
        }

        if (ch == '(')
        {
            start = pos;
            insideGroup = true;
        }
        else if (ch == ' ')
        {
            string piece = value.Substring(start, pos - start);
            start = pos + 1;
            string symbol = piece.Trim();

            if (GrammarUnit.ContainsKey(symbol) || TokenUnit.ContainsKey(symbol))
            {
                sequence.Add(symbol);
            }
            else if (piece.StartsWith('(') && piece.EndsWith(')'))
            {
                sequence.Add(GrammarUnitCreate(piece.Substring(1, piece.Length - 2)));
            }
            else if (symbol.Length != 0)
            {
                throw new Exception("Grammar can't be recognized. " + piece);
            }
        }
        else if (ch == '|')
        {
            string piece = value.Substring(start, pos - start);
            if (piece.Trim().TrimEnd('|').Length != 0)
            {
                string symbol = piece.Trim();
                if (!GrammarUnit.ContainsKey(symbol) && !TokenUnit.ContainsKey(symbol))
                {
                    throw new Exception("Grammar can't be recognized. " + piece);
                }
                sequence.Add(symbol);
            }

            // Close the current alternative and start a new one.
            alternatives.Add(sequence);
            sequence = [];
            start = pos + 1;
        }
        else if (ch == '*' || ch == '+')
        {
            // The pending text including the marker becomes an anonymous helper rule.
            sequence.Add(GrammarUnitCreate(value.Substring(start, pos - start + 1)));
            start = pos + 1;
        }
    }

    if (sequence.Count > 0)
    {
        alternatives.Add(sequence);
    }

    GrammarUnit[name] = ToLR1Units(name, alternatives);
    return name;
}
|
||
|
||
/// <summary>
/// Expands a repetition into a right-recursive helper rule and registers it in
/// <see cref="GrammarUnit"/> under a generated name.
/// </summary>
/// <remarks>
/// For '*' the helper gets an empty production plus "alternative helper" for every
/// alternative; for '+' it gets "alternative" and "alternative helper" for every
/// alternative. Nested repetition is rejected with an error.
/// Symbols are separated by any number of spaces for both repetition kinds.
/// </remarks>
/// <param name="value">The repeated body without the repetition marker; alternatives are separated by '|'.</param>
/// <param name="type">The repetition marker, '*' or '+'.</param>
/// <returns>The generated name of the helper rule.</returns>
/// <exception cref="Exception">
/// The body contains '+' or '*', the marker is unknown, a symbol is neither a known
/// token nor a known rule, or a '+' repetition has an empty alternative.
/// </exception>
internal string DeClosure(string value, char type)
{
    if (value.Contains('+') || value.Contains('*'))
    {
        throw new Exception("Grammar too complex. " + value);
    }

    string name = NameGenerator(value);
    List<List<string>> productions = [];

    if (type == '*')
    {
        // Zero repetitions: the empty production.
        productions.Add([]);
    }
    else if (type != '+')
    {
        throw new Exception("Unknown type. " + type);
    }

    foreach (string alternative in value.Split('|'))
    {
        List<string> symbols = [];
        foreach (string symbol in alternative.Split(' ', StringSplitOptions.RemoveEmptyEntries))
        {
            if (!TokenUnit.ContainsKey(symbol) && !GrammarUnit.ContainsKey(symbol))
            {
                throw new Exception("Unknown grammar. " + symbol);
            }
            symbols.Add(symbol);
        }

        if (type == '+')
        {
            // An empty alternative would make a one-or-more repetition nullable.
            if (symbols.Count == 0)
            {
                throw new Exception("Unknown grammar. ");
            }
            // One repetition: the alternative on its own.
            productions.Add([.. symbols]);
        }

        // Right recursion: the alternative followed by the helper rule itself.
        productions.Add([.. symbols, name]);
    }

    GrammarUnit[name] = ToLR1Units(name, productions);
    return name;
}
|
||
|
||
/// <summary>
/// Derives a rule name from a rule body: the body is cut at every '(', ')', '*', '|'
/// and space, the pieces are trimmed and joined with '_', and the first suffix
/// "_0" .. "_8" that is not yet a key of <see cref="GrammarUnit"/> is appended.
/// </summary>
/// <param name="key">The rule body to derive the name from.</param>
/// <returns>A name that is currently unused in <see cref="GrammarUnit"/>.</returns>
/// <exception cref="Exception">All nine suffixes are already taken.</exception>
internal string NameGenerator(string key)
{
    // NOTE(review): the original chain also contained Replace(" ", " "), which is a
    // no-op as written; collapsing double spaces may have been intended — confirm.
    string[] pieces = key.Split('(', ')', '*', '|', ' ');
    string stem = string.Join('_', pieces.Select(piece => piece.Trim()));

    foreach (int suffix in Enumerable.Range(0, 9))
    {
        string candidate = stem + '_' + suffix;
        if (!GrammarUnit.ContainsKey(candidate))
        {
            return candidate;
        }
    }

    throw new Exception("Grammar too Complex:" + key);
}
|
||
|
||
/// <summary>Creates one unit named <paramref name="name"/> per symbol sequence, keeping the order.</summary>
/// <param name="name">Rule name given to every created unit.</param>
/// <param name="grammars">The alternatives of the rule, each a sequence of symbol names.</param>
/// <returns>A new list with one unit per alternative.</returns>
internal List<LR1Unit> ToLR1Units(string name, List<List<string>> grammars)
{
    return grammars.ConvertAll(symbols => new LR1Unit(name, symbols));
}
|
||
|
||
/// <summary>
/// Writes the grammar to the console: a heading, then for every rule its key followed
/// by one line per unit.
/// </summary>
private void PrintGrammar()
{
    Console.WriteLine("文法:");
    foreach (var rule in GrammarUnit)
    {
        Console.WriteLine("key: " + rule.Key);
        rule.Value.ForEach(unit => Console.WriteLine(" " + unit));
    }
}
|
||
|
||
/// <summary>
/// Writes the FIRST sets to the console: a heading, then one line per symbol of the
/// form "symbol: " followed by each member and a trailing space.
/// </summary>
internal void PrintFirst()
{
    Console.WriteLine("First集:");
    foreach (var entry in FirstGroup)
    {
        var line = new StringBuilder();
        line.Append(entry.Key).Append(": ");
        foreach (var member in entry.Value)
        {
            line.Append(member).Append(' ');
        }
        Console.WriteLine(line.ToString());
    }
}
|
||
|
||
/// <summary>
/// Looks up the units for a symbol name. Terminals are checked first and yield a new
/// single-element list; non-terminals yield the list stored in <see cref="GrammarUnit"/>.
/// </summary>
/// <param name="name">Name of a terminal or non-terminal.</param>
/// <returns>The units registered under <paramref name="name"/>.</returns>
/// <exception cref="Exception">No terminal or non-terminal has that name.</exception>
internal List<LR1Unit> GetUnits(string name)
{
    if (TokenUnit.TryGetValue(name, out var terminal))
    {
        return [terminal];
    }

    return GrammarUnit.TryGetValue(name, out var productions)
        ? productions
        : throw new Exception($"No Unit Called: {name}");
}
|
||
}
|
||
}
|