[feature] 完成语法分析
This commit is contained in:
parent
235500f8ce
commit
3b735a43b8
|
@ -7,4 +7,13 @@
|
|||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Update="files\code">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="files\if-grammar.grammar">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
|
|
@ -1,16 +1,124 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using System.Text;
|
||||
|
||||
namespace CompilerDesignIFlr1
|
||||
{
|
||||
/// <summary>
/// Loads a grammar definition file and splits it into the lexer token tables
/// (keywords, symbols, regular expressions) and the remaining rule sections
/// that the parser generator consumes.
/// </summary>
internal class GrammarReader
{
    // Keyword tokens: "@tokens" entries written as "!word". Used by lexical analysis.
    internal Dictionary<string, string> KeyToken { get; set; } = [];

    // Literal symbol tokens such as ";" or "<=". Used by lexical analysis.
    internal Dictionary<string, string> SymbolToken { get; set; } = [];

    // Regular-expression tokens: "@tokens" entries containing '$'. Used by lexical analysis.
    internal Dictionary<string, string> RegexToken { get; set; } = [];

    // Every top-level section except "@tokens" (the grammar rules). Used by syntax analysis.
    internal Dictionary<string, string> RegularToken { get; set; } = [];

    /// <summary>
    /// Reads and classifies the grammar file, echoing every table to the console.
    /// </summary>
    /// <param name="grammarFilePath">Path of the grammar file to read.</param>
    /// <exception cref="KeyNotFoundException">The file has no <c>@tokens</c> section.</exception>
    /// <exception cref="FormatException">A token has an empty definition.</exception>
    internal GrammarReader(string grammarFilePath)
    {
        string fileContent = File.ReadAllText(grammarFilePath);
        var sections = fileContent.PartParser();

        if (!sections.TryGetValue("@tokens", out var tokenSection))
        {
            throw new KeyNotFoundException(
                $"Grammar file '{grammarFilePath}' does not contain an @tokens section."
            );
        }

        foreach (var (key, value) in tokenSection.PartParser())
        {
            if (value.Length == 0)
            {
                throw new FormatException($"Token '{key}' has an empty definition.");
            }

            if (IsKeywordDefinition(value))
            {
                // Drop the leading '!' marker, keep the keyword text.
                KeyToken.Add(key, value.DeDoubleQuote().Substring(1));
            }
            else if (value.Contains('$'))
            {
                RegexToken.Add(key, value.Replace("$", "").DeDoubleQuote());
            }
            else
            {
                SymbolToken.Add(key, value.DeDoubleQuote());
            }
        }

        Console.WriteLine("Key:");
        foreach (var (key, value) in KeyToken)
        {
            Console.WriteLine($"{key}: {value}");
        }

        Console.WriteLine("Symbol:");
        foreach (var (key, value) in SymbolToken)
        {
            Console.WriteLine($"{key}: {value}");
        }

        Console.WriteLine("Regex:");
        foreach (var (key, value) in RegexToken)
        {
            Console.WriteLine($"{key}: {value}");
        }

        foreach (var (key, value) in sections)
        {
            if (key == "@tokens")
            {
                continue;
            }

            RegularToken.Add(key, value);
            Console.WriteLine(key + ": " + value);
        }
    }

    // A keyword definition looks like "!word": the character after the opening quote is '!'
    // and is followed by real keyword text. "!=" and a lone "!" are ordinary symbols.
    private static bool IsKeywordDefinition(string value) =>
        value.Length > 2 && value[1] == '!' && value[2] != '=' && value[2] != '"';
}
|
||||
|
||||
/// <summary>
/// String helpers used while reading the grammar file.
/// </summary>
file static class StringExtension
{
    /// <summary>
    /// Splits text of the form <c>name { body } name { body } ...</c> into a dictionary
    /// that maps each trimmed name to its trimmed body. Only top-level braces delimit a
    /// section; nested braces stay inside the body, and braces between double quotes
    /// are ignored. Line breaks are treated as spaces.
    /// </summary>
    /// <param name="input">The text to split.</param>
    /// <returns>Section name to section body, in order of appearance.</returns>
    /// <exception cref="ArgumentException">Two sections share the same name.</exception>
    internal static Dictionary<string, string> PartParser(this string input)
    {
        Dictionary<string, string> result = [];
        input = input.Replace("\n", " ").Replace("\r", " ");

        int start = 0;
        int depth = 0;
        string key = "";
        bool inQuote = false;

        for (int i = 0; i < input.Length; i++)
        {
            char c = input[i];
            if (c == '"')
            {
                inQuote = !inQuote;
                continue;
            }

            if (inQuote)
            {
                continue;
            }

            if (c == '{')
            {
                // Only the outermost opening brace ends the section name.
                if (depth++ == 0)
                {
                    key = input.Substring(start, i - start);
                    start = i + 1;
                }
            }
            else if (c == '}')
            {
                // Only the matching outermost closing brace ends the section body.
                if (depth-- == 1)
                {
                    result.Add(key.Trim(), input.Substring(start, i - start).Trim());
                    start = i + 1;
                }
            }
        }

        return result;
    }

    /// <summary>
    /// Removes double quotes and all spaces from <paramref name="s"/>. Inside a quoted
    /// run, the sequence <c>\"</c> is turned into a literal double quote.
    /// </summary>
    /// <param name="s">The quoted definition text.</param>
    /// <returns>The text without quote delimiters and spaces.</returns>
    internal static string DeDoubleQuote(this string s)
    {
        StringBuilder sb = new();
        bool inQuote = false;

        foreach (char c in s)
        {
            if (c == ' ')
            {
                continue;
            }

            if (c != '"')
            {
                sb.Append(c);
                continue;
            }

            if (!inQuote)
            {
                inQuote = true;
            }
            else if (sb.Length > 0 && sb[^1] == '\\')
            {
                // Escaped quote: replace the backslash with the quote itself.
                sb[^1] = '\"';
            }
            else
            {
                // Closing quote. The length check above keeps an empty "" from
                // indexing into an empty builder.
                inQuote = false;
            }
        }

        return sb.ToString();
    }
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.ComponentModel;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
@ -8,5 +9,198 @@ namespace CompilerDesignIFlr1
|
|||
{
|
||||
/// <summary>
/// One state of the LR(1) automaton: a set of items (<see cref="LR1Unit"/>) plus the
/// transitions to other states, keyed by the symbol that is consumed.
/// </summary>
internal class LR1Closure
{
    // Next state number to hand out; shared by every closure that is constructed.
    internal static int GlobalIndex = 0;

    // State number of this closure.
    internal int Index = 0;

    // Owner that supplies the grammar, the FIRST sets and the already known closures.
    internal LR1Creator LR1Creator;

    // Items contained in this state.
    internal HashSet<LR1Unit> Units { get; set; } = [];

    // Transitions: consumed symbol -> index of the target state.
    internal Dictionary<string, int> Next { get; set; } = [];

    /// <summary>Creates an empty closure and assigns it the next state number.</summary>
    internal LR1Closure(LR1Creator lr1Creator)
    {
        LR1Creator = lr1Creator;
        Index = GlobalIndex++;
    }

    /// <summary>
    /// Creates a closure from kernel items, then adds the missing closure items and
    /// computes their lookahead sets.
    /// </summary>
    internal LR1Closure(LR1Creator lr1Creator, List<LR1Unit> units)
    {
        Units.UnionWith(units);
        Index = GlobalIndex++;
        LR1Creator = lr1Creator;
        AddMissingUnits();
        CalculateProspects();
    }

    /// <summary>
    /// Propagates lookahead symbols between the items of this closure until no set grows.
    /// </summary>
    internal void CalculateProspects()
    {
        bool haveChange = true;
        while (haveChange)
        {
            haveChange = false;
            foreach (var unit in Units)
            {
                // Compare sizes before and after the union. The set is updated in place,
                // so comparing it with itself could never report a change and the loop
                // would stop after a single pass.
                int before = unit.Prospect.Count;
                foreach (var source in UnitsHaveDotBefore(unit.Name))
                {
                    unit.Prospect.UnionWith(GetProspectsOf(source));
                }

                if (unit.Prospect.Count != before)
                {
                    haveChange = true;
                }
            }
        }

        // The lookahead sets take part in LR1Unit.GetHashCode, so the hashes stored in
        // the set are stale now. Re-insert every item so lookups work again.
        var rehashed = new HashSet<LR1Unit>();
        foreach (var unit in Units)
        {
            rehashed.Add(unit);
        }

        Units = rehashed;
    }

    /// <summary>Returns the items whose dot stands directly before <paramref name="name"/>.</summary>
    internal List<LR1Unit> UnitsHaveDotBefore(string name) =>
        Units.Where(unit => unit.Next() == name).ToList();

    // For a production X => y z ...
    // and an item A => . X B, this returns the symbols that can start B, plus the
    // lookahead of the item itself when everything after X can be empty.
    internal List<string> GetProspectsOf(LR1Unit unit)
    {
        if (unit.Name == LR1Creator.StartSymbol)
        {
            return ["End"];
        }

        int afterNext = unit.PointPosition + 1;
        int last = unit.Grammar.Count - 1;

        // Nothing follows the symbol after the dot: inherit the item's own lookahead.
        List<string> ans = afterNext == unit.Grammar.Count ? [.. unit.Prospect] : [];

        for (int i = afterNext; i < unit.Grammar.Count; i++)
        {
            string symbol = unit.Grammar[i];
            var candidates = LR1Creator.GetUnits(symbol);
            ans.AddRange(LR1Creator.FirstGroup[symbol]);

            bool canBeEmpty = candidates.Any(x => x.Nullable());
            if (!canBeEmpty)
            {
                break;
            }

            if (i == last)
            {
                ans.AddRange(unit.Prospect);
            }
        }

        return ans;
    }

    /// <summary>
    /// Adds, for every non-terminal that follows a dot, fresh copies of all its
    /// productions, repeating until no new item appears.
    /// </summary>
    internal void AddMissingUnits()
    {
        bool haveNew = true;
        while (haveNew)
        {
            haveNew = false;
            HashSet<LR1Unit> addingUnits = [];

            foreach (LR1Unit unit in Units)
            {
                string? next = unit.Next();
                if (next is null || !LR1Creator.GrammarUnit.TryGetValue(next, out var productions))
                {
                    continue;
                }

                var copies = new HashSet<LR1Unit>(productions.Select(x => x.Clone()));
                if (copies.IsSubsetOf(Units))
                {
                    continue;
                }

                haveNew = true;
                addingUnits.UnionWith(copies);
            }

            Units.UnionWith(addingUnits);
        }
    }

    /// <summary>
    /// Computes the successor states of this closure and records the transitions in
    /// <see cref="Next"/>.
    /// </summary>
    /// <returns>Only the successor closures that were not already known to the creator.</returns>
    internal List<LR1Closure> NextClosures()
    {
        // Group the items by the symbol that follows their dot.
        Dictionary<string, List<LR1Unit>> nextSteps = [];
        foreach (LR1Unit unit in Units)
        {
            string? symbol = unit.Next();
            if (symbol is null)
            {
                continue;
            }

            if (!nextSteps.TryGetValue(symbol, out var group))
            {
                group = [];
                nextSteps.Add(symbol, group);
            }

            group.Add(unit);
        }

        List<LR1Closure> ans = [];
        foreach (var (symbol, group) in nextSteps)
        {
            var closure = new LR1Closure(LR1Creator, group.Select(x => x.ToNext()).ToList());
            var existClosure = LR1Creator.Closures.Values.FirstOrDefault(x => x.Equals(closure));

            if (existClosure is not null)
            {
                // The candidate duplicates a known state: give its number back.
                LR1Closure.GlobalIndex--;
                Next.Add(symbol, existClosure.Index);
                continue;
            }

            ans.Add(closure);
            Next.Add(symbol, closure.Index);
        }

        return ans;
    }

    /// <summary>Returns the items whose dot is at the end of the production.</summary>
    internal List<LR1Unit> GetReduceUnits() => Units.Where(x => x.ReadyToReduce()).ToList();

    /// <summary>Two closures are equal when they hold the same number of items and
    /// every item of the other closure has an equal item here.</summary>
    public override bool Equals(object? obj)
    {
        if (obj is not LR1Closure closure)
        {
            return false;
        }

        return closure.Units.Count == Units.Count
            && closure.Units.All(x => Units.Any(y => y.Equals(x)));
    }

    /// <summary>XOR of the item hashes, so the result does not depend on item order.</summary>
    public override int GetHashCode()
    {
        int hash = 0;
        foreach (var unit in Units)
        {
            hash ^= unit.GetHashCode();
        }

        return hash;
    }

    /// <summary>Returns the state number followed by one line per item.</summary>
    public override string ToString()
    {
        StringBuilder sb = new();
        sb.Append($"闭包{Index}:\n");
        foreach (var unit in Units)
        {
            sb.Append(unit + "\n");
        }

        return sb.ToString();
    }
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,407 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.ComponentModel.DataAnnotations;
|
||||
using System.Dynamic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace CompilerDesignIFlr1
|
||||
{
|
||||
/// <summary>
/// Builds the LR(1) automaton for a grammar: converts the rule text into productions,
/// numbers them, computes FIRST sets and generates every closure (state).
/// </summary>
internal class LR1Creator
{
    // Name of the rule marked with "@top".
    internal string StartSymbol { get; init; }

    // All states of the automaton, keyed by state number.
    internal Dictionary<int, LR1Closure> Closures = [];

    // Terminals, keyed by token name.
    internal Dictionary<string, LR1Unit> TokenUnit = [];

    // Non-terminals; each one maps to all of its alternatives.
    internal Dictionary<string, List<LR1Unit>> GrammarUnit = [];

    // FIRST sets, keyed by symbol name.
    internal Dictionary<string, HashSet<string>> FirstGroup = [];

    // Productions keyed by the id assigned in SetGrammarIndex.
    internal Dictionary<int, LR1Unit> UnitIndex = [];

    /// <summary>
    /// Builds the whole automaton from the parsed grammar file and prints the grammar,
    /// the FIRST sets and every closure to the console.
    /// </summary>
    /// <param name="reader">The parsed grammar file.</param>
    internal LR1Creator(GrammarReader reader)
    {
        TokenUnit.Add("End", new LR1Unit("End", "#"));
        var tokens = reader.SymbolToken.Concat(reader.KeyToken).Concat(reader.RegexToken);
        foreach (var (key, value) in tokens)
        {
            TokenUnit.Add(key, new LR1Unit(key, value));
        }

        // Register every rule name first so rules may reference rules defined later.
        foreach (var key in reader.RegularToken.Keys)
        {
            if (key.StartsWith("@skip"))
            {
                continue;
            }

            GrammarUnit.Add(RuleName(key), []);
        }

        string startSymbol = "";
        foreach (var (key, value) in reader.RegularToken)
        {
            if (key.StartsWith("@skip"))
            {
                continue;
            }

            string ruleName = RuleName(key);
            if (key.StartsWith("@top"))
            {
                startSymbol = ruleName;
            }

            GrammarUnitCreate(value, ruleName);
        }

        SetGrammarIndex();
        PrintGrammar();

        foreach (var (key, _) in TokenUnit)
        {
            GetFirst(key);
        }

        foreach (var (key, _) in GrammarUnit)
        {
            GetFirst(key);
        }

        PrintFirst();
        StartSymbol = startSymbol;
        I0Creator();
        CalculateAllClosure();
        PrintClosure(Enumerable.Range(0, Closures.Count).ToArray());

        // "@top Name" -> "Name" (skips the five characters of "@top ").
        static string RuleName(string key) => key.StartsWith("@top") ? key.Substring(5) : key;
    }

    /// <summary>Assigns consecutive ids to all productions and fills <see cref="UnitIndex"/>.</summary>
    internal void SetGrammarIndex()
    {
        int id = 0;
        foreach (var unit in GrammarUnit.Values.SelectMany(units => units))
        {
            unit.Id = id++;
            UnitIndex[unit.Id] = unit;
        }
    }

    /// <summary>Prints the closures with the given state numbers.</summary>
    internal void PrintClosure(params int[] index)
    {
        foreach (int t in index)
        {
            Console.WriteLine(Closures[t]);
        }
    }

    /// <summary>
    /// Creates the initial state from the productions of the start symbol, each with
    /// the lookahead "End", and stores it in <see cref="Closures"/>.
    /// </summary>
    internal void I0Creator()
    {
        // State numbers come from a static counter. Restart it so the initial state is
        // number 0 even when another automaton was built earlier in this process.
        LR1Closure.GlobalIndex = 0;

        var closure = new LR1Closure(this);
        foreach (var root in GrammarUnit[StartSymbol])
        {
            var res = root.Clone();
            res.Prospect.Add("End");
            closure.Units.Add(res);
        }

        closure.AddMissingUnits();
        closure.CalculateProspects();
        Closures.Add(closure.Index, closure);
    }

    /// <summary>
    /// Returns the FIRST set of <paramref name="root"/>, computing and caching it on first use.
    /// Only the first symbol of each alternative is considered.
    /// </summary>
    /// <exception cref="Exception">The symbol is neither a rule nor a token.</exception>
    internal HashSet<string> GetFirst(string root)
    {
        if (FirstGroup.TryGetValue(root, out var cached))
        {
            return cached;
        }

        if (GrammarUnit.TryGetValue(root, out var units))
        {
            HashSet<string> res = [];
            foreach (var unit in units)
            {
                if (unit.Type == "Token")
                {
                    res.UnionWith(GetFirst(unit.Name));
                }
                else if (unit.Type == "Grammar")
                {
                    var head = unit.Grammar.FirstOrDefault();

                    // An alternative that starts with the rule itself contributes nothing
                    // new; skipping it keeps direct left recursion from recursing forever.
                    // NOTE(review): indirect left recursion (A => B ..., B => A ...) still
                    // recurses without bound.
                    if (head == null || head == root)
                    {
                        continue;
                    }

                    res.UnionWith(GetFirst(head));
                }
            }

            FirstGroup.Add(root, res);
            return res;
        }

        if (TokenUnit.TryGetValue(root, out var token))
        {
            HashSet<string> res = [token.Name];
            FirstGroup.Add(root, res);
            return res;
        }

        throw new Exception("Grammar can't be recognized. " + root);
    }

    /// <summary>
    /// Expands the states one after another, in index order, until no unprocessed
    /// state is left.
    /// </summary>
    internal void CalculateAllClosure()
    {
        int t = 0;
        bool haveNew = true;
        while (Closures.TryGetValue(t, out var closure))
        {
            List<LR1Closure> nextClosures = closure.NextClosures();
            haveNew = nextClosures.Count > 0;
            foreach (var next in nextClosures)
            {
                Closures.Add(next.Index, next);
            }

            t++;
        }

        // Sanity check: new states were reported but the next index does not exist.
        if (!Closures.ContainsKey(t) && haveNew)
        {
            throw new Exception("Really weird. This situation shouldn't be possible.");
        }
    }

    /// <summary>
    /// Converts the text of one rule into productions and stores them in
    /// <see cref="GrammarUnit"/>. Repetitions (<c>X*</c>, <c>X+</c>, <c>(...)*</c>,
    /// <c>(...)+</c>) are replaced by generated helper rules.
    /// </summary>
    /// <param name="value">The rule body.</param>
    /// <param name="name">The rule name; when empty, a name is generated.</param>
    /// <returns>The helper rule name when the whole body is one repetition, otherwise
    /// the name of the rule that was stored.</returns>
    /// <exception cref="Exception">The body references an unknown symbol.</exception>
    internal string GrammarUnitCreate(string value, string name = "")
    {
        if (value.EndsWith('*') || value.EndsWith('+'))
        {
            // The whole body is a single repetition: "(...)x" or "Symbolx".
            string? repeated = null;
            if (value.StartsWith('(') && value[^2] == ')')
            {
                repeated = value.Substring(1, value.Length - 3);
            }
            else if (value.IndexOf(' ') == -1)
            {
                repeated = value.Substring(0, value.Length - 1);
            }

            if (repeated is not null)
            {
                string helper = DeClosure(repeated, value[^1]);
                if (name != "")
                {
                    GrammarUnit[name] = [new LR1Unit(name, [helper])];
                }

                return helper;
            }
        }

        bool inParen = false;
        int last = 0;
        name = name.Length == 0 ? NameGenerator(value) : name;
        List<List<string>> ans = [];
        List<string> grammar = [];

        // A trailing space makes sure the last symbol is flushed.
        value = value + ' ';
        for (var i = 0; i < value.Length; i++)
        {
            char c = value[i];
            if (inParen && c != ')')
            {
                continue;
            }

            switch (c)
            {
                case '(':
                    last = i;
                    inParen = true;
                    break;
                case ')':
                    inParen = false;
                    break;
                case ' ':
                    AppendSymbol(value.Substring(last, i - last), grammar);
                    last = i + 1;
                    break;
                case '|':
                {
                    string pending = value.Substring(last, i - last);
                    if (pending.Trim().TrimEnd('|').Length != 0)
                    {
                        AppendSymbol(pending, grammar);
                    }

                    // End of one alternative.
                    ans.Add(grammar);
                    grammar = [];
                    last = i + 1;
                    break;
                }
                case '*':
                case '+':
                    // A repetition inside the body becomes a reference to a helper rule.
                    grammar.Add(GrammarUnitCreate(value.Substring(last, i - last + 1)));
                    last = i + 1;
                    break;
                default:
                    break;
            }
        }

        if (grammar.Count > 0)
        {
            ans.Add(grammar);
        }

        GrammarUnit[name] = ToLR1Units(name, ans);
        return name;

        // Adds a known symbol to the alternative; blank fragments are ignored.
        void AppendSymbol(string fragment, List<string> target)
        {
            string symbol = fragment.Trim();
            if (GrammarUnit.ContainsKey(symbol) || TokenUnit.ContainsKey(symbol))
            {
                target.Add(symbol);
            }
            else if (symbol.Length != 0)
            {
                throw new Exception("Grammar can't be recognized. " + fragment);
            }
        }
    }

    /// <summary>
    /// Creates a right-recursive helper rule for a repetition.
    /// For <c>*</c>: <c>H => (empty) | alt H</c>; for <c>+</c>: <c>H => alt | alt H</c>.
    /// Nested repetitions are rejected.
    /// </summary>
    /// <param name="value">The repeated body, alternatives separated by '|'.</param>
    /// <param name="type">'*' or '+'.</param>
    /// <returns>The generated helper rule name.</returns>
    /// <exception cref="Exception">Nested repetition, unknown symbol or unknown type.</exception>
    internal string DeClosure(string value, char type)
    {
        if (value.Contains('+') || value.Contains('*'))
        {
            throw new Exception("Grammar too complex. " + value);
        }

        string name = NameGenerator(value);
        List<List<string>> ans = [];

        if (type == '*')
        {
            ans.Add([]);
        }
        else if (type != '+')
        {
            throw new Exception("Unknown type. " + type);
        }

        foreach (var alternative in value.Split('|'))
        {
            // Empty fragments (doubled or surrounding spaces) are dropped for both kinds.
            List<string> res = [];
            foreach (var gram in alternative.Split(' ', StringSplitOptions.RemoveEmptyEntries))
            {
                if (!TokenUnit.ContainsKey(gram) && !GrammarUnit.ContainsKey(gram))
                {
                    throw new Exception("Unknown grammar. " + gram);
                }

                res.Add(gram);
            }

            if (type == '+')
            {
                // "One or more" needs something to repeat.
                if (res.Count == 0)
                {
                    throw new Exception("Unknown grammar. ");
                }

                ans.Add([.. res]);
            }

            ans.Add([.. res, name]);
        }

        GrammarUnit[name] = ToLR1Units(name, ans);
        return name;
    }

    /// <summary>
    /// Derives an unused rule name from the rule text by joining its fragments with
    /// '_' and appending a digit from 0 to 8.
    /// </summary>
    /// <exception cref="Exception">All nine candidate names are taken.</exception>
    internal string NameGenerator(string key)
    {
        var list = key.Replace('(', ' ')
            .Replace(')', ' ')
            .Replace('*', ' ')
            .Replace('|', ' ')
            .Replace(" ", " ")
            .Split(' ')
            .Select(x => x.Trim());
        var res = String.Join('_', list);

        for (int i = 0; i < 9; i++)
        {
            string candidate = res + '_' + i;
            if (!GrammarUnit.ContainsKey(candidate))
            {
                return candidate;
            }
        }

        throw new Exception("Grammar too Complex:" + key);
    }

    /// <summary>Wraps each alternative in a production named <paramref name="name"/>.</summary>
    internal List<LR1Unit> ToLR1Units(string name, List<List<string>> grammars) =>
        grammars.Select(grammar => new LR1Unit(name, grammar)).ToList();

    // Prints every rule with its productions.
    private void PrintGrammar()
    {
        Console.WriteLine("文法:");
        foreach (var (key, value) in GrammarUnit)
        {
            Console.WriteLine("key: " + key);
            foreach (var unit in value)
            {
                Console.WriteLine(" " + unit);
            }
        }
    }

    /// <summary>Prints every computed FIRST set.</summary>
    internal void PrintFirst()
    {
        Console.WriteLine("First集:");
        foreach (var (key, value) in FirstGroup)
        {
            Console.Write($"{key}: ");
            foreach (var val in value)
            {
                Console.Write($"{val} ");
            }

            Console.WriteLine();
        }
    }

    /// <summary>
    /// Returns the unit of a token, or all productions of a rule.
    /// </summary>
    /// <exception cref="Exception">No token or rule has that name.</exception>
    internal List<LR1Unit> GetUnits(string name)
    {
        if (TokenUnit.TryGetValue(name, out var unit))
        {
            return [unit];
        }

        if (GrammarUnit.TryGetValue(name, out var units))
        {
            return units;
        }

        throw new Exception($"No Unit Called: {name}");
    }
}
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace CompilerDesignIFlr1
|
||||
{
|
||||
/// <summary>
/// The LR(1) parsing table: one <see cref="Row"/> per state of the automaton.
/// </summary>
internal class LR1Table
{
    // Rows in state order; the list position equals the state number.
    internal List<Row> Rows = [];

    /// <summary>
    /// Builds a row for every closure of <paramref name="creator"/> and prints the table.
    /// </summary>
    /// <exception cref="Exception">Two actions compete for the same lookahead symbol.</exception>
    internal LR1Table(LR1Creator creator)
    {
        int state = 0;
        while (state < creator.Closures.Count)
        {
            Rows.Add(BuildRow(state, creator));
            state++;
        }

        PrintRow();
    }

    // Builds the ACTION and GOTO entries of a single state.
    private static Row BuildRow(int state, LR1Creator creator)
    {
        LR1Closure closure = creator.Closures[state];
        Dictionary<string, int> gotoEntries = [];
        Dictionary<string, string> actionEntries = [];

        // Transitions over a token are shifts; all others are GOTO entries.
        foreach (var (symbol, target) in closure.Next)
        {
            bool isToken = creator.TokenUnit.ContainsKey(symbol);
            if (isToken)
            {
                actionEntries.Add(symbol, "s" + target);
            }
            else
            {
                gotoEntries.Add(symbol, target);
            }
        }

        // Every completed item reduces on each of its lookahead symbols.
        foreach (var unit in closure.GetReduceUnits())
        {
            foreach (string lookahead in unit.Prospect)
            {
                if (!actionEntries.TryAdd(lookahead, "r" + unit.Id))
                {
                    throw new Exception("Reduce,Reduce/Reduce,Shift error occurred.");
                }
            }
        }

        return new Row(state, gotoEntries, actionEntries);
    }

    // Writes every row to the console.
    private void PrintRow() => Rows.ForEach(row => Console.WriteLine(row));
}
|
||||
|
||||
/// <summary>
/// One row of the parsing table: the ACTION and GOTO entries of a single state.
/// </summary>
internal class Row
{
    // State number of this row.
    internal int Id;

    // Symbol -> target state.
    internal Dictionary<string, int> GOTO;

    // Symbol -> "s<state>" (shift) or "r<production id>" (reduce).
    internal Dictionary<string, string> ACTION;

    internal Row(int id, Dictionary<string, int> @goto, Dictionary<string, string> action)
    {
        Id = id;
        GOTO = @goto;
        ACTION = action;
    }

    /// <summary>Renders the state number followed by all ACTION and GOTO entries.</summary>
    public override string ToString()
    {
        string actions = string.Concat(ACTION.Select(entry => $"{entry.Key}: {entry.Value}, "));
        string gotos = string.Concat(GOTO.Select(entry => $"{entry.Key}: {entry.Value}, "));
        return $"{Id} \nACTION: \n{actions}GOTO: \n{gotos}";
    }

    /// <summary>
    /// Looks up what to do in this state for the symbol <paramref name="name"/>.
    /// </summary>
    /// <returns>
    /// ("ACC", 0) for "StatementList" in state 0; ("GOTO", state) for a GOTO entry or a
    /// shift; ("Reduce", production id) for a reduce.
    /// </returns>
    /// <exception cref="InvalidOperationException">The row has no entry for the symbol.</exception>
    /// <exception cref="Exception">The ACTION entry starts with neither "s" nor "r".</exception>
    public (string, int) Next(string name)
    {
        if (Id == 0 && name == "StatementList")
        {
            return ("ACC", 0);
        }

        if (GOTO.TryGetValue(name, out var target))
        {
            return ("GOTO", target);
        }

        if (!ACTION.TryGetValue(name, out var entry))
        {
            throw new InvalidOperationException($"Code error in state {Id}: {name} not found");
        }

        if (entry.StartsWith("s"))
        {
            return ("GOTO", int.Parse(entry.Substring(1)));
        }

        if (entry.StartsWith("r"))
        {
            return ("Reduce", int.Parse(entry.Substring(1)));
        }

        throw new Exception($"Value can't be recognized. {entry}");
    }
}
|
||||
}
|
|
@ -8,6 +8,127 @@ namespace CompilerDesignIFlr1
|
|||
{
|
||||
/// <summary>
/// Either a token or one LR(1) item: a production (<see cref="Name"/> ::= <see cref="Grammar"/>)
/// with a dot position and a set of lookahead symbols.
/// </summary>
internal class LR1Unit
{
    internal LR1Unit() { }

    /// <summary>Creates a token whose single grammar entry is its text or pattern.</summary>
    internal LR1Unit(string name, string grammar)
    {
        Type = "Token";
        Name = name;
        Grammar = [grammar];
    }

    /// <summary>Creates a token that also carries the matched source text.</summary>
    internal LR1Unit(string name, string value, string grammar)
    {
        Type = "Token";
        Name = name;
        Grammar = [grammar];
        Value = value;
    }

    /// <summary>Creates a production of the rule <paramref name="name"/>.</summary>
    internal LR1Unit(string name, List<string> grammar)
    {
        Type = "Grammar";
        Name = name;
        Grammar = grammar;
    }

    /// <summary>Creates a unit with every field given explicitly (used by <see cref="Clone"/>).</summary>
    internal LR1Unit(
        int id,
        string type,
        string name,
        HashSet<string> prospect,
        int pointPosition,
        List<string> grammar
    )
    {
        Id = id;
        Type = type;
        Name = name;
        Prospect = prospect;
        PointPosition = pointPosition;
        Grammar = grammar;
    }

    // Production number; -1 until one is assigned.
    internal int Id { get; set; } = -1;

    // "Token" or "Grammar".
    internal string Type { get; set; } = "";

    // Token name, or the left-hand side of the production.
    internal string Name { get; set; } = "";

    // Matched source text (tokens produced by the lexer).
    internal string Value { get; set; } = "";

    // Lookahead symbols of the item.
    internal HashSet<string> Prospect { get; set; } = [];

    // Index of the symbol that follows the dot.
    internal int PointPosition { get; set; } = 0;

    // Right-hand side symbols; for a token, its single text or pattern.
    internal List<string> Grammar { get; set; } = [];

    /// <summary>Returns the symbol after the dot, or null when the dot is at the end.</summary>
    internal string? Next() => PointPosition >= Grammar.Count ? null : Grammar[PointPosition];

    /// <summary>True when the dot is at the end of the production.</summary>
    internal bool ReadyToReduce() => PointPosition >= Grammar.Count;

    /// <summary>
    /// True when the top of <paramref name="stack"/> (its last elements) matches the
    /// right-hand side of this production by name.
    /// </summary>
    internal bool CanReduce(List<LR1Unit> stack)
    {
        // A stack shorter than the right-hand side can never match.
        if (stack.Count < Grammar.Count)
        {
            return false;
        }

        for (int i = 1; i <= Grammar.Count; i++)
        {
            if (stack[^i].Name != Grammar[^i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>Renders the item as "id Name ::= a . b , lookahead, ...".</summary>
    public override string ToString()
    {
        StringBuilder sb = new();
        sb.Append($"{Id, 2} ");
        sb.Append(Name).Append(" ::= ");
        for (int i = 0; i < Grammar.Count; i++)
        {
            if (PointPosition == i)
            {
                sb.Append(". ");
            }

            sb.Append(Grammar[i] + " ");
        }

        if (PointPosition == Grammar.Count)
        {
            sb.Append('.');
        }

        foreach (var item in Prospect)
        {
            sb.Append(", " + item);
        }

        return sb.ToString();
    }

    /// <summary>Copies the unit; the lookahead set and the symbol list are copied too.
    /// <see cref="Value"/> is not carried over.</summary>
    internal LR1Unit Clone() =>
        new LR1Unit(Id, Type, Name, new HashSet<string>(Prospect), PointPosition, [.. Grammar]);

    /// <summary>Compares type, name, id, dot position, symbols and lookahead set.</summary>
    public override bool Equals(object? obj)
    {
        if (obj is not LR1Unit other)
        {
            return false;
        }

        return Type == other.Type
            && Name == other.Name
            && Id == other.Id
            && PointPosition == other.PointPosition
            && Grammar.SequenceEqual(other.Grammar)
            && Prospect.SetEquals(other.Prospect);
    }

    /// <summary>
    /// Hash that agrees with <see cref="Equals"/>. The value depends on mutable state, so
    /// it changes when the dot position or the lookahead set changes.
    /// </summary>
    public override int GetHashCode()
    {
        HashCode hash = new HashCode();
        hash.Add(Type);
        hash.Add(Name);
        hash.Add(PointPosition);
        foreach (var item in Grammar)
        {
            hash.Add(item);
        }

        // Equals compares the lookahead sets without regard to order, so they must be
        // combined in an order-independent way; otherwise equal units could get
        // different hash codes.
        int prospectHash = 0;
        foreach (var item in Prospect)
        {
            prospectHash ^= item.GetHashCode();
        }

        hash.Add(prospectHash);
        return hash.ToHashCode();
    }

    /// <summary>True when the right-hand side is empty.</summary>
    internal bool Nullable() => Grammar.Count == 0;

    /// <summary>Returns a copy with the dot moved one symbol to the right.</summary>
    internal LR1Unit ToNext()
    {
        var unit = Clone();
        unit.PointPosition++;
        return unit;
    }
}
|
||||
}
|
||||
|
|
|
@ -2,14 +2,153 @@
|
|||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
using CompilerDesignIFlr1;
|
||||
|
||||
namespace CompilerDesignIflr1
|
||||
{
|
||||
/// <summary>
/// Splits a source file into fragments and classifies each fragment as a keyword,
/// symbol or pattern token, using the tokens defined by the grammar.
/// </summary>
internal class LexicalAnalysis
{
    string Text { get; set; } = "";

    // Token text -> token, for tokens that contain a non-alphanumeric character.
    Dictionary<string, LR1Unit> Symbol = [];

    // Token text -> token, for tokens made only of letters and digits.
    Dictionary<string, LR1Unit> Keyword = [];

    // Regular expression -> token, for tokens whose definition contains '['.
    Dictionary<string, LR1Unit> Patterns = [];

    // Tokens of the analysed file, in source order.
    internal List<LR1Unit> Result;

    /// <summary>
    /// Classifies the tokens of <paramref name="creator"/>, reads the code file, and
    /// tokenizes it. The token tables, the fragments and the result are printed.
    /// </summary>
    internal LexicalAnalysis(LR1Creator creator, string codeFilePath)
    {
        foreach (var (_, unit) in creator.TokenUnit)
        {
            var val = unit.Grammar[0];
            if (val.Contains('['))
            {
                Patterns.Add(val, unit);
            }
            else if (val.All(char.IsLetterOrDigit))
            {
                Keyword.Add(val, unit);
            }
            else
            {
                Symbol.Add(val, unit);
            }
        }

        PrintStrSplitResult();
        var list = StrSplit(File.ReadAllText(codeFilePath));
        foreach (string s in list)
        {
            Console.WriteLine(s);
        }

        Result = Analyze(list);
        PrintAnalyzeResult();
    }

    /// <summary>Returns the tokens as a stack whose top is the first token of the file.</summary>
    internal Stack<LR1Unit> GetStack()
    {
        var stack = new Stack<LR1Unit>();
        for (int i = Result.Count - 1; i >= 0; i--)
        {
            stack.Push(Result[i]);
        }

        return stack;
    }

    /// <summary>Prints the symbol, keyword and pattern tables.</summary>
    internal void PrintStrSplitResult()
    {
        Console.WriteLine("Symbol");
        foreach (var (a, b) in Symbol)
        {
            Console.WriteLine($"{a}: {b}");
        }

        Console.WriteLine("keyword");
        foreach (var (a, b) in Keyword)
        {
            Console.WriteLine($"{a}: {b}");
        }

        Console.WriteLine("Regex");
        foreach (var (a, b) in Patterns)
        {
            Console.WriteLine($"{a}: {b}");
        }
    }

    /// <summary>Prints the name and text of every recognised token.</summary>
    internal void PrintAnalyzeResult()
    {
        Console.WriteLine("units");
        foreach (var unit in Result)
        {
            Console.WriteLine($"{unit.Name}: {unit.Value}");
        }
    }

    /// <summary>
    /// Turns fragments into tokens. Keywords are tried first, then symbols (joining two
    /// neighbouring fragments when together they form a symbol such as "&lt;=" or "!="),
    /// then the patterns in definition order. Unrecognised fragments are reported on
    /// the console and skipped.
    /// </summary>
    public List<LR1Unit> Analyze(List<string> list)
    {
        List<LR1Unit> ans = [];
        for (int i = 0; i < list.Count; i++)
        {
            string s = list[i];
            if (Keyword.TryGetValue(s, out var keyword))
            {
                ans.Add(WithValue(keyword, s));
                continue;
            }

            // The first half of a two-character symbol does not have to be a symbol
            // on its own (for example '!' in "!=").
            bool isSymbol = Symbol.TryGetValue(s, out var symbol);
            bool isPunctuation = s.Length == 1 && !char.IsLetterOrDigit(s[0]);
            if (
                (isSymbol || isPunctuation)
                && i + 1 < list.Count
                && Symbol.TryGetValue(s + list[i + 1], out var pair)
            )
            {
                ans.Add(WithValue(pair, s + list[i + 1]));
                i++;
                continue;
            }

            if (isSymbol)
            {
                ans.Add(WithValue(symbol!, s));
                continue;
            }

            // NOTE(review): patterns are matched unanchored, so a fragment only has to
            // contain a match (e.g. "9abc" matches an identifier pattern) — confirm intent.
            bool noAnswer = true;
            foreach (var (pattern, ut) in Patterns)
            {
                if (Regex.IsMatch(s, pattern))
                {
                    ans.Add(WithValue(ut, s));
                    noAnswer = false;
                    break;
                }
            }

            if (noAnswer)
            {
                Console.WriteLine($"无法识别的字符串: {s}");
            }
        }

        return ans;
    }

    /// <summary>
    /// Splits text into runs of letters/digits and single non-alphanumeric characters.
    /// Whitespace is dropped, and the end marker "#" is appended.
    /// </summary>
    public List<string> StrSplit(string s)
    {
        List<string> lt = [];
        int l = 0;

        // Start at the first character; starting at index 1 would glue a leading
        // delimiter to the first word.
        int r = 0;

        while (r < s.Length)
        {
            if (char.IsLetterOrDigit(s[r]))
            {
                r++;
                continue;
            }

            string word = s.Substring(l, r - l).Trim();
            if (word.Length != 0)
            {
                lt.Add(word);
            }

            string delimiter = s[r].ToString().Trim();
            if (delimiter.Length != 0)
            {
                lt.Add(delimiter);
            }

            l = r + 1;
            r = l;
        }

        if (l < s.Length)
        {
            lt.Add(s.Substring(l, r - l));
        }

        lt.Add("#");
        return lt;
    }

    // Copies a token definition and attaches the matched source text.
    private static LR1Unit WithValue(LR1Unit template, string text)
    {
        var unit = template.Clone();
        unit.Value = text;
        return unit;
    }
}
|
||||
}
|
||||
|
|
|
@ -1,2 +1,10 @@
|
|||
// See https://aka.ms/new-console-template for more information
|
||||
using CompilerDesignIflr1;
|
||||
using CompilerDesignIFlr1;
|
||||
|
||||
Console.WriteLine("Hello, World!");

// Each stage does its work in its constructor: read the grammar, build the LR(1)
// automaton and table, tokenize the sample code, then run the parser.
var reader = new GrammarReader("./files/if-grammar.grammar");
var creator = new LR1Creator(reader);
var table = new LR1Table(creator);
var lexer = new LexicalAnalysis(creator, "./files/code");
_ = new StateMachine(table, lexer, creator);
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using CompilerDesignIflr1;
|
||||
|
||||
namespace CompilerDesignIFlr1
|
||||
{
|
||||
/// <summary>
/// Table-driven LR(1) parser: consumes the tokens produced by the lexer and prints
/// every production that is reduced.
/// </summary>
internal class StateMachine
{
    internal LR1Table Table;
    internal LexicalAnalysis AnalysisResult;
    internal LR1Creator Creator;

    // Parse stack of (symbol, state); starts with the end marker in state 0.
    internal List<(LR1Unit, int)> stack = [(new LR1Unit("End", "#"), 0)];

    // Remaining input; the top is the next symbol to process.
    internal Stack<LR1Unit> Tokens;

    /// <summary>Stores the collaborators, fetches the token stack and runs the parse.</summary>
    internal StateMachine(LR1Table table, LexicalAnalysis analysis, LR1Creator creator)
    {
        Table = table;
        AnalysisResult = analysis;
        Creator = creator;
        Tokens = AnalysisResult.GetStack();
        Compute();
    }

    /// <summary>Processes input symbols until the accept action is reached.</summary>
    internal void Compute()
    {
        while (!ComputeOnce(Tokens.Pop()))
        {
        }
    }

    /// <summary>
    /// Performs one table action for <paramref name="unit"/> in the current state.
    /// </summary>
    /// <returns>True when the input was accepted, otherwise false.</returns>
    /// <exception cref="Exception">The reduce does not match the stack, or the action is unknown.</exception>
    internal bool ComputeOnce(LR1Unit unit)
    {
        int state = stack[^1].Item2;
        var (action, destination) = Table.Rows[state].Next(unit.Name);

        if (action == "GOTO")
        {
            stack.Add((unit, destination));
            return false;
        }

        if (action == "ACC")
        {
            Console.WriteLine(Creator.UnitIndex[destination]);
            return true;
        }

        if (action != "Reduce")
        {
            throw new Exception("Action is not recognized.");
        }

        var production = Creator.UnitIndex[destination];
        var symbols = stack.Select(entry => entry.Item1).ToList();
        if (!production.CanReduce(symbols))
        {
            throw new Exception("Reduce not allow.");
        }

        int length = production.Grammar.Count;
        stack.RemoveRange(stack.Count - length, length);
        Console.WriteLine(production);

        // Put the lookahead back, then the reduced non-terminal on top so it is
        // processed first.
        Tokens.Push(unit);
        Tokens.Push(production.Clone());
        return false;
    }
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
int i=0,t,b=15;
|
||||
if(i>=10)
|
||||
if(k==9)
|
||||
h = 6;
|
||||
else
|
||||
{
|
||||
c=10;
|
||||
if(k<=95 && )
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
@top Program {
|
||||
@top Program {
|
||||
StatementList
|
||||
}
|
||||
|
||||
|
@ -29,7 +29,7 @@ Statement {
|
|||
}
|
||||
|
||||
NoIfStatement {
|
||||
AssignmentStatement Semicolon | VariableDefinition Semicolon | LBrace Statement* RBrace | ConstantDefinition Semicolon
|
||||
AssignmentStatement Semicolon | VariableDefinition Semicolon | LBrace Statement* RBrace | ConstantDefinition Semicolon | Semicolon
|
||||
}
|
||||
|
||||
AssignmentStatement {
|
||||
|
@ -40,14 +40,14 @@ Term {
|
|||
Factor (MultiplyLike Factor)*
|
||||
}
|
||||
|
||||
VariableDefinition {
|
||||
Type (Identifier | Identifier Equal Expression)+
|
||||
}
|
||||
|
||||
ConstantDefinition {
|
||||
Const VariableDefinition
|
||||
}
|
||||
|
||||
VariableDefinition {
|
||||
Type (Identifier | AssignmentStatement)+ (Comma Identifier | Comma AssignmentStatement)*
|
||||
}
|
||||
|
||||
Type {
|
||||
Int | Char
|
||||
}
|
||||
|
@ -67,6 +67,11 @@ MultiplyLike {
|
|||
Number {
|
||||
UnsignedNumber | Minus UnsignedNumber | Plus UnsignedNumber
|
||||
}
|
||||
|
||||
Operator {
|
||||
EuqalTo | NotEqualTo | LessThan | GreaterThan | LessThanOrEqual | GreaterThanOrEqual
|
||||
}
|
||||
|
||||
@tokens {
|
||||
If { "!if" }
|
||||
Else { "!else" }
|
||||
|
@ -82,12 +87,18 @@ Number {
|
|||
RParen { ")" }
|
||||
LBrace { "{" }
|
||||
RBrace { "}" }
|
||||
Comma { "," }
|
||||
Semicolon { ";" }
|
||||
Identifier { $[a-zA-Z_]$[a-zA-Z0-9_]* }
|
||||
UnsignedNumber { $[0-9]+ }
|
||||
String { "\"" $[\x00-\x7F]* "\"" }
|
||||
Character { "'" $[\x00-\x7F] "'" }
|
||||
Operator { "==" | "!=" | "<=" | ">=" | "<" | ">" }
|
||||
EuqalTo { "==" }
|
||||
NotEqualTo { "!=" }
|
||||
LessThan { "<" }
|
||||
GreaterThan { ">" }
|
||||
LessThanOrEqual { "<=" }
|
||||
GreaterThanOrEqual { ">=" }
|
||||
Equal { "=" }
|
||||
Whitespace { $[\t\n\r]+ }
|
||||
}
|
Loading…
Reference in New Issue