[feature] 完成语法分析

This commit is contained in:
lichx 2024-12-23 01:42:58 +08:00
parent 235500f8ce
commit 3b735a43b8
11 changed files with 1179 additions and 15 deletions

View File

@ -7,4 +7,13 @@
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<None Update="files\code">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="files\if-grammar.grammar">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project>

View File

@ -1,16 +1,124 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Text;
namespace CompilerDesignIFlr1
{
// Reads a grammar description file and sorts its rules into four tables:
// keyword, symbol and regex tokens (taken from the "@tokens" section) and
// the remaining production rules.
internal class GrammarReader
{
    // Keyword tokens: quoted literals whose text starts with '!'. Handled by lexical analysis.
    internal Dictionary<string, string> KeyToken { get; set; } = [];

    // Plain symbol tokens (operators, punctuation). Handled by lexical analysis.
    internal Dictionary<string, string> SymbolToken { get; set; } = [];

    // Tokens described by a '$'-marked regular expression. Handled by lexical analysis.
    internal Dictionary<string, string> RegexToken { get; set; } = [];

    // Every top-level section except "@tokens" (production rules). Handled by syntax analysis.
    internal Dictionary<string, string> RegularToken { get; set; } = [];

    // Parses the file at grammarFilePath, fills the four tables and echoes them to the console.
    // Throws KeyNotFoundException when the file has no "@tokens" section, and whatever
    // File.ReadAllText throws when the file cannot be read.
    internal GrammarReader(string grammarFilePath)
    {
        string fileContent = File.ReadAllText(grammarFilePath);
        var dict = fileContent.PartParser();
        var tokenDict = dict["@tokens"].PartParser();

        foreach (var (key, value) in tokenDict)
        {
            // A keyword body looks like "!if": '!' right after the opening quote.
            // "!=" is an operator, not a keyword. The length guard keeps very short
            // bodies from raising IndexOutOfRangeException.
            bool isKeyword = value.Length > 2 && value[1] == '!' && value[2] != '=';

            if (isKeyword)
                KeyToken.Add(key, value.DeDoubleQuote().Substring(1));
            else if (value.Contains('$'))
                RegexToken.Add(key, value.Replace("$", "").DeDoubleQuote());
            else
                SymbolToken.Add(key, value.DeDoubleQuote());
        }

        Console.WriteLine("Key:");
        foreach (var (key, value) in KeyToken)
            Console.WriteLine($"{key}: {value}");

        Console.WriteLine("Symbol:");
        foreach (var (key, value) in SymbolToken)
            Console.WriteLine($"{key}: {value}");

        Console.WriteLine("Regex:");
        foreach (var (key, value) in RegexToken)
            Console.WriteLine($"{key}: {value}");

        foreach (var (key, value) in dict)
        {
            if (key == "@tokens")
                continue;
            RegularToken.Add(key, value);
            Console.WriteLine(key + ": " + value);
        }
    }
}
// String helpers used while reading the grammar file.
file static class StringExtension
{
    // Splits text of the form "Name { body } Other { body }" into a Name -> body map.
    // Only the outermost braces delimit entries; nested braces stay inside the body.
    // Braces inside double quotes are ignored, and a backslash inside quotes escapes
    // the next character, so "\"" does not end the quoted span.
    // Keys and bodies are trimmed. A repeated name makes Dictionary.Add throw ArgumentException.
    internal static Dictionary<string, string> PartParser(this string input)
    {
        Dictionary<string, string> result = [];
        // Line breaks carry no meaning in the grammar format.
        input = input.Replace("\n", " ").Replace("\r", " ");
        int start = 0;
        int layer = 0;
        string key = "";
        bool inQuote = false;

        for (int i = 0; i < input.Length; i++)
        {
            char c = input[i];

            if (inQuote)
            {
                if (c == '\\')
                    i++; // skip the escaped character so \" cannot close the quote
                else if (c == '"')
                    inQuote = false;
                continue;
            }

            switch (c)
            {
                case '"':
                    inQuote = true;
                    break;
                case '{':
                    // Only the outermost '{' starts a body; the text before it is the key.
                    if (layer++ == 0)
                    {
                        key = input.Substring(start, i - start);
                        start = i + 1;
                    }
                    break;
                case '}':
                    // Only the '}' that returns to depth zero closes the entry.
                    if (layer-- == 1)
                    {
                        string value = input.Substring(start, i - start);
                        result.Add(key.Trim(), value.Trim());
                        start = i + 1;
                    }
                    break;
            }
        }

        return result;
    }

    // Removes the double quotes that delimit literals and turns \" into a plain quote.
    // Spaces are dropped everywhere, including inside quotes.
    // An empty literal ("") yields an empty string instead of throwing.
    internal static string DeDoubleQuote(this string s)
    {
        StringBuilder sb = new();
        bool inLayer = false;

        foreach (char c in s)
        {
            if (c == '"')
            {
                if (!inLayer)
                    inLayer = true;
                else if (sb.Length > 0 && sb[^1] == '\\')
                    sb[^1] = '"'; // escaped quote: replace the backslash with the quote itself
                else
                    inLayer = false;
            }
            else if (c != ' ')
            {
                sb.Append(c);
            }
        }

        return sb.ToString();
    }
}
}

View File

@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
@ -8,5 +9,198 @@ namespace CompilerDesignIFlr1
{
internal class LR1Closure
{
// Next index to hand out; shared by every closure that gets constructed.
internal static int GlobalIndex = 0;
// Index taken from GlobalIndex when this closure was constructed.
internal int Index = 0;
// Owning creator; supplies the grammar, FIRST sets and the known closures.
internal LR1Creator LR1Creator;
// Items that make up this closure.
internal HashSet<LR1Unit> Units { get; set; } = [];
// Transition table: symbol -> index of the closure reached by reading it.
internal Dictionary<string, int> Next { get; set; } = [];

// Creates an empty closure and reserves an index for it.
internal LR1Closure(LR1Creator lr1Creator)
{
    Index = GlobalIndex++;
    LR1Creator = lr1Creator;
}

// Creates a closure from the given kernel items, then completes it with the
// missing items and computes the lookaheads.
internal LR1Closure(LR1Creator lr1Creator, List<LR1Unit> units)
    : this(lr1Creator)
{
    Units.UnionWith(units);
    AddMissingUnits();
    CalculateProspects();
}
// Propagates lookahead symbols (Prospect) between the items of this closure
// until nothing changes any more.
// For every item, the lookaheads contributed by each item that has the dot
// directly in front of this item's left-hand side are merged in.
internal void CalculateProspects()
{
    bool haveChange = true;
    while (haveChange)
    {
        haveChange = false;
        foreach (var unit in Units)
        {
            // Sets only grow, so a changed size is a reliable "something new" signal.
            int before = unit.Prospect.Count;
            foreach (var parent in UnitsHaveDotBefore(unit.Name))
                unit.Prospect.UnionWith(GetProspectsOf(parent));
            if (unit.Prospect.Count != before)
                haveChange = true;
        }
    }

    // Rebuild the set so later lookups use hash codes computed from the final items.
    // Copying through a list forces every item to be hashed again.
    Units = new HashSet<LR1Unit>(Units.ToList());
}
// Returns the items of this closure whose next symbol (right after the dot) is name.
internal List<LR1Unit> UnitsHaveDotBefore(string name) =>
    Units.Where(candidate => candidate.Next() == name).ToList();
// For a production X => yz... and an item A => .XB, this method returns what can
// follow X: the FIRST symbols of B and, when B can be empty, of the symbols after it.
// When everything after X can be empty (or nothing follows X), the item's own
// lookaheads are included as well. Items of the start symbol always yield "End".
internal List<string> GetProspectsOf(LR1Unit unit)
{
    if (unit.Name == LR1Creator.StartSymbol)
        return ["End"];

    int firstFollowing = unit.PointPosition + 1;
    int lastIndex = unit.Grammar.Count - 1;

    List<string> lookaheads = [];
    // Nothing follows the symbol after the dot: inherit the item's lookaheads.
    if (firstFollowing == unit.Grammar.Count)
        lookaheads.AddRange(unit.Prospect);

    for (int index = firstFollowing; index <= lastIndex; index++)
    {
        string symbol = unit.Grammar[index];
        var alternatives = LR1Creator.GetUnits(symbol);
        lookaheads.AddRange(LR1Creator.FirstGroup[symbol]);

        // Stop at the first symbol that has no empty alternative.
        if (!alternatives.Any(alternative => alternative.Nullable()))
            break;

        // Every following symbol could vanish: the item's lookaheads apply too.
        if (index == lastIndex)
            lookaheads.AddRange(unit.Prospect);
    }

    return lookaheads;
}
// Completes the closure: for every item whose next symbol is a non-terminal,
// clones of that non-terminal's productions are added, repeating until a pass adds nothing.
internal void AddMissingUnits()
{
    bool grew;
    do
    {
        grew = false;
        HashSet<LR1Unit> pending = [];

        foreach (LR1Unit item in Units)
        {
            string? symbol = item.Next();
            if (symbol is null)
                continue;
            if (!LR1Creator.GrammarUnit.TryGetValue(symbol, out var productions))
                continue;

            var fresh = new HashSet<LR1Unit>(productions.Select(production => production.Clone()));
            if (!fresh.IsSubsetOf(Units))
            {
                grew = true;
                pending.UnionWith(fresh);
            }
        }

        // Added after the pass so Units is not modified while it is being enumerated.
        Units.UnionWith(pending);
    } while (grew);
}
// Builds the closures reachable from this one by reading a single symbol and
// records the transitions in Next. Returns only closures not yet registered with
// the creator; transitions into already known closures reuse the known index.
internal List<LR1Closure> NextClosures()
{
    // Group the items by the symbol right after their dot.
    Dictionary<string, List<LR1Unit>> itemsBySymbol = [];
    foreach (LR1Unit item in Units)
    {
        if (item.Next() is not string symbol)
            continue;
        if (!itemsBySymbol.TryGetValue(symbol, out var bucket))
        {
            bucket = [];
            itemsBySymbol.Add(symbol, bucket);
        }
        bucket.Add(item);
    }

    List<LR1Closure> created = [];
    foreach (var (symbol, items) in itemsBySymbol)
    {
        var candidate = new LR1Closure(LR1Creator, items.Select(item => item.ToNext()).ToList());
        var known = LR1Creator.Closures.Values.FirstOrDefault(existing => existing.Equals(candidate));

        if (known is null)
        {
            created.Add(candidate);
            Next.Add(symbol, candidate.Index);
        }
        else
        {
            // The candidate is discarded, so give back the index its constructor reserved.
            LR1Closure.GlobalIndex--;
            Next.Add(symbol, known.Index);
        }
    }

    return created;
}
// Returns the items whose dot is at the end, i.e. the ones that can be reduced.
internal List<LR1Unit> GetReduceUnits()
{
    List<LR1Unit> reducible = [];
    foreach (var item in Units)
    {
        if (item.ReadyToReduce())
            reducible.Add(item);
    }
    return reducible;
}
// Two closures are equal when they hold the same number of items and every item
// of the other closure equals some item of this one. Index and Next are not compared.
public override bool Equals(object? obj)
{
    if (obj is not LR1Closure other)
        return false;
    if (other.Units.Count != Units.Count)
        return false;
    return other.Units.All(theirs => Units.Any(mine => mine.Equals(theirs)));
}
// Combines the hash codes of all items with XOR. XOR is commutative and
// associative, so the result does not depend on enumeration order and no
// sorting is needed beforehand.
public override int GetHashCode()
{
    int hash = 0;
    foreach (var unit in Units)
        hash ^= unit.GetHashCode();
    return hash;
}
// Renders a header line with the closure index followed by one line per item.
public override string ToString()
{
    string header = $"闭包{Index}:\n";
    return header + string.Concat(Units.Select(item => item + "\n"));
}
}
}

View File

@ -0,0 +1,407 @@
using System;
using System.Collections.Generic;
using System.ComponentModel.DataAnnotations;
using System.Dynamic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace CompilerDesignIFlr1
{
internal class LR1Creator
{
// Name of the rule marked with "@top"; assigned in the constructor.
internal string StartSymbol { get; init; }
// All closures built so far, keyed by closure index.
internal Dictionary<int, LR1Closure> Closures = [];
// Terminals
internal Dictionary<string, LR1Unit> TokenUnit = [];
// Non-terminals; a list per name to account for alternatives ("or")
internal Dictionary<string, List<LR1Unit>> GrammarUnit = [];
// FIRST sets
internal Dictionary<string, HashSet<string>> FirstGroup = [];
// Productions keyed by the id assigned in SetGrammarIndex.
internal Dictionary<int, LR1Unit> UnitIndex = [];
// Builds everything from a parsed grammar: registers the terminals, expands the
// production rules, numbers them, computes FIRST sets and finally constructs and
// prints all closures. Rules whose key starts with "@skip" are ignored; the rule
// whose key starts with "@top" becomes the start symbol.
internal LR1Creator(GrammarReader reader)
{
    // "@top Name" -> "Name"; other keys are used unchanged.
    static string RuleName(string key) => key.StartsWith("@top") ? key.Substring(5) : key;

    TokenUnit.Add("End", new LR1Unit("End", "#"));
    var declaredTokens = reader.SymbolToken.Concat(reader.KeyToken).Concat(reader.RegexToken);
    foreach (var (tokenName, text) in declaredTokens)
        TokenUnit.Add(tokenName, new LR1Unit(tokenName, text));

    var rules = reader.RegularToken.Where(rule => !rule.Key.StartsWith("@skip")).ToList();

    // First pass: register every rule name so rules may reference each other in any order.
    foreach (var rule in rules)
        GrammarUnit.Add(RuleName(rule.Key), []);

    // Second pass: expand the rule bodies into productions.
    string startSymbol = "";
    foreach (var rule in rules)
    {
        string ruleName = RuleName(rule.Key);
        if (rule.Key.StartsWith("@top"))
            startSymbol = ruleName;
        GrammarUnitCreate(rule.Value, ruleName);
    }

    SetGrammarIndex();
    PrintGrammar();

    foreach (var tokenName in TokenUnit.Keys)
        GetFirst(tokenName);
    foreach (var ruleName in GrammarUnit.Keys)
        GetFirst(ruleName);
    PrintFirst();

    StartSymbol = startSymbol;
    I0Creator();
    CalculateAllClosure();
    PrintClosure(Enumerable.Range(0, Closures.Count).ToArray());
}
// Numbers every production consecutively from 0 and records it in UnitIndex.
internal void SetGrammarIndex()
{
    int nextId = 0;
    foreach (var production in GrammarUnit.Values.SelectMany(alternatives => alternatives))
    {
        production.Id = nextId;
        UnitIndex[nextId] = production;
        nextId++;
    }
}
// Writes the closures with the given indices to the console, in the order given.
internal void PrintClosure(params int[] index)
{
    foreach (var closure in index.Select(i => Closures[i]))
        Console.WriteLine(closure);
}
// Builds the initial closure: the start symbol's productions with lookahead "End",
// completed with the missing items and lookaheads, then registered in Closures.
internal void I0Creator()
{
    var initial = new LR1Closure(this);

    foreach (var production in GrammarUnit[StartSymbol])
    {
        var seed = production.Clone();
        seed.Prospect.Add("End");
        initial.Units.Add(seed);
    }

    initial.AddMissingUnits();
    initial.CalculateProspects();
    Closures.Add(initial.Index, initial);
}
// Returns the FIRST set of a terminal or non-terminal, computing and caching it
// in FirstGroup on first use.
// For a terminal the set holds just the terminal's own name.
// For a non-terminal the set is the union over its productions: symbols are taken
// from the left while the preceding ones have an empty alternative.
// Throws Exception when root is neither a known terminal nor a known non-terminal.
internal HashSet<string> GetFirst(string root)
{
    if (FirstGroup.TryGetValue(root, out var cached))
        return cached;

    if (GrammarUnit.TryGetValue(root, out var productions))
    {
        HashSet<string> first = [];
        // Register before recursing so a left-recursive rule finds this (still
        // growing) set instead of recursing without end.
        // NOTE(review): with mutually left-recursive rules the inner rule's cached
        // set may stay incomplete; confirm whether such grammars must be supported.
        FirstGroup.Add(root, first);

        foreach (var production in productions)
        {
            foreach (var symbol in production.Grammar)
            {
                var symbolFirst = GetFirst(symbol);
                if (!ReferenceEquals(symbolFirst, first))
                    first.UnionWith(symbolFirst);

                // Continue with the next symbol only when this one can be empty,
                // i.e. it is a non-terminal with an empty alternative.
                bool canBeEmpty =
                    GrammarUnit.TryGetValue(symbol, out var alternatives)
                    && alternatives.Any(alternative => alternative.Nullable());
                if (!canBeEmpty)
                    break;
            }
        }

        return first;
    }

    if (TokenUnit.TryGetValue(root, out var token))
    {
        FirstGroup.Add(root, [token.Name]);
        return [token.Name];
    }

    throw new Exception("Grammar can't be recognized. " + root);
}
// Visits the closures in index order, starting at 0, and registers every new
// closure produced by NextClosures until no unvisited index is left.
internal void CalculateAllClosure()
{
    int cursor = 0;
    bool lastVisitProducedNew = true;

    while (Closures.TryGetValue(cursor, out var current))
    {
        List<LR1Closure> discovered = current.NextClosures();
        lastVisitProducedNew = discovered.Count > 0;
        foreach (var closure in discovered)
            Closures.Add(closure.Index, closure);
        cursor++;
    }

    // Sanity check: new closures were reported but the next index is missing.
    if (lastVisitProducedNew && !Closures.ContainsKey(cursor))
        throw new Exception("Really weird. This situation shouldn't be possible.");
}
// Turns the textual body of a rule (value) into productions stored in GrammarUnit
// and returns the name they are stored under. When name is empty a name is
// generated with NameGenerator.
// Symbols are separated by spaces, alternatives by '|'; a trailing '*' or '+'
// on a symbol or parenthesized group is expanded into a helper rule.
// Throws Exception when a symbol is neither a known terminal nor a known non-terminal.
internal string GrammarUnitCreate(string value, string name = "")
{
// Whole body is a single repetition: "(...)*", "(...)+", "X*" or "X+".
if (value.EndsWith('*') || value.EndsWith('+'))
{
string s;
// Parenthesized group followed by the operator: strip "(" and ")op".
if (value.StartsWith('(') && value[^2] == ')')
{
s = DeClosure(value.Substring(1, value.Length - 3), value[^1]);
if (name != "")
{
// The named rule becomes a single production that refers to the helper rule.
GrammarUnit[name] = [new LR1Unit(name, [s])];
}
return s;
}
// Single symbol (no space) followed by the operator.
else if (value.IndexOf(' ') == -1)
{
s = DeClosure(value.Substring(0, value.Length - 1), value[^1]);
if (name != "")
{
GrammarUnit[name] = [new LR1Unit(name, [s])];
}
return s;
}
}
// General case: scan the body character by character.
bool inParen = false;
// Start index of the piece currently being read.
int last = 0;
name = name.Length == 0 ? NameGenerator(value) : name;
// Finished alternatives.
List<List<string>> ans = [];
// Symbols of the alternative currently being read.
List<string> grammar = [];
// Trailing space so the last symbol is flushed by the ' ' case.
value = value + ' ';
for (var i = 0; i < value.Length; i++)
{
char c = value[i];
// Inside parentheses everything is skipped until the closing ')'.
if (inParen && c != ')')
continue;
switch (c)
{
case '(':
last = i;
inParen = true;
break;
case ')':
{
inParen = false;
break;
}
case ' ':
{
// A space ends the current piece; it must be a known symbol or blank.
string subValue = value.Substring(last, i - last);
last = i + 1;
if (GrammarUnit.ContainsKey(subValue.Trim()))
grammar.Add(subValue.Trim());
else if (TokenUnit.ContainsKey(subValue.Trim()))
grammar.Add(subValue.Trim());
else
{
if (subValue.Trim().Length == 0)
break;
throw new Exception("Grammar can't be recognized. " + subValue);
}
break;
}
case '|':
{
// '|' ends the current alternative (after flushing a pending symbol).
string subValue = value.Substring(last, i - last);
if (subValue.Trim().TrimEnd('|').Length != 0)
{
if (GrammarUnit.ContainsKey(subValue.Trim()))
grammar.Add(subValue.Trim());
else if (TokenUnit.ContainsKey(subValue.Trim()))
grammar.Add(subValue.Trim());
else
throw new Exception("Grammar can't be recognized. " + subValue);
}
ans.Add(grammar);
grammar = [];
last = i + 1;
break;
}
case '*':
{
// The piece including the operator is expanded recursively into a helper
// rule; the helper's name takes its place in the current alternative.
string subString = GrammarUnitCreate(value.Substring(last, i - last + 1));
grammar.Add(subString);
last = i + 1;
break;
}
case '+':
{
// Same handling as '*'.
string subString = GrammarUnitCreate(value.Substring(last, i - last + 1));
grammar.Add(subString);
last = i + 1;
break;
}
default:
break;
}
}
// Flush the last alternative.
if (grammar.Count > 0)
ans.Add(grammar);
// Store the productions, replacing any placeholder registered earlier.
if (GrammarUnit.TryGetValue(name, out var val))
GrammarUnit[name] = ToLR1Units(name, ans);
else
GrammarUnit.Add(name, ToLR1Units(name, ans));
return name;
}
// Expands a repetition into a right-recursive helper rule N and returns N's name.
// value is the repeated part without parentheses and operator; alternatives are
// separated by '|', symbols by spaces. type is '*' or '+'.
//   X* becomes  N ::= (empty) | X N
//   X+ becomes  N ::= X | X N
// Nested repetitions are rejected outright.
// Throws Exception for nested repetitions, an unknown type, an unknown symbol or
// an empty alternative (which would otherwise produce the useless rule N ::= N).
internal string DeClosure(string value, char type)
{
    if (value.Contains('+') || value.Contains('*'))
        throw new Exception("Grammar too complex. " + value);

    string name = NameGenerator(value);
    List<List<string>> ans = [];

    if (type == '*')
        ans.Add([]); // zero repetitions
    else if (type != '+')
        throw new Exception("Unknown type. " + type);

    foreach (var alternative in value.Split('|'))
    {
        // Empty entries are dropped for both operators, so repeated spaces are harmless.
        List<string> symbols = [];
        foreach (var symbol in alternative.Split(' ', StringSplitOptions.RemoveEmptyEntries))
        {
            if (!TokenUnit.ContainsKey(symbol) && !GrammarUnit.ContainsKey(symbol))
                throw new Exception("Unknown grammar. " + symbol);
            symbols.Add(symbol);
        }

        if (symbols.Count == 0)
            throw new Exception("Unknown grammar. " + alternative.Trim());

        if (type == '+')
            ans.Add([.. symbols]); // exactly one repetition
        ans.Add([.. symbols, name]); // one repetition followed by more
    }

    GrammarUnit[name] = ToLR1Units(name, ans);
    return name;
}
// Derives a name for a helper rule from the text it was created from: the
// characters ( ) * | become spaces, the pieces are joined with '_' and the first
// free suffix _0 .. _8 is appended.
// Throws Exception when all nine suffixes are already taken.
internal string NameGenerator(string key)
{
    string cleaned = key;
    foreach (char separator in new[] { '(', ')', '*', '|' })
        cleaned = cleaned.Replace(separator, ' ');
    cleaned = cleaned.Replace(" ", " ");

    var pieces = cleaned.Split(' ').Select(piece => piece.Trim());
    var stem = String.Join('_', pieces);

    int suffix = 0;
    while (suffix < 9)
    {
        if (!GrammarUnit.ContainsKey(stem + '_' + suffix))
            return stem + '_' + suffix;
        suffix++;
    }

    throw new Exception("Grammar too Complex:" + key);
}
// Wraps each alternative (a list of symbol names) in a production named name.
internal List<LR1Unit> ToLR1Units(string name, List<List<string>> grammars) =>
    grammars.Select(symbols => new LR1Unit(name, symbols)).ToList();
// Writes every non-terminal and its productions to the console.
private void PrintGrammar()
{
    Console.WriteLine("文法:");
    foreach (var rule in GrammarUnit)
    {
        Console.WriteLine("key: " + rule.Key);
        rule.Value.ForEach(production => Console.WriteLine(" " + production));
    }
}
// Writes every cached FIRST set to the console, one symbol per line.
internal void PrintFirst()
{
    Console.WriteLine("First集");
    foreach (var entry in FirstGroup)
    {
        StringBuilder line = new($"{entry.Key}: ");
        foreach (var terminal in entry.Value)
            line.Append(terminal).Append(' ');
        Console.WriteLine(line.ToString());
    }
}
// Returns the units registered under name: a one-element list for a terminal,
// the list of productions for a non-terminal. Terminals take precedence.
// Throws Exception when the name is unknown.
internal List<LR1Unit> GetUnits(string name)
{
    if (TokenUnit.TryGetValue(name, out var token))
        return [token];

    return GrammarUnit.TryGetValue(name, out var productions)
        ? productions
        : throw new Exception($"No Unit Called: {name}");
}
}
}

View File

@ -0,0 +1,93 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace CompilerDesignIFlr1
{
// ACTION/GOTO table derived from the closures of an LR1Creator: one Row per closure.
internal class LR1Table
{
    // Rows in closure-index order.
    internal List<Row> Rows = [];

    // Builds a row for every closure index 0 .. Count-1 and prints the table.
    // Transitions on terminals become shift entries ("s<index>"), transitions on
    // non-terminals become GOTO entries, and reducible items add "r<production id>"
    // for each of their lookaheads.
    // Throws Exception when a reduce entry collides with an existing ACTION entry.
    internal LR1Table(LR1Creator creator)
    {
        for (int state = 0; state < creator.Closures.Count; state++)
        {
            LR1Closure closure = creator.Closures[state];
            Dictionary<string, int> gotoColumn = [];
            Dictionary<string, string> actionColumn = [];

            foreach (var transition in closure.Next)
            {
                if (creator.TokenUnit.ContainsKey(transition.Key))
                    actionColumn.Add(transition.Key, "s" + transition.Value);
                else
                    gotoColumn.Add(transition.Key, transition.Value);
            }

            foreach (var reducible in closure.GetReduceUnits())
            {
                foreach (string lookahead in reducible.Prospect)
                {
                    if (!actionColumn.TryAdd(lookahead, "r" + reducible.Id))
                        throw new Exception("Reduce,Reduce/Reduce,Shift error occurred.");
                }
            }

            Rows.Add(new Row(state, gotoColumn, actionColumn));
        }

        PrintRow();
    }

    // Writes every row to the console.
    private void PrintRow() => Rows.ForEach(row => Console.WriteLine(row));
}
// One state of the parse table.
internal class Row
{
    // State number.
    internal int Id;
    // Non-terminal -> target state.
    internal Dictionary<string, int> GOTO;
    // Terminal -> "s<state>" (shift) or "r<production id>" (reduce).
    internal Dictionary<string, string> ACTION;

    internal Row(int id, Dictionary<string, int> @goto, Dictionary<string, string> action)
    {
        (Id, GOTO, ACTION) = (id, @goto, action);
    }

    // Renders the state number followed by the ACTION and GOTO entries.
    public override string ToString()
    {
        string actions = string.Concat(ACTION.Select(entry => $"{entry.Key}: {entry.Value}, "));
        string gotos = string.Concat(GOTO.Select(entry => $"{entry.Key}: {entry.Value}, "));
        return Id + " \n" + "ACTION: \n" + actions + "GOTO: \n" + gotos;
    }

    // Looks up what to do in this state for the symbol name.
    // Returns ("ACC", 0) for "StatementList" in state 0, ("GOTO", state) for a GOTO
    // or shift entry, and ("Reduce", production id) for a reduce entry.
    // Throws InvalidOperationException when the symbol has no entry, and Exception
    // when an ACTION entry starts with neither "s" nor "r".
    public (string, int) Next(string name)
    {
        if (Id == 0 && name == "StatementList")
            return ("ACC", 0);

        if (GOTO.TryGetValue(name, out var target))
            return ("GOTO", target);

        if (!ACTION.TryGetValue(name, out var entry))
            throw new InvalidOperationException($"Code error in state {Id}: {name} not found");

        if (entry.StartsWith("s"))
            return ("GOTO", int.Parse(entry.Substring(1)));
        if (entry.StartsWith("r"))
            return ("Reduce", int.Parse(entry.Substring(1)));

        throw new Exception($"Value can't be recognized. {entry}");
    }
}
}

View File

@ -8,6 +8,127 @@ namespace CompilerDesignIFlr1
{
// A grammar unit: either a terminal ("Token") or one production of a
// non-terminal ("Grammar"), optionally carrying a dot position and lookaheads.
internal class LR1Unit
{
    internal LR1Unit() { }

    // Terminal whose single Grammar entry is its literal text or pattern.
    internal LR1Unit(string name, string grammar)
    {
        Type = "Token";
        Name = name;
        Grammar = [grammar];
    }

    // Terminal that also carries the concrete text (value) it was matched from.
    internal LR1Unit(string name, string value, string grammar)
    {
        Type = "Token";
        Name = name;
        Grammar = [grammar];
        Value = value;
    }

    // Production name ::= grammar[0] grammar[1] ...
    internal LR1Unit(string name, List<string> grammar)
    {
        Type = "Grammar";
        Name = name;
        Grammar = grammar;
    }

    // Sets every field explicitly; used by Clone.
    internal LR1Unit(
        int id,
        string type,
        string name,
        HashSet<string> prospect,
        int pointPosition,
        List<string> grammar
    )
    {
        Id = id;
        Type = type;
        Name = name;
        Prospect = prospect;
        PointPosition = pointPosition;
        Grammar = grammar;
    }

    // Production number; -1 until one is assigned.
    internal int Id { get; set; } = -1;
    // "Token" or "Grammar".
    internal string Type { get; set; } = "";
    // Terminal name, or the left-hand side of the production.
    internal string Name { get; set; } = "";
    // Concrete source text for tokens produced by the lexer.
    internal string Value { get; set; } = "";
    // Lookahead symbols.
    internal HashSet<string> Prospect { get; set; } = [];
    // Index of the symbol right after the dot.
    internal int PointPosition { get; set; } = 0;
    // Right-hand side symbols (or the single literal/pattern of a terminal).
    internal List<string> Grammar { get; set; } = [];

    // Symbol right after the dot, or null when the dot is at the end.
    internal string? Next() => PointPosition >= Grammar.Count ? null : Grammar[PointPosition];

    // True when the dot is at the end of the right-hand side.
    internal bool ReadyToReduce() => PointPosition >= Grammar.Count;

    // True when the names on top of stack match the right-hand side, compared from
    // the end. A stack that holds fewer units than the right-hand side never matches.
    internal bool CanReduce(List<LR1Unit> stack)
    {
        if (stack.Count < Grammar.Count)
            return false;

        for (int i = 1; i <= Grammar.Count; i++)
        {
            if (stack[^i].Name != Grammar[^i])
                return false;
        }
        return true;
    }

    // Renders "<id> Name ::= a . b c, lookahead, ..." with the dot at PointPosition.
    public override string ToString()
    {
        StringBuilder sb = new();
        sb.Append($"{Id, 2} ").Append(Name).Append(" ::= ");
        for (int i = 0; i < Grammar.Count; i++)
        {
            if (PointPosition == i)
                sb.Append(". ");
            sb.Append(Grammar[i]).Append(' ');
        }
        if (PointPosition == Grammar.Count)
            sb.Append('.');
        foreach (var item in Prospect)
            sb.Append(", ").Append(item);
        return sb.ToString();
    }

    // Copy with its own Prospect and Grammar collections; Value is carried over too.
    internal LR1Unit Clone() =>
        new LR1Unit(Id, Type, Name, new HashSet<string>(Prospect), PointPosition, [.. Grammar])
        {
            Value = Value,
        };

    // Units are equal when type, name, id, dot position, right-hand side and the
    // set of lookaheads all match. Value is not compared.
    public override bool Equals(object? obj)
    {
        if (obj is not LR1Unit other)
            return false;
        return Type == other.Type
            && Name == other.Name
            && Id == other.Id
            && PointPosition == other.PointPosition
            && Grammar.SequenceEqual(other.Grammar)
            && Prospect.SetEquals(other.Prospect);
    }

    // Hash consistent with Equals. Lookaheads are folded with XOR so that two equal
    // sets give the same result regardless of the order their items were inserted.
    public override int GetHashCode()
    {
        HashCode hash = new HashCode();
        hash.Add(Type);
        hash.Add(Name);
        hash.Add(PointPosition);
        foreach (var item in Grammar)
            hash.Add(item);

        int prospectHash = 0;
        foreach (var item in Prospect)
            prospectHash ^= item.GetHashCode();
        hash.Add(prospectHash);

        return hash.ToHashCode();
    }

    // True for a production with an empty right-hand side.
    internal bool Nullable() => Grammar.Count == 0;

    // Copy of this unit with the dot moved one symbol to the right.
    internal LR1Unit ToNext()
    {
        var unit = Clone();
        unit.PointPosition++;
        return unit;
    }
}
}

View File

@ -2,14 +2,153 @@
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using CompilerDesignIFlr1;
namespace CompilerDesignIflr1
{
internal class LexicalAnalysis
{
string Text { get; set; } = "";
// Terminals keyed by their literal text, split by kind.
Dictionary<string, LR1Unit> Symbol = [];
Dictionary<string, LR1Unit> Keyword = [];
// Terminals keyed by their regular expression.
Dictionary<string, LR1Unit> Patterns = [];
// Token stream produced from the source file.
internal List<LR1Unit> Result;

// Classifies the creator's terminals, then reads the file at codeFilePath, splits
// it into pieces, turns the pieces into tokens (Result) and prints each step.
// A terminal whose text contains '[' is treated as a pattern, one made only of
// letters and digits as a keyword, anything else as a symbol.
internal LexicalAnalysis(LR1Creator creator, string codeFilePath)
{
    foreach (var unit in creator.TokenUnit.Values)
    {
        string text = unit.Grammar[0];
        Dictionary<string, LR1Unit> table;
        if (text.Contains('['))
            table = Patterns;
        else if (text.All(ch => char.IsLetterOrDigit(ch)))
            table = Keyword;
        else
            table = Symbol;
        table.Add(text, unit);
    }

    PrintStrSplitResult();

    List<string> pieces = StrSplit(File.ReadAllText(codeFilePath));
    foreach (string piece in pieces)
        Console.WriteLine(piece);

    Result = Analyze(pieces);
    PrintAnalyzeResult();
}
// Returns the tokens as a stack whose top is the first token of the input.
internal Stack<LR1Unit> GetStack()
{
    var stack = new Stack<LR1Unit>();
    // Push back to front so the first token ends up on top.
    for (int index = Result.Count - 1; index >= 0; index--)
        stack.Push(Result[index]);
    return stack;
}
// Writes the symbol, keyword and pattern tables to the console.
internal void PrintStrSplitResult()
{
    (string Title, Dictionary<string, LR1Unit> Table)[] sections =
    [
        ("Symbol", Symbol),
        ("keyword", Keyword),
        ("Regex", Patterns),
    ];

    foreach (var (title, table) in sections)
    {
        Console.WriteLine(title);
        foreach (var entry in table)
            Console.WriteLine($"{entry.Key}: {entry.Value}");
    }
}
// Writes the recognised tokens to the console as "name: text".
internal void PrintAnalyzeResult()
{
    Console.WriteLine("units");
    Result.ForEach(token => Console.WriteLine($"{token.Name}: {token.Value}"));
}
// Turns the pieces produced by StrSplit into tokens. Each piece is tried as
//   1. a keyword,
//   2. a symbol made of this piece plus the next one (e.g. "!" + "=" or "<" + "="),
//   3. a symbol on its own,
//   4. the first pattern whose regular expression matches it.
// The two-piece lookup runs before the single-piece one, so operators such as "!="
// are found even when their first character is not a symbol by itself.
// Pieces that match nothing are reported on the console and skipped.
public List<LR1Unit> Analyze(List<string> list)
{
    // Copies the terminal prototype and attaches the matched source text.
    static LR1Unit WithValue(LR1Unit prototype, string text)
    {
        var token = prototype.Clone();
        token.Value = text;
        return token;
    }

    List<LR1Unit> ans = [];
    for (int i = 0; i < list.Count; i++)
    {
        string s = list[i];

        if (Keyword.TryGetValue(s, out var keyword))
        {
            ans.Add(WithValue(keyword, s));
            continue;
        }

        if (i + 1 < list.Count && Symbol.TryGetValue(s + list[i + 1], out var compound))
        {
            ans.Add(WithValue(compound, s + list[i + 1]));
            i++; // the next piece has been consumed as well
            continue;
        }

        if (Symbol.TryGetValue(s, out var symbol))
        {
            ans.Add(WithValue(symbol, s));
            continue;
        }

        bool noAnswer = true;
        foreach (var (pattern, prototype) in Patterns)
        {
            if (Regex.IsMatch(s, pattern))
            {
                ans.Add(WithValue(prototype, s));
                noAnswer = false;
                break;
            }
        }

        if (noAnswer)
        {
            Console.WriteLine($"无法识别的字符串: {s}");
        }
    }

    return ans;
}
// Splits source text into pieces: runs of letters and digits stay together, every
// other non-blank character becomes a piece of its own, and whitespace is dropped.
// The end marker "#" is always appended as the last piece.
// Scanning starts at index 0, so a leading punctuation character is split off too.
public List<string> StrSplit(string s)
{
    List<string> lt = new List<string>();

    // Adds the piece unless it is empty or only whitespace.
    void AddPiece(string piece)
    {
        piece = piece.Trim();
        if (piece.Length != 0)
            lt.Add(piece);
    }

    int l = 0; // start of the word currently being read
    for (int r = 0; r < s.Length; r++)
    {
        if (char.IsLetterOrDigit(s[r]))
            continue;

        // A separator ends the pending word and is itself a candidate piece.
        AddPiece(s.Substring(l, r - l));
        AddPiece(s[r].ToString());
        l = r + 1;
    }

    // Word that runs up to the end of the text.
    if (l < s.Length)
        AddPiece(s.Substring(l));

    lt.Add("#");
    return lt;
}
}
}

View File

@ -1,2 +1,10 @@
// See https://aka.ms/new-console-template for more information
using CompilerDesignIflr1;
using CompilerDesignIFlr1;
Console.WriteLine("Hello, World!");

// Pipeline: grammar file -> LR(1) closures -> parse table -> token stream -> parse.
var reader = new GrammarReader("./files/if-grammar.grammar");
var creator = new LR1Creator(reader);
var table = new LR1Table(creator);
var lexer = new LexicalAnalysis(creator, "./files/code");

// The StateMachine constructor runs the parse itself, so the instance is not kept.
_ = new StateMachine(table, lexer, creator);

View File

@ -0,0 +1,65 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using CompilerDesignIflr1;
namespace CompilerDesignIFlr1
{
// Table-driven parser: feeds the token stream through the LR(1) table and prints
// every production it reduces by.
internal class StateMachine
{
    internal LR1Table Table;
    internal LexicalAnalysis AnalysisResult;
    internal LR1Creator Creator;
    // Parse stack of (symbol, state) pairs; starts with the end marker in state 0.
    internal List<(LR1Unit, int)> stack = [(new LR1Unit("End","#"),0)];
    // Remaining input; the top is the next symbol to process.
    internal Stack<LR1Unit> Tokens;

    // Stores the collaborators, fetches the token stack and runs the parse right away.
    internal StateMachine(LR1Table table, LexicalAnalysis analysis, LR1Creator creator)
    {
        Table = table;
        AnalysisResult = analysis;
        Creator = creator;
        Tokens = AnalysisResult.GetStack();
        Compute();
    }

    // Processes input symbols one at a time until the table signals acceptance.
    internal void Compute()
    {
        bool accepted;
        do
        {
            accepted = ComputeOnce(Tokens.Pop());
        } while (!accepted);
    }

    // Performs one table step for unit and returns true once the input is accepted.
    // A "GOTO" step pushes the unit with its target state. A "Reduce" step pops the
    // production's right-hand side, prints the production and puts the unit back on
    // the input followed by a copy of the production, so the production's name is
    // looked up next.
    // Throws Exception when the stack top does not match the production or when
    // the table returns an unknown action.
    internal bool ComputeOnce(LR1Unit unit)
    {
        int state = stack[^1].Item2;
        var (action, destination) = Table.Rows[state].Next(unit.Name);

        if (action == "ACC")
        {
            Console.WriteLine(Creator.UnitIndex[destination]);
            return true;
        }

        if (action == "GOTO")
        {
            stack.Add((unit, destination));
            return false;
        }

        if (action != "Reduce")
            throw new Exception("Action is not recognized.");

        var production = Creator.UnitIndex[destination];
        var symbolsOnStack = stack.Select(entry => entry.Item1).ToList();
        if (!production.CanReduce(symbolsOnStack))
            throw new Exception("Reduce not allow.");

        int length = production.Grammar.Count;
        stack.RemoveRange(stack.Count - length, length);
        Console.WriteLine(production);
        Tokens.Push(unit);
        Tokens.Push(production.Clone());
        return false;
    }
}
}

View File

@ -0,0 +1,9 @@
int i=0,t,b=15;
if(i>=10)
if(k==9)
h = 6;
else
{
c=10;
if(k<=95 && )
}

View File

@ -1,4 +1,4 @@
@top Program {
@top Program {
StatementList
}
@ -29,7 +29,7 @@ Statement {
}
NoIfStatement {
AssignmentStatement Semicolon | VariableDefinition Semicolon | LBrace Statement* RBrace | ConstantDefinition Semicolon
AssignmentStatement Semicolon | VariableDefinition Semicolon | LBrace Statement* RBrace | ConstantDefinition Semicolon | Semicolon
}
AssignmentStatement {
@ -40,14 +40,14 @@ Term {
Factor (MultiplyLike Factor)*
}
VariableDefinition {
Type (Identifier | Identifier Equal Expression)+
}
ConstantDefinition {
Const VariableDefinition
}
VariableDefinition {
Type (Identifier | AssignmentStatement)+ (Comma Identifier | Comma AssignmentStatement)*
}
Type {
Int | Char
}
@ -67,6 +67,11 @@ MultiplyLike {
Number {
UnsignedNumber | Minus UnsignedNumber | Plus UnsignedNumber
}
Operator {
EuqalTo | NotEqualTo | LessThan | GreaterThan | LessThanOrEqual | GreaterThanOrEqual
}
@tokens {
If { "!if" }
Else { "!else" }
@ -82,12 +87,18 @@ Number {
RParen { ")" }
LBrace { "{" }
RBrace { "}" }
Comma { "," }
Semicolon { ";" }
Identifier { $[a-zA-Z_]$[a-zA-Z0-9_]* }
UnsignedNumber { $[0-9]+ }
String { "\"" $[\x00-\x7F]* "\"" }
Character { "'" $[\x00-\x7F] "'" }
Operator { "==" | "!=" | "<=" | ">=" | "<" | ">" }
EuqalTo { "==" }
NotEqualTo { "!=" }
LessThan { "<" }
GreaterThan { ">" }
LessThanOrEqual { "<=" }
GreaterThanOrEqual { ">=" }
Equal { "=" }
Whitespace { $[\t\n\r]+ }
}