From 3b735a43b8b991d200344a39ddbb2e52e0e7d206 Mon Sep 17 00:00:00 2001 From: lichx Date: Mon, 23 Dec 2024 01:42:58 +0800 Subject: [PATCH] =?UTF-8?q?[feature]=20=E5=AE=8C=E6=88=90=E8=AF=AD?= =?UTF-8?q?=E6=B3=95=E5=88=86=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../CompilerDesignIflr1.csproj | 9 + CompilerDesignIflr1/GrammarReader.cs | 122 +++++- CompilerDesignIflr1/LR1Closure.cs | 194 +++++++++ CompilerDesignIflr1/LR1Creator.cs | 407 ++++++++++++++++++ CompilerDesignIflr1/LR1Table.cs | 93 ++++ CompilerDesignIflr1/LR1Unit.cs | 123 +++++- CompilerDesignIflr1/LexicalAnalysis.cs | 139 ++++++ CompilerDesignIflr1/Program.cs | 8 + CompilerDesignIflr1/StateMachine.cs | 65 +++ CompilerDesignIflr1/files/code | 9 + .../{ => files}/if-grammar.grammar | 25 +- 11 files changed, 1179 insertions(+), 15 deletions(-) create mode 100644 CompilerDesignIflr1/LR1Creator.cs create mode 100644 CompilerDesignIflr1/LR1Table.cs create mode 100644 CompilerDesignIflr1/StateMachine.cs create mode 100644 CompilerDesignIflr1/files/code rename CompilerDesignIflr1/{ => files}/if-grammar.grammar (76%) diff --git a/CompilerDesignIflr1/CompilerDesignIflr1.csproj b/CompilerDesignIflr1/CompilerDesignIflr1.csproj index 2150e37..0065211 100644 --- a/CompilerDesignIflr1/CompilerDesignIflr1.csproj +++ b/CompilerDesignIflr1/CompilerDesignIflr1.csproj @@ -7,4 +7,13 @@ enable + + + Always + + + Always + + + diff --git a/CompilerDesignIflr1/GrammarReader.cs b/CompilerDesignIflr1/GrammarReader.cs index dd8ed46..06aa847 100644 --- a/CompilerDesignIflr1/GrammarReader.cs +++ b/CompilerDesignIflr1/GrammarReader.cs @@ -1,16 +1,124 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +using System.Text; namespace CompilerDesignIFlr1 { internal class GrammarReader { - public GrammarReader(Uri grammarFilePath) - { + //词法分析解决 + internal Dictionary KeyToken { get; set; } = []; + //此法分析解决 + internal Dictionary SymbolToken { get; set; } = []; + + //词法分析解决 + internal Dictionary RegexToken { get; set; } = []; + + //语法分析解决 + internal Dictionary RegularToken { get; set; } = []; + + internal GrammarReader(string grammarFilePath) + { + string fileContent = File.ReadAllText(grammarFilePath); + var dict = fileContent.PartParser(); + + var tokenDict = dict["@tokens"].PartParser(); + foreach (var (key, value) in tokenDict) + { + if (value[1] == '!' && value[2] != '=') + KeyToken.Add(key, value.DeDoubleQuote().Substring(1)); + else if (value.Contains('$')) + RegexToken.Add(key, value.Replace("$", "").DeDoubleQuote()); + else + SymbolToken.Add(key, value.DeDoubleQuote()); + } + Console.WriteLine("Key:"); + foreach (var (key, value) in KeyToken) + Console.WriteLine($"{key}: {value}"); + Console.WriteLine("Symbol:"); + foreach (var (key, value) in SymbolToken) + Console.WriteLine($"{key}: {value}"); + Console.WriteLine("Regex:"); + foreach (var (key, value) in RegexToken) + Console.WriteLine($"{key}: {value}"); + foreach (var (key, value) in dict) + { + if (key == "@tokens") + continue; + RegularToken.Add(key, value); + Console.WriteLine(key + ": " + value); + } + } + } + + file static class StringExtension + { + internal static Dictionary PartParser(this string input) + { + Dictionary result = []; + input = input.Replace("\n", " ").Replace("\r", " "); + int start = 0; + int layer = 0; + string key = ""; + string value = ""; + bool inQuote = false; + for (int i = 0; i < input.Length; i++) + { + char c = input[i]; + switch (c) + { + case '"': + inQuote = !inQuote; + break; + case '{': + if (inQuote) + break; + if (layer++ == 0) + { + key = input.Substring(start, i - start); + start = i + 1; + } + break; + case '}': + if (inQuote) + break; + if (layer-- == 1) + { + value = input.Substring(start, i - start); + result.Add(key.Trim(), value.Trim()); + start = i + 1; + } + break; + } + } + return result; + } + + internal static string DeDoubleQuote(this string s) + { + StringBuilder sb = new StringBuilder(); + bool inLayer = false; + for (int i = 0; i < s.Length; i++) + { + if (s[i] == '"') + { + if (!inLayer) + { + inLayer = true; + } + else + { + if (sb[^1] == '\\') + { + sb[^1] = '\"'; + } + else + inLayer = false; + } + } + else if (s[i] != ' ') + sb.Append(s[i]); + } + return sb.ToString(); } } } diff --git a/CompilerDesignIflr1/LR1Closure.cs b/CompilerDesignIflr1/LR1Closure.cs index 724b6d2..718692c 100644 --- a/CompilerDesignIflr1/LR1Closure.cs +++ b/CompilerDesignIflr1/LR1Closure.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.ComponentModel; using System.Linq; using System.Text; using System.Threading.Tasks; @@ -8,5 +9,198 @@ namespace CompilerDesignIFlr1 { internal class LR1Closure { + internal static int GlobalIndex = 0; + internal int Index = 0; + internal LR1Creator LR1Creator; + internal HashSet Units { get; set; } = []; + internal Dictionary Next { get; set; } = []; + + internal LR1Closure(LR1Creator lr1Creator) + { + LR1Creator = lr1Creator; + Index = GlobalIndex++; + } + + internal LR1Closure(LR1Creator lr1Creator, List units) + { + Units.UnionWith(units); + Index = GlobalIndex++; + LR1Creator = lr1Creator; + AddMissingUnits(); + CalculateProspects(); + } + + internal void CalculateProspects() + { + //Queue queue = []; + //var startUnit = Units.Where(x=>x.Name == LR1Creator.StartSymbol).First(); + //startUnit.Prospect.Add("End"); + + //queue.Enqueue(startUnit); + //while (queue.Count > 0) + //{ + // var unit = queue.Dequeue(); + // var left = unit.Name; + // var units = UnitsHaveDotBefore(left); + // var next = unit.Next(); + // if (next is not null && !LR1Creator.TokenUnit.ContainsKey(next)) + // foreach (var item in Units.Where(x => x.Name == next)) + // queue.Enqueue(item); + // HashSet prospect = units + // .Select(x => GetProspectsOf(x)) + // .Aggregate( + // new HashSet(), + // (set, x) => + // { + // set.UnionWith(x); + // return set; + // } + // ); + // unit.Prospect.UnionWith(prospect); + //} + + bool haveChange = true; + while (haveChange) + { + haveChange = false; + foreach (var unit in Units) + { + var left = unit.Name; + var units = UnitsHaveDotBefore(left); + HashSet prospect = units + .Select(x => GetProspectsOf(x)) + .Aggregate( + unit.Prospect, + (set, x) => + { + set.UnionWith(x); + return set; + } + ); + if (!prospect.IsSubsetOf(unit.Prospect)) + haveChange = true; + unit.Prospect.UnionWith(prospect); + } + } + } + + internal List UnitsHaveDotBefore(string name) + { + List res = []; + foreach (LR1Unit unit in Units) + { + if (unit.Next() == name) + res.Add(unit); + } + return res; + } + // 对于文法 X => yz... + // 该方法返回 A => .XB 中的B 以及B可能为空时其后的元素 + internal List GetProspectsOf(LR1Unit unit) + { + if (unit.Name == LR1Creator.StartSymbol) + return ["End"]; + //if(unit.Prospect.Count != 0) + //{ + // throw new Exception("有环???"); + //} + + List ans = unit.PointPosition + 1 == unit.Grammar.Count ? [.. unit.Prospect] : []; + for (int i = unit.PointPosition + 1; i < unit.Grammar.Count; i++) + { + var units = LR1Creator.GetUnits(unit.Grammar[i]); + ans.AddRange(LR1Creator.FirstGroup[unit.Grammar[i]]); + if (units.All(x => !x.Nullable())) + break; + else if (i == unit.Grammar.Count - 1) + ans.AddRange(unit.Prospect); + } + return ans; + } + internal void AddMissingUnits() + { + bool haveNew = true; + while (haveNew) + { + HashSet addingUnits = []; + haveNew = false; + foreach (LR1Unit unit in Units) + { + string? next = unit.Next(); + if (next is null) + continue; + if (LR1Creator.GrammarUnit.TryGetValue(next, out var value)) + { + var set = new HashSet(value.Select(x => x.Clone())); + if (set.IsSubsetOf(Units)) + continue; + haveNew = true; + addingUnits.UnionWith(set); + } + } + Units.UnionWith(addingUnits); + } + } + internal List NextClosures() + { + List ans = []; + Dictionary> nextSteps = []; + foreach(LR1Unit unit in Units) + { + string? next = unit.Next(); + if (next is null) + continue; + if (nextSteps.TryGetValue(next, out var units)) + units.Add(unit); + else + nextSteps.Add(next, [unit]); + } + foreach(var (next,value) in nextSteps) + { + var closure = new LR1Closure(LR1Creator, value.Select(x => x.ToNext()).ToList()); + var existClosure = LR1Creator.Closures.Values.Where(x => x.Equals(closure)).FirstOrDefault(); + if(existClosure is not null) + { + LR1Closure.GlobalIndex--; + Next.Add(next,existClosure.Index); + continue; + } + ans.Add(closure); + Next.Add(next, closure.Index); + } + return ans; + } + + internal List GetReduceUnits() + => Units.Where(x => x.ReadyToReduce()).ToList(); + + public override bool Equals(object? obj) + { + if (obj is not LR1Closure closure || obj is null) + return false; + + return closure.Units.Count == Units.Count && closure.Units.Select(x => Units.Where(y => y.Equals(x)).Any()).All(x=> x); + } + + public override int GetHashCode() + { + int hash = 0; + foreach (var unit in Units.OrderBy(e => e.GetHashCode())) + { + hash ^= unit.GetHashCode(); + } + return hash; + } + + public override string ToString() + { + StringBuilder sb = new(); + sb.Append($"闭包{Index}:\n"); + foreach (var unit in Units) + { + sb.Append(unit + "\n"); + } + return sb.ToString(); + } } } diff --git a/CompilerDesignIflr1/LR1Creator.cs b/CompilerDesignIflr1/LR1Creator.cs new file mode 100644 index 0000000..396a293 --- /dev/null +++ b/CompilerDesignIflr1/LR1Creator.cs @@ -0,0 +1,407 @@ +using System; +using System.Collections.Generic; +using System.ComponentModel.DataAnnotations; +using System.Dynamic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace CompilerDesignIFlr1 +{ + internal class LR1Creator + { + internal string StartSymbol { get; init; } + internal Dictionary Closures = []; + + // 终结符 + internal Dictionary TokenUnit = []; + + // 非终结符 考虑 或 的情况 + internal Dictionary> GrammarUnit = []; + + // First集 + internal Dictionary> FirstGroup = []; + internal Dictionary UnitIndex = []; + + internal LR1Creator(GrammarReader reader) + { + TokenUnit.Add("End", new LR1Unit("End", "#")); + foreach (var (key, value) in reader.SymbolToken) + { + TokenUnit.Add(key, new LR1Unit(key, value)); + } + foreach (var (key, value) in reader.KeyToken) + { + TokenUnit.Add(key, new LR1Unit(key, value)); + } + foreach (var (key, value) in reader.RegexToken) + { + TokenUnit.Add(key, new LR1Unit(key, value)); + } + foreach (var (key, _) in reader.RegularToken) + { + if (key.StartsWith("@skip")) + continue; + if (key.StartsWith("@top")) + GrammarUnit.Add(key.Substring(5), []); + else + GrammarUnit.Add(key, []); + } + string startSymbol = ""; + foreach (var (key, value) in reader.RegularToken) + { + if (key.StartsWith("@skip")) + continue; + if (key.StartsWith("@top")) + { + startSymbol = key.Substring(5); + GrammarUnitCreate(value, key.Substring(5)); + } + else + { + GrammarUnitCreate(value, key); + } + } + SetGrammarIndex(); + PrintGrammar(); + foreach (var (key, _) in TokenUnit) + GetFirst(key); + foreach (var (key, _) in GrammarUnit) + GetFirst(key); + PrintFirst(); + StartSymbol = startSymbol; + I0Creator(); + CalculateAllClosure(); + PrintClosure(Enumerable.Range(0, Closures.Count).ToArray()); + } + + internal void SetGrammarIndex() + { + int id = 0; + foreach (var (key, units) in GrammarUnit) + foreach (var unit in units) + { + unit.Id = id++; + UnitIndex[unit.Id] = unit; + } + } + + internal void PrintClosure(params int[] index) + { + foreach (int t in index) + { + //Console.WriteLine($"闭包{t}:"); + Console.WriteLine(Closures[t]); + //var closure = Closures[t]; + //foreach (var unit in closure.Units) + //{ + // Console.Write($"{unit.Name} ::= "); + // Console.WriteLine(unit); + //} + } + } + + internal void I0Creator() + { + var closure = new LR1Closure(this); + foreach (var root in GrammarUnit[StartSymbol]) + { + var res = root.Clone(); + res.Prospect.Add("End"); + closure.Units.Add(res); + } + closure.AddMissingUnits(); + //foreach (var (key, value) in GrammarUnit) + //{ + // if (key == StartSymbol) + // continue; + // value.ForEach(x => closure.Units.Add(x.Clone())); + //} + closure.CalculateProspects(); + Closures.Add(closure.Index, closure); + } + + internal HashSet GetFirst(string root) + { + if (FirstGroup.TryGetValue(root, out var ans)) + return ans; + if (GrammarUnit.TryGetValue(root, out var units)) + { + HashSet res = []; + foreach (var unit in units) + { + if (FirstGroup.TryGetValue(root, out var list)) + res.UnionWith(list); + else + { + if (unit.Type == "Token") + res.UnionWith(GetFirst(unit.Name)); + else if (unit.Type == "Grammar") + { + var val = unit.Grammar.FirstOrDefault(); + if (val == null) + continue; + res.UnionWith(GetFirst(val)); + } + } + } + FirstGroup.Add(root, res); + return res; + } + else if (TokenUnit.TryGetValue(root, out var unit)) + { + FirstGroup.Add(root, [unit.Name]); + return [unit.Name]; + } + else + { + throw new Exception("Grammar can't be recognized. " + root); + } + } + + internal void CalculateAllClosure() + { + int t = 0; + bool haveNew = true; + while (Closures.ContainsKey(t)) + { + haveNew = false; + if (Closures.TryGetValue(t, out var closure)) + { + List nextClosures = closure.NextClosures(); + if (nextClosures.Count > 0) + haveNew = true; + nextClosures.ForEach(x => Closures.Add(x.Index, x)); + } + t++; + } + if (!Closures.ContainsKey(t) && haveNew) + throw new Exception("Really weird. This situation shouldn't be possible."); + } + + internal string GrammarUnitCreate(string value, string name = "") + { + if (value.EndsWith('*') || value.EndsWith('+')) + { + string s; + if (value.StartsWith('(') && value[^2] == ')') + { + s = DeClosure(value.Substring(1, value.Length - 3), value[^1]); + if (name != "") + { + GrammarUnit[name] = [new LR1Unit(name, [s])]; + } + return s; + } + else if (value.IndexOf(' ') == -1) + { + s = DeClosure(value.Substring(0, value.Length - 1), value[^1]); + if (name != "") + { + GrammarUnit[name] = [new LR1Unit(name, [s])]; + } + return s; + } + } + bool inParen = false; + int last = 0; + name = name.Length == 0 ? NameGenerator(value) : name; + List> ans = []; + List grammar = []; + value = value + ' '; + for (var i = 0; i < value.Length; i++) + { + char c = value[i]; + if (inParen && c != ')') + continue; + switch (c) + { + case '(': + last = i; + inParen = true; + break; + case ')': + { + inParen = false; + break; + } + case ' ': + { + string subValue = value.Substring(last, i - last); + last = i + 1; + if (GrammarUnit.ContainsKey(subValue.Trim())) + grammar.Add(subValue.Trim()); + else if (TokenUnit.ContainsKey(subValue.Trim())) + grammar.Add(subValue.Trim()); + else + { + if (subValue.Trim().Length == 0) + break; + throw new Exception("Grammar can't be recognized. " + subValue); + } + break; + } + case '|': + { + string subValue = value.Substring(last, i - last); + if (subValue.Trim().TrimEnd('|').Length != 0) + { + if (GrammarUnit.ContainsKey(subValue.Trim())) + grammar.Add(subValue.Trim()); + else if (TokenUnit.ContainsKey(subValue.Trim())) + grammar.Add(subValue.Trim()); + else + throw new Exception("Grammar can't be recognized. " + subValue); + } + ans.Add(grammar); + grammar = []; + last = i + 1; + break; + } + case '*': + { + string subString = GrammarUnitCreate(value.Substring(last, i - last + 1)); + grammar.Add(subString); + last = i + 1; + break; + } + case '+': + { + string subString = GrammarUnitCreate(value.Substring(last, i - last + 1)); + grammar.Add(subString); + last = i + 1; + break; + } + default: + break; + } + } + if (grammar.Count > 0) + ans.Add(grammar); + if (GrammarUnit.TryGetValue(name, out var val)) + GrammarUnit[name] = ToLR1Units(name, ans); + else + GrammarUnit.Add(name, ToLR1Units(name, ans)); + return name; + } + + // 嵌套解包直接报错!! + internal string DeClosure(string value, char type) + { + if (value.IndexOf('+') != -1 || value.IndexOf("*") != -1) + throw new Exception("Grammar too complex. " + value); + string name = NameGenerator(value); + List> ans = []; + List res = []; + if (type == '*') + { + ans.Add([]); + var grammars = value.Split('|').Select(x => x.Split(' ').Where(x => x.Length != 0)); + foreach (var grammar in grammars) + { + res = []; + foreach (var gram in grammar) + { + if (TokenUnit.ContainsKey(gram) || GrammarUnit.ContainsKey(gram)) + res.Add(gram); + else + { + Console.WriteLine(gram.Length); + throw new Exception("Unknown grammar. " + gram); + } + } + //ans.Add([name,..res]); + ans.Add([.. res, name]); + } + } + else if (type == '+') + { + var grammars = value.Split('|').Select(x => x.Trim().Split(' ')); + foreach (var grammar in grammars) + { + res = []; + foreach (var gram in grammar) + { + if (TokenUnit.ContainsKey(gram) || GrammarUnit.ContainsKey(gram)) + res.Add(gram); + else + throw new Exception("Unknown grammar. " + gram); + } + ans.Add([.. res]); + + //ans.Add([name,.. res]); + ans.Add([.. res, name]); + } + } + else + { + throw new Exception("Unknown type. " + type); + } + if (GrammarUnit.TryGetValue(name, out var val)) + GrammarUnit[name] = ToLR1Units(name, ans); + else + GrammarUnit.Add(name, ToLR1Units(name, ans)); + return name; + } + + internal string NameGenerator(string key) + { + var list = key.Replace('(', ' ') + .Replace(')', ' ') + .Replace('*', ' ') + .Replace('|', ' ') + .Replace(" ", " ") + .Split(' ') + .Select(x => x.Trim()); + var res = String.Join('_', list); + for (int i = 0; i < 9; i++) + if (!GrammarUnit.ContainsKey(res + '_' + i)) + return res + '_' + i; + throw new Exception("Grammar too Complex:" + key); + } + + internal List ToLR1Units(string name, List> grammars) + { + var res = new List(); + foreach (var grammar in grammars) + { + res.Add(new LR1Unit(name, grammar)); + } + return res; + } + + private void PrintGrammar() + { + Console.WriteLine("文法:"); + foreach (var (key, value) in GrammarUnit) + { + Console.WriteLine("key: " + key); + foreach (var unit in value) + { + Console.WriteLine(" " + unit); + } + } + } + + internal void PrintFirst() + { + Console.WriteLine("First集:"); + foreach (var (key, value) in FirstGroup) + { + Console.Write($"{key}: "); + foreach (var val in value) + Console.Write($"{val} "); + Console.WriteLine(); + } + } + + internal List GetUnits(string name) + { + if (TokenUnit.TryGetValue(name, out var unit)) + return [unit]; + if (GrammarUnit.TryGetValue(name, out var units)) + return units; + throw new Exception($"No Unit Called: {name}"); + } + } +} diff --git a/CompilerDesignIflr1/LR1Table.cs b/CompilerDesignIflr1/LR1Table.cs new file mode 100644 index 0000000..d20d557 --- /dev/null +++ b/CompilerDesignIflr1/LR1Table.cs @@ -0,0 +1,93 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace CompilerDesignIFlr1 +{ + internal class LR1Table + { + internal List Rows = []; + + internal LR1Table(LR1Creator creator) + { + for (int i = 0; i < creator.Closures.Count; i++) + { + LR1Closure closure = creator.Closures[i]; + Dictionary @goto = []; + Dictionary action = []; + foreach (var (by, nextIndex) in closure.Next) + { + if (creator.TokenUnit.ContainsKey(by)) + action.Add(by, "s" + nextIndex); + else + @goto.Add(by, nextIndex); + } + foreach (var unit in closure.GetReduceUnits()) + { + foreach (string prospect in unit.Prospect) + { + if (action.ContainsKey(prospect)) + throw new Exception("Reduce,Reduce/Reduce,Shift error occurred."); + action.Add(prospect, "r" + unit.Id); + } + } + Rows.Add(new Row(i, @goto, action)); + } + PrintRow(); + return; + } + + private void PrintRow() + { + foreach (Row row in Rows) + { + Console.WriteLine(row); + } + } + } + + internal class Row + { + internal int Id; + internal Dictionary GOTO; + internal Dictionary ACTION; + + internal Row(int id, Dictionary @goto, Dictionary action) + { + Id = id; + GOTO = @goto; + ACTION = action; + } + + public override string ToString() + { + StringBuilder sb = new StringBuilder(); + sb.Append(Id + " \n"); + sb.Append("ACTION: \n"); + foreach (var (key, value) in ACTION) + sb.Append(key).Append(": ").Append(value).Append(", "); + sb.Append("GOTO: \n"); + foreach (var (key, value) in GOTO) + sb.Append(key).Append(": ").Append(value).Append(", "); + return sb.ToString(); + } + + public (string, int) Next(string name) + { + if (name == "StatementList" && Id == 0) + return ("ACC", 0); + if (GOTO.TryGetValue(name, out var result)) + return ("GOTO", result); + if (ACTION.TryGetValue(name, out var value)) + if (value.StartsWith("s")) + return ("GOTO", int.Parse(value.Substring(1))); + else if (value.StartsWith("r")) + return ("Reduce", int.Parse(value.Substring(1))); + else + throw new Exception($"Value can't be recognized. {value}"); + throw new InvalidOperationException($"Code error in state {Id}: {name} not found"); + } + } +} diff --git a/CompilerDesignIflr1/LR1Unit.cs b/CompilerDesignIflr1/LR1Unit.cs index d3ec49b..d17249b 100644 --- a/CompilerDesignIflr1/LR1Unit.cs +++ b/CompilerDesignIflr1/LR1Unit.cs @@ -8,6 +8,127 @@ namespace CompilerDesignIFlr1 { internal class LR1Unit { - public LR1Unit() { } + internal LR1Unit() { } + + internal LR1Unit(string name, string grammar) + { + Type = "Token"; + Name = name; + Grammar = [grammar]; + } + + internal LR1Unit(string name, string value, string grammar) + { + Type = "Token"; + Name = name; + Grammar = [grammar]; + Value = value; + } + + internal LR1Unit(string name, List grammar) + { + Type = "Grammar"; + Name = name; + Grammar = grammar; + } + + internal LR1Unit( + int id, + string type, + string name, + HashSet prospect, + int pointPosition, + List grammar + ) + { + Id = id; + Type = type; + Name = name; + Prospect = prospect; + PointPosition = pointPosition; + Grammar = grammar; + } + + internal int Id { get; set; } = -1; + internal string Type { get; set; } = ""; + internal string Name { get; set; } = ""; + internal string Value { get; set; } = ""; + internal HashSet Prospect { get; set; } = []; + internal int PointPosition { get; set; } = 0; + internal List Grammar { get; set; } = []; + + internal string? Next() => PointPosition >= Grammar.Count ? null : Grammar[PointPosition]; + + internal bool ReadyToReduce() => PointPosition >= Grammar.Count; + + internal bool CanReduce(List stack) + { + for (int i = 1; i <= Grammar.Count; i++) + { + if (stack[^i].Name != Grammar[^i]) + return false; + } + return true; + } + + public override string ToString() + { + StringBuilder sb = new(); + //if (Grammar.Count == 0) + // return "."; + sb.Append($"{Id, 2} "); + sb.Append(Name).Append(" ::= "); + for (int i = 0; i < Grammar.Count; i++) + { + if (PointPosition == i) + sb.Append(". "); + sb.Append(Grammar[i] + " "); + } + if (PointPosition == Grammar.Count) + sb.Append('.'); + foreach (var item in Prospect) + { + sb.Append(", " + item); + } + return sb.ToString(); + } + + internal LR1Unit Clone() => + new LR1Unit(Id, Type, Name, new HashSet(Prospect), PointPosition, [.. Grammar]); + + public override bool Equals(object? obj) + { + if (obj is not LR1Unit other) + return false; + + return Type == other.Type + && Name == other.Name + && Id == other.Id + && PointPosition == other.PointPosition + && Grammar.SequenceEqual(other.Grammar) + && Prospect.SetEquals(other.Prospect); + } + + public override int GetHashCode() + { + HashCode hash = new HashCode(); + hash.Add(Type); + hash.Add(Name); + hash.Add(PointPosition); + foreach (var item in Grammar) + hash.Add(item); + foreach (var item in Prospect) + hash.Add(item); + return hash.ToHashCode(); + } + + internal bool Nullable() => Grammar.Count == 0; + + internal LR1Unit ToNext() + { + var unit = Clone(); + unit.PointPosition++; + return unit; + } } } diff --git a/CompilerDesignIflr1/LexicalAnalysis.cs b/CompilerDesignIflr1/LexicalAnalysis.cs index e3b3ae0..8525370 100644 --- a/CompilerDesignIflr1/LexicalAnalysis.cs +++ b/CompilerDesignIflr1/LexicalAnalysis.cs @@ -2,14 +2,153 @@ using System.Collections.Generic; using System.Linq; using System.Text; +using System.Text.RegularExpressions; using System.Threading.Tasks; +using CompilerDesignIFlr1; namespace CompilerDesignIflr1 { internal class LexicalAnalysis { string Text { get; set; } = ""; + Dictionary Symbol = []; + Dictionary Keyword = []; + Dictionary Patterns = []; + internal List Result; + internal LexicalAnalysis(LR1Creator creator, string codeFilePath) + { + foreach (var (_, unit) in creator.TokenUnit) + { + var val = unit.Grammar[0]; + if (val.Contains('[')) + Patterns.Add(val, unit); + else if (val.Select(x => char.IsLetterOrDigit(x)).All(x => x)) + Keyword.Add(val, unit); + else + Symbol.Add(val, unit); + } + PrintStrSplitResult(); + var list = StrSplit(File.ReadAllText(codeFilePath)); + foreach (string s in list) + Console.WriteLine(s); + Result = Analyze(list); + PrintAnalyzeResult(); + } + internal Stack GetStack() + { + Stack stack = []; + for (int i = 1; i <= Result.Count; i++) + { + stack.Push(Result[^i]); + } + return stack; + } + + internal void PrintStrSplitResult() + { + Console.WriteLine("Symbol"); + foreach (var (a, b) in Symbol) + Console.WriteLine($"{a}: {b}"); + Console.WriteLine("keyword"); + foreach (var (a, b) in Keyword) + Console.WriteLine($"{a}: {b}"); + Console.WriteLine("Regex"); + foreach (var (a, b) in Patterns) + Console.WriteLine($"{a}: {b}"); + } + + internal void PrintAnalyzeResult() + { + Console.WriteLine("units"); + foreach (var unit in Result) + Console.WriteLine($"{unit.Name}: {unit.Value}"); + } + + public List Analyze(List list) + { + List ans = []; + for (int i = 0; i < list.Count; i++) + { + string s = list[i]; + if (Keyword.TryGetValue(s, out var unit)) + { + var temp = unit.Clone(); + temp.Value = s; + ans.Add(temp); + } + else if (Symbol.TryGetValue(s, out var symbol)) + { + var temp = symbol.Clone(); + if (i + 1 < list.Count && Symbol.TryGetValue(s + list[i + 1], out symbol)) + { + temp = symbol.Clone(); + temp.Value = s + list[i + 1]; + i++; + } + else + temp.Value = s; + ans.Add(temp); + } + else + { + bool noAnswer = true; + foreach (var (pattern, ut) in Patterns) + { + if (Regex.IsMatch(s, pattern)) + { + var temp = ut.Clone(); + temp.Value = s; + ans.Add(temp); + noAnswer = false; + break; + } + } + if (noAnswer) + { + Console.WriteLine($"无法识别的字符串: {s}"); + } + } + } + return ans; + } + + public List StrSplit(string s) + { + List lt = new List(); + int l = 0; + int r = 1; + + while (r < s.Length) + { + if (!char.IsLetterOrDigit(s[r])) + { + string k = s.Substring(l, r - l).Trim(); + if (k.Length != 0) + { + lt.Add(k); + } + k = s[r].ToString().Trim(); + if (k.Length != 0) + { + lt.Add(k); + } + l = r + 1; + r = l; + } + else + { + r += 1; + } + } + + if (l < s.Length) + { + lt.Add(s.Substring(l, r - l)); + } + lt.Add("#"); + return lt; + } } } diff --git a/CompilerDesignIflr1/Program.cs b/CompilerDesignIflr1/Program.cs index 3751555..5fb481c 100644 --- a/CompilerDesignIflr1/Program.cs +++ b/CompilerDesignIflr1/Program.cs @@ -1,2 +1,10 @@ // See https://aka.ms/new-console-template for more information +using CompilerDesignIflr1; +using CompilerDesignIFlr1; + Console.WriteLine("Hello, World!"); +var grammarReader = new GrammarReader("./files/if-grammar.grammar"); +var lr1Creator = new LR1Creator(grammarReader); +var lr1Table = new LR1Table(lr1Creator); +var lexicalAnalysis = new LexicalAnalysis(lr1Creator, "./files/code"); +var stateMachine = new StateMachine(lr1Table, lexicalAnalysis, lr1Creator); diff --git a/CompilerDesignIflr1/StateMachine.cs b/CompilerDesignIflr1/StateMachine.cs new file mode 100644 index 0000000..386ddbc --- /dev/null +++ b/CompilerDesignIflr1/StateMachine.cs @@ -0,0 +1,65 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using CompilerDesignIflr1; + +namespace CompilerDesignIFlr1 +{ + internal class StateMachine + { + internal LR1Table Table; + internal LexicalAnalysis AnalysisResult; + internal LR1Creator Creator; + internal List<(LR1Unit, int)> stack = [(new LR1Unit("End","#"),0)]; + internal Stack Tokens; + + internal StateMachine(LR1Table table, LexicalAnalysis analysis, LR1Creator creator) + { + Table = table; + AnalysisResult = analysis; + Creator = creator; + Tokens = AnalysisResult.GetStack(); + Compute(); + } + + internal void Compute() + { + while (true) + { + if (ComputeOnce(Tokens.Pop())) + break; + } + } + + internal bool ComputeOnce(LR1Unit unit) + { + var (action, destination) = Table.Rows[stack[^1].Item2].Next(unit.Name); + switch (action) + { + case "GOTO": + stack.Add((unit, destination)); + break; + case "Reduce": + var reduceUnit = Creator.UnitIndex[destination]; + if (reduceUnit.CanReduce(stack.Select(x => x.Item1).ToList())) + { + stack.RemoveRange(stack.Count - reduceUnit.Grammar.Count, reduceUnit.Grammar.Count); + } + else + throw new Exception("Reduce not allow."); + Console.WriteLine(reduceUnit); + Tokens.Push(unit); + Tokens.Push(reduceUnit.Clone()); + break; + case "ACC": + Console.WriteLine(Creator.UnitIndex[destination]); + return true; + default: + throw new Exception("Action is not recognized."); + } + return false; + } + } +} diff --git a/CompilerDesignIflr1/files/code b/CompilerDesignIflr1/files/code new file mode 100644 index 0000000..8359440 --- /dev/null +++ b/CompilerDesignIflr1/files/code @@ -0,0 +1,9 @@ +int i=0,t,b=15; +if(i>=10) + if(k==9) + h = 6; + else + { + c=10; + if(k<=95 && ) + } diff --git a/CompilerDesignIflr1/if-grammar.grammar b/CompilerDesignIflr1/files/if-grammar.grammar similarity index 76% rename from CompilerDesignIflr1/if-grammar.grammar rename to CompilerDesignIflr1/files/if-grammar.grammar index 531ff15..b97e936 100644 --- a/CompilerDesignIflr1/if-grammar.grammar +++ b/CompilerDesignIflr1/files/if-grammar.grammar @@ -1,4 +1,4 @@ -@top Program { +@top Program { StatementList } @@ -29,7 +29,7 @@ Statement { } NoIfStatement { - AssignmentStatement Semicolon | VariableDefinition Semicolon | LBrace Statement* RBrace | ConstantDefinition Semicolon + AssignmentStatement Semicolon | VariableDefinition Semicolon | LBrace Statement* RBrace | ConstantDefinition Semicolon | Semicolon } AssignmentStatement { @@ -40,14 +40,14 @@ Term { Factor (MultiplyLike Factor)* } -VariableDefinition { - Type (Identifier | Identifier Equal Expression)+ -} - ConstantDefinition { Const VariableDefinition } +VariableDefinition { + Type (Identifier | AssignmentStatement)+ (Comma Identifier | Comma AssignmentStatement)* +} + Type { Int | Char } @@ -67,6 +67,11 @@ MultiplyLike { Number { UnsignedNumber | Minus UnsignedNumber | Plus UnsignedNumber } + +Operator { + EuqalTo | NotEqualTo | LessThan | GreaterThan | LessThanOrEqual | GreaterThanOrEqual +} + @tokens { If { "!if" } Else { "!else" } @@ -82,12 +87,18 @@ Number { RParen { ")" } LBrace { "{" } RBrace { "}" } + Comma { "," } Semicolon { ";" } Identifier { $[a-zA-Z_]$[a-zA-Z0-9_]* } UnsignedNumber { $[0-9]+ } String { "\"" $[\x00-\x7F]* "\"" } Character { "'" $[\x00-\x7F] "'" } - Operator { "==" | "!=" | "<=" | ">=" | "<" | ">" } + EuqalTo { "==" } + NotEqualTo { "!=" } + LessThan { "<" } + GreaterThan { ">" } + LessThanOrEqual { "<=" } + GreaterThanOrEqual { ">=" } Equal { "=" } Whitespace { $[\t\n\r]+ } } \ No newline at end of file