CompilerDesignIFLR1/CompilerDesignIflr1/LexicalAnalysis.cs

167 lines
5.1 KiB
C#
Raw Normal View History

2024-12-20 20:20:34 +08:00
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
2024-12-23 01:42:58 +08:00
using System.Text.RegularExpressions;
2024-12-20 20:20:34 +08:00
using System.Threading.Tasks;
2024-12-23 01:42:58 +08:00
using CompilerDesignIFlr1;
2024-12-20 20:20:34 +08:00
namespace CompilerDesignIflr1
{
internal class LexicalAnalysis
{
/// <summary>
/// Text buffer that defaults to the empty string. No member visible in this
/// file reads or assigns it.
/// </summary>
string Text { get; set; } = "";
2024-12-23 01:42:58 +08:00
/// <summary>
/// Token units whose first grammar entry is neither a bracketed pattern nor
/// purely alphanumeric, keyed by that entry (filled by the constructor).
/// </summary>
Dictionary<string, LR1Unit> Symbol = [];
/// <summary>
/// Token units whose first grammar entry consists only of letters and digits,
/// keyed by that entry (filled by the constructor).
/// </summary>
Dictionary<string, LR1Unit> Keyword = [];
/// <summary>
/// Token units whose first grammar entry contains '['; Analyze treats the key
/// as a regular expression that must match a whole token.
/// </summary>
Dictionary<string, LR1Unit> Patterns = [];
/// <summary>
/// Units produced by Analyze for the file handed to the constructor.
/// </summary>
internal List<LR1Unit> Result;
2024-12-20 20:20:34 +08:00
2024-12-23 01:42:58 +08:00
/// <summary>
/// Sorts the token units of <paramref name="creator"/> into the pattern,
/// keyword and symbol tables, then reads the file at
/// <paramref name="codeFilePath"/>, splits it into raw tokens, and stores the
/// analyzed units in <see cref="Result"/>. The tables, the raw tokens and the
/// final units are all echoed to the console along the way.
/// </summary>
/// <param name="creator">Source of the token units; only its TokenUnit collection is read here.</param>
/// <param name="codeFilePath">Path of the source file to tokenize.</param>
internal LexicalAnalysis(LR1Creator creator, string codeFilePath)
{
    foreach (var (_, unit) in creator.TokenUnit)
    {
        var text = unit.Grammar[0];

        // Classification is by the first grammar entry only:
        //   contains '['            -> regular-expression pattern
        //   letters and digits only -> keyword
        //   anything else           -> symbol
        var table = text.Contains('[')
            ? Patterns
            : text.All(char.IsLetterOrDigit)
                ? Keyword
                : Symbol;
        table.Add(text, unit);
    }

    PrintStrSplitResult();

    var tokens = StrSplit(File.ReadAllText(codeFilePath));
    tokens.ForEach(Console.WriteLine);

    Result = Analyze(tokens);
    PrintAnalyzeResult();
}
2024-12-20 20:20:34 +08:00
2024-12-23 01:42:58 +08:00
/// <summary>
/// Builds a stack from <see cref="Result"/> in which the first unit ends up
/// on top and the last unit at the bottom.
/// </summary>
/// <returns>A new stack; <see cref="Result"/> itself is not modified.</returns>
internal Stack<LR1Unit> GetStack()
{
    var stack = new Stack<LR1Unit>();
    // Push back-to-front so that popping yields the units in their original order.
    for (int index = Result.Count - 1; index >= 0; index--)
    {
        stack.Push(Result[index]);
    }
    return stack;
}
/// <summary>
/// Writes the symbol, keyword and pattern tables to the console, each one
/// under its own heading and one "key: unit" line per entry.
/// </summary>
internal void PrintStrSplitResult()
{
    // Prints one table: its heading followed by every entry.
    static void Dump(string heading, Dictionary<string, LR1Unit> table)
    {
        Console.WriteLine(heading);
        foreach (var entry in table)
        {
            Console.WriteLine($"{entry.Key}: {entry.Value}");
        }
    }

    Dump("Symbol", Symbol);
    Dump("keyword", Keyword);
    Dump("Regex", Patterns);
}
/// <summary>
/// Writes a "units" heading followed by one "Name: Value" line for every
/// unit in <see cref="Result"/>.
/// </summary>
internal void PrintAnalyzeResult()
{
    Console.WriteLine("units");
    Result.ForEach(unit => Console.WriteLine($"{unit.Name}: {unit.Value}"));
}
/// <summary>
/// Maps raw token strings to grammar units. Each string is tried, in order,
/// as a keyword, as the first half of a two-token symbol, as a single symbol,
/// and finally against every regular-expression pattern.
/// </summary>
/// <param name="list">Raw token strings, e.g. the output of StrSplit.</param>
/// <returns>
/// A clone of the matching unit for every recognized token, with its Value
/// set to the matched text. Strings that match nothing are reported on the
/// console and are NOT represented in the returned list.
/// </returns>
public List<LR1Unit> Analyze(List<string> list)
{
    // Clones the table entry and records the matched text on the clone.
    static LR1Unit MakeToken(LR1Unit template, string lexeme)
    {
        var token = template.Clone();
        token.Value = lexeme;
        return token;
    }

    List<LR1Unit> ans = [];
    for (int i = 0; i < list.Count; i++)
    {
        string s = list[i];

        if (Keyword.TryGetValue(s, out var keywordUnit))
        {
            ans.Add(MakeToken(keywordUnit, s));
            continue;
        }

        // The splitter emits every punctuation character on its own, so a
        // two-character symbol arrives as two adjacent tokens; try the pair
        // before falling back to the single character.
        if (i + 1 < list.Count)
        {
            string pair = s + list[i + 1];
            if (Symbol.TryGetValue(pair, out var pairUnit))
            {
                ans.Add(MakeToken(pairUnit, pair));
                i++; // the lookahead token has been consumed as well
                continue;
            }
        }

        if (Symbol.TryGetValue(s, out var symbolUnit))
        {
            ans.Add(MakeToken(symbolUnit, s));
            continue;
        }

        bool recognized = false;
        foreach (var (pattern, patternUnit) in Patterns)
        {
            // The non-capturing group makes ^ and $ apply to the whole
            // pattern. Without it a top-level alternation such as "a|b"
            // would become "^a|b$" and accept partial matches.
            if (Regex.IsMatch(s, "^(?:" + pattern + ")$"))
            {
                ans.Add(MakeToken(patternUnit, s));
                recognized = true;
                break;
            }
        }

        if (!recognized)
        {
            Console.WriteLine($"无法识别的字符串: {s}");
        }
    }
    return ans;
}
/// <summary>
/// Splits source text into raw token strings.
/// <list type="bullet">
/// <item>A maximal run of letters and digits becomes one token.</item>
/// <item>Whitespace separates tokens and is discarded.</item>
/// <item>A single or double quote starts a literal that runs through the next
/// occurrence of the same quote character; the literal, quotes included, is
/// one token. Escape sequences are not interpreted.</item>
/// <item>Any other character becomes a one-character token.</item>
/// </list>
/// The end marker "#" is always appended as the last token.
/// </summary>
/// <param name="s">The text to split.</param>
/// <returns>The tokens in source order, followed by "#".</returns>
/// <remarks>
/// An unterminated literal is emitted as a single token that extends to the
/// end of the input (previously this case threw ArgumentOutOfRangeException).
/// </remarks>
public List<string> StrSplit(string s)
{
    List<string> tokens = new List<string>();
    int runStart = 0; // first character of the pending letter/digit run
    int pos = 0;

    while (pos < s.Length)
    {
        char current = s[pos];
        if (char.IsLetterOrDigit(current))
        {
            pos++;
            continue;
        }

        // A delimiter ends the pending run, if there is one.
        if (pos > runStart)
        {
            tokens.Add(s.Substring(runStart, pos - runStart));
        }

        if (current is '\'' or '"')
        {
            int closing = s.IndexOf(current, pos + 1);
            if (closing < 0)
            {
                // No closing quote: take the rest of the input rather than
                // reading past the end of the string.
                tokens.Add(s.Substring(pos));
                closing = s.Length;
            }
            else
            {
                tokens.Add(s.Substring(pos, closing - pos + 1));
            }
            pos = closing;
        }
        else if (!char.IsWhiteSpace(current))
        {
            tokens.Add(current.ToString());
        }

        runStart = pos + 1;
        pos = runStart;
    }

    // Flush a letter/digit run that reaches the end of the input.
    if (runStart < s.Length)
    {
        tokens.Add(s.Substring(runStart, pos - runStart));
    }

    tokens.Add("#");
    return tokens;
}
2024-12-20 20:20:34 +08:00
}
}