167 lines
5.1 KiB
C#
167 lines
5.1 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
using System.Threading.Tasks;
|
|
using CompilerDesignIFlr1;
|
|
|
|
namespace CompilerDesignIflr1
|
|
{
|
|
internal class LexicalAnalysis
|
|
{
|
|
/// <summary>
/// String property initialised to the empty string; no member of this class reads or assigns it.
/// </summary>
private string Text { get; set; } = "";

/// <summary>
/// Token units keyed by their first grammar entry, for entries that contain no '['
/// and at least one character that is not a letter or digit.
/// </summary>
private Dictionary<string, LR1Unit> Symbol = new();

/// <summary>
/// Token units keyed by their first grammar entry, for entries made up only of
/// letters and digits (and containing no '[').
/// </summary>
private Dictionary<string, LR1Unit> Keyword = new();

/// <summary>
/// Token units keyed by their first grammar entry, for entries that contain '[';
/// the key is later used as a regular expression.
/// </summary>
private Dictionary<string, LR1Unit> Patterns = new();

/// <summary>
/// Units produced by <c>Analyze</c>; assigned by the constructor.
/// </summary>
internal List<LR1Unit> Result;
|
|
|
|
/// <summary>
/// Fills the symbol, keyword and pattern tables from <paramref name="creator"/>, then
/// reads the file at <paramref name="codeFilePath"/>, splits it into token strings and
/// stores the analysed units in <see cref="Result"/>.
/// </summary>
/// <remarks>
/// Writes diagnostics to the console along the way: the three tables, every raw token
/// string, and finally the analysed units.
/// </remarks>
/// <param name="creator">Provides the token units through its <c>TokenUnit</c> collection.</param>
/// <param name="codeFilePath">Path of the file whose complete text is tokenized.</param>
internal LexicalAnalysis(LR1Creator creator, string codeFilePath)
{
    // Route each token unit to a table according to the shape of its first grammar entry.
    foreach (var (_, tokenUnit) in creator.TokenUnit)
    {
        var spelling = tokenUnit.Grammar[0];

        Dictionary<string, LR1Unit> table;
        if (spelling.Contains('['))
        {
            // Anything containing '[' is kept as a regular-expression pattern.
            table = Patterns;
        }
        else if (spelling.All(char.IsLetterOrDigit))
        {
            table = Keyword;
        }
        else
        {
            table = Symbol;
        }

        table.Add(spelling, tokenUnit);
    }

    PrintStrSplitResult();

    var tokens = StrSplit(File.ReadAllText(codeFilePath));
    foreach (string token in tokens)
    {
        Console.WriteLine(token);
    }

    Result = Analyze(tokens);
    PrintAnalyzeResult();
}
|
|
|
|
/// <summary>
/// Copies <see cref="Result"/> onto a new stack.
/// </summary>
/// <returns>
/// A stack containing every element of <see cref="Result"/>, with the first list
/// element on top, so popping yields the units in list order.
/// </returns>
internal Stack<LR1Unit> GetStack()
{
    var units = new Stack<LR1Unit>();

    // Walk the list back to front so that element 0 is pushed last.
    for (int index = Result.Count - 1; index >= 0; index--)
    {
        units.Push(Result[index]);
    }

    return units;
}
|
|
|
|
/// <summary>
/// Writes the symbol, keyword and pattern tables to the console, each preceded by a
/// heading line and followed by one "key: unit" line per entry.
/// </summary>
internal void PrintStrSplitResult()
{
    PrintTable("Symbol", Symbol);
    PrintTable("keyword", Keyword);
    PrintTable("Regex", Patterns);

    // Prints the heading, then every entry of the table in enumeration order.
    static void PrintTable(string heading, Dictionary<string, LR1Unit> table)
    {
        Console.WriteLine(heading);
        foreach (KeyValuePair<string, LR1Unit> entry in table)
        {
            Console.WriteLine($"{entry.Key}: {entry.Value}");
        }
    }
}
|
|
|
|
/// <summary>
/// Writes the heading "units" and then one "Name: Value" line for every entry of
/// <see cref="Result"/> to the console.
/// </summary>
internal void PrintAnalyzeResult()
{
    Console.WriteLine("units");

    for (int index = 0; index < Result.Count; index++)
    {
        var current = Result[index];
        Console.WriteLine($"{current.Name}: {current.Value}");
    }
}
|
|
|
|
/// <summary>
/// Converts raw token strings into grammar units.
/// </summary>
/// <remarks>
/// Each token is resolved by the first rule that applies:
/// <list type="number">
/// <item>an exact entry in the keyword table;</item>
/// <item>a symbol formed by joining the token with the one after it (the following
/// token is then consumed as well);</item>
/// <item>an exact entry in the symbol table;</item>
/// <item>the first pattern, in table enumeration order, whose regular expression
/// matches the whole token.</item>
/// </list>
/// A token that matches nothing is reported on the console and omitted from the result.
/// </remarks>
/// <param name="list">Token strings to classify, for example the output of <see cref="StrSplit"/>.</param>
/// <returns>
/// One clone of the matching table unit per recognised token, with its <c>Value</c>
/// set to the matched text.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="list"/> is <c>null</c>.</exception>
public List<LR1Unit> Analyze(List<string> list)
{
    ArgumentNullException.ThrowIfNull(list);

    List<LR1Unit> ans = [];

    // Clones the table entry so the shared prototype is never mutated, then records the text.
    void Emit(LR1Unit prototype, string text)
    {
        var clone = prototype.Clone();
        clone.Value = text;
        ans.Add(clone);
    }

    for (int i = 0; i < list.Count; i++)
    {
        string token = list[i];

        if (Keyword.TryGetValue(token, out var keyword))
        {
            Emit(keyword, token);
            continue;
        }

        // Two-token symbols take priority over single-token ones.
        if (i + 1 < list.Count && Symbol.TryGetValue(token + list[i + 1], out var pair))
        {
            Emit(pair, token + list[i + 1]);
            i++; // the following token has been consumed as part of this symbol
            continue;
        }

        if (Symbol.TryGetValue(token, out var single))
        {
            Emit(single, token);
            continue;
        }

        bool recognised = false;
        foreach (var (pattern, unit) in Patterns)
        {
            // The non-capturing group keeps the anchors bound to the whole pattern;
            // without it a top-level alternation "a|b" would parse as "(^a)|(b$)"
            // and accept partial matches.
            if (Regex.IsMatch(token, "^(?:" + pattern + ")$"))
            {
                Emit(unit, token);
                recognised = true;
                break;
            }
        }

        if (!recognised)
        {
            // Message text means "unrecognised string".
            Console.WriteLine($"无法识别的字符串: {token}");
        }
    }

    return ans;
}
|
|
|
|
/// <summary>
/// Splits source text into raw token strings.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item>A maximal run of letters and digits (per <see cref="char.IsLetterOrDigit(char)"/>)
/// becomes one token.</item>
/// <item>White space separates tokens and is discarded.</item>
/// <item>A single or double quote starts a quoted token that runs up to and including
/// the next occurrence of the same quote character. If there is no closing quote, the
/// rest of the input becomes the token.</item>
/// <item>Every other character becomes a one-character token.</item>
/// </list>
/// A final "#" entry is always appended (presumably the end-of-input marker expected
/// by the parser; confirm against the grammar).
/// </remarks>
/// <param name="s">Text to split.</param>
/// <returns>The token strings in input order, followed by "#".</returns>
/// <exception cref="ArgumentNullException"><paramref name="s"/> is <c>null</c>.</exception>
public List<string> StrSplit(string s)
{
    ArgumentNullException.ThrowIfNull(s);

    List<string> tokens = [];
    int runStart = 0; // first index of the letter/digit run not yet emitted
    int pos = 0;

    while (pos < s.Length)
    {
        char current = s[pos];

        // NOTE(review): '_' and '.' are not letters or digits, so "my_var" and "3.14"
        // are each split into three tokens. Confirm this is intended.
        if (char.IsLetterOrDigit(current))
        {
            pos++;
            continue;
        }

        // A non-alphanumeric character ends the pending run, if any.
        if (pos > runStart)
        {
            tokens.Add(s.Substring(runStart, pos - runStart));
        }

        if (current is '\'' or '"')
        {
            int close = s.IndexOf(current, pos + 1);
            if (close < 0)
            {
                // Unterminated quote: take the rest of the input instead of reading
                // one character past the end of the string.
                close = s.Length - 1;
            }

            tokens.Add(s.Substring(pos, close - pos + 1));
            pos = close;
        }
        else if (!char.IsWhiteSpace(current))
        {
            tokens.Add(current.ToString());
        }

        pos++;
        runStart = pos;
    }

    // Emit a letter/digit run that reaches the end of the input.
    if (runStart < s.Length)
    {
        tokens.Add(s.Substring(runStart));
    }

    tokens.Add("#");
    return tokens;
}
|
|
}
|
|
}
|