import sys

import operator

import copy

#终结符

terSymbol = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',

'+','-','*','/','(',')','^']

#非终结符

non_ter = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']

firstSET = [[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]

followSET = [[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]

#求first集

def first(gra_line):

indexSET = []

x = gra_line[0] #x是开头的非终结符

ind = non_ter.index(x) #firstSET[ind]是当前非终结符的first集

#找到所有产生式右部的第一个字符,下标值返回列表indexSET

for g in gra_line:

if g == '>' or g == '|':

indexSET.append(gra_line.index(g) + 1)

#判断这些字符是否为终结符、空、非终结符,i是下标啊!gra_line[i]才是遍历的那个字符

for i in indexSET:

if gra_line[i] in terSymbol or gra_line[i] == '@':

if gra_line[i] not in firstSET[ind]:

firstSET[ind].append(gra_line[i])

elif gra_line[i] in non_ter:

#先把gra_line[i]的first集的非空元素加进去

if len(firstSET[non_ter.index(gra_line[i])]) == 0:

for gr in grammer:

if gr.startswith(gra_line[i]):

first(gr)

break

for g in firstSET[non_ter.index(gra_line[i])]:

if g != '@' and g not in firstSET[ind]:

firstSET[ind].append(g)

#再判断一堆空的那个情况

j = i #i是indexSET遍历用的,不能改,j用来遍历gra_line好了

while '@' in firstSET[non_ter.index(gra_line[j])]:

j += 1

#全包含空就把空加进去

if gra_line[j] == '|' or gra_line[j] == '\n':

firstSET[ind].append('@')

break

#把gra_line[j]的first集的非空元素加进去

if len(firstSET[non_ter.index(gra_line[j])]) == 0:

for gr in grammer:

if gr.startswith(gra_line[j]):

first(gr)

break

for g in firstSET[non_ter.index(gra_line[j])]:

if g != '@' and g not in firstSET[ind]:

firstSET[ind].append(g)

return firstSET

#求字符串的first集

def firststr(str):

firsts = []

if str == '@':

firsts.append('@')

return firsts

else:

for s in str:

if s in terSymbol:

firsts.append(s)

return firsts

elif '@' not in firstSET[non_ter.index(s)]:

for f in firstSET[non_ter.index(s)]:

if f not in firsts:

firsts.append(f)

return firsts

else:

for f in firstSET[non_ter.index(s)]:

if f != '@' and f not in firsts:

firsts.append(f)

if str.endswith(s):

firsts.append('@')

return firsts

#求follow集

def follow(gra_line):

#如果是开始符号则把#加入它的follow集

if gra_line[0] == grammer[0][0] and '#' not in followSET[non_ter.index(gra_line[0])]:

followSET[non_ter.index(gra_line[0])].append('#')

#查找产生式右部的非终结符并把下标加到nonIndex中,注意不能用index是因为可能会有重复的字符

nonIndex = []

g = 0

while g < len(gra_line):

if gra_line[g] in non_ter:

nonIndex.append(g)

g += 1

#从第二个非终结符开始判断它的位置,注意n是该非终结符在gra_line中的下标,gra_line[n]才是遍历的那个非终结符

for n in nonIndex[1:]:

#如果该终结符在产生式最右,则把follow(nonIndex[0])加入follow(n)中

if gra_line[n + 1] == '|' or gra_line[n + 1] == '\n':

for i in followSET[non_ter.index(gra_line[0])]:

if i not in followSET[non_ter.index(gra_line[n])]:

followSET[non_ter.index(gra_line[n])].append(i)

#如果该终结符不在最后

else:

#先把该终结符后的字符串截取出来

str = ''

for s in gra_line[n + 1:]:

if s == '|' or s == '\n':

break

else:

str = str + s

#如果字符串的first集中含有空

if '@' in firststr(str):

for i in followSET[non_ter.index(gra_line[0])]:

if i not in followSET[non_ter.index(gra_line[n])]:

followSET[non_ter.index(gra_line[n])].append(i)

for i in firststr(str):

if i != '@' and i not in followSET[non_ter.index(gra_line[n])]:

followSET[non_ter.index(gra_line[n])].append(i)

#如果字符串的first集中不含有空

else:

for i in firststr(str):

if i != '@' and i not in followSET[non_ter.index(gra_line[n])]:

followSET[non_ter.index(gra_line[n])].append(i)

return followSET

def cmp(SET1, SET2):

i = 0

while i < 26:

if (operator.eq(SET1[i], SET2[i])) == False:

return False

else:

i += 1

return True

#构造预测分析表

def table(grammer, column, row):

col = len(column) + 1

ro = len(row) + 1

#创建一个c列r行的二维列表

gra = [['\t' for col in range(col)] for row in range(ro)]

gra[0][0] = ' '

i = 1

for c in column:

gra[0][i] = c

i += 1

j = 1

for r in row:

gra[j][0] = r

j += 1

for gra_line in grammer:

# 先看每一行有几个产生式,把每一个产生式的开始索引加入pro_index

pro_index = []

for g in gra_line:

if g == '>' or g == '|':

pro_index.append(gra_line.index(g) + 1)

for p in pro_index:

str = ''

for s in gra_line[p:]:

if s == '|' or s == '\n':

break

else:

str = str + s

#找到了单独的产生式后,看这个产生式的first集,注意产生式是单独一个空的情况

if str == '@':

for f in followSET[non_ter.index(gra_line[0])]:

r = row.index(gra_line[0]) + 1

c = column.index(f) + 1

pro = gra_line[0] + '->' + str

gra[r][c] = pro

else:

for n in firststr(str):

if n == '@':

continue

r = row.index(gra_line[0]) + 1

c = column.index(n) + 1

pro = gra_line[0] + '->' + str

gra[r][c] = pro

if '@' in firststr(str):

for f in followSET[non_ter.index(gra_line[0])]:

r = row.index(gra_line[0]) + 1

c = column.index(f) + 1

pro = gra_line[0] + '->' + str

gra[r][c] = pro

return gra

#总控程序

def master(ana_str, column, row):

flag = 0

ana_str += '#'

stack = ['#']

stack.append(grammer[0][0])

while flag == 0:

print("当前栈中元素为:")

print(stack)

print("当前字符串为:")

print(ana_str)

if stack[-1] == ana_str[0] and stack[-1] == '#':

print("匹配成功,分析结束")

flag = 1

return True

elif stack[-1] == ana_str[0] and stack[-1] != '#':

stack.pop()

ana_str = ana_str[1:]

print("非终结符匹配成功")

elif stack[-1] in non_ter:

if ana_str[0] not in column:

print("匹配失败")

flag = 1

return False

elif gra[row.index(stack[-1]) + 1][column.index(ana_str[0]) + 1] != '\t':

str = gra[row.index(stack[-1]) + 1][column.index(ana_str[0]) + 1]

print("当前所用产生式为:")

print(str)

stack.pop()

right_str = str[3:]

right_str = right_str[::-1]

if right_str == '@':

continue

else:

for s in right_str:

stack.append(s)

else:

print("匹配失败")

flag = 1

return False

else:

print("匹配失败")

flag = 1

return False

if __name__ == '__main__':

ana_str = input("请输入一个待分析的串:")

print("请输入一个文法:")

grammer = sys.stdin.readlines()

for gr in grammer:

first(gr)

print("该文法的非终结符的first集为:")

i = 0

for f in firstSET:

if len(f) != 0:

print('first(' + non_ter[i] + '):',f)

i += 1

print()

#求follow集用的循环……因为我没想到该怎么递归

#关于递归:如何在需要把follow(A)加入follow(B)的时候准确求出follow(A)???????

#一个易错点:直接把followSET赋给follow1,会被引用。用切片可以避免普通列表的引用,但是对于嵌套列表好像没什么用

follow1 = copy.deepcopy(followSET)

for gr in grammer:

follow(gr)

follow2 = copy.deepcopy(followSET)

while not cmp(follow1, follow2):

follow1 = copy.deepcopy(followSET)

for gr in grammer:

follow(gr)

follow2 = copy.deepcopy(followSET)

print("该文法的非终结符的follow集为:")

j = 0

for f in followSET:

if len(f) != 0:

print('follow(' + non_ter[j] + '):', f)

j += 1

print()

column = []

row = []

for gra_line in grammer:

for gr in gra_line:

if gr == '-' and gra_line.index(gr) == 1:

continue

if gr in terSymbol and gr not in column:

column.append(gr)

if gr in non_ter and gr not in row:

row.append(gr)

column.append('#')

print("该文法的预测分析表为:")

gra = table(grammer, column, row)

for i in range(len(gra)): # 控制行

if i == 0:

for j in range(len(gra[i])): # 控制列

if j == 0:

print(gra[i][j], end='\t')

else:

print(gra[i][j], end='\t\t')

else:

for j in range(len(gra[i])): # 控制列

print(gra[i][j], end='\t')

print()

print()

#总控程序

print("分析过程如下:")

master(ana_str, column, row)

Logo

腾讯云面向开发者汇聚海量精品云计算使用和开发经验,营造开放的云计算技术生态圈。

更多推荐