国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 學院 > 開發設計 > 正文

使用MPLex實現語法高亮顯示的功能代碼解釋

2019-11-17 03:50:19
字體:
來源:轉載
供稿:網友
在前面的文章《使用MPLex實現語法高亮顯示的功能》里面,貼了一個實現語法高亮顯示的代碼,是采用類似于編譯器自動狀態機的方法來判斷代碼里面每個單詞的類型。

有限自動狀態機是表示有限個狀態以及在這些狀態之間的轉移和動作等行為的數學模型。狀態之間只有一個轉移的動作。MPLex或者說相關軟件(例如flex)通過分析用戶給定的詞法文件,自動生成相應的有限自動機,將自動機的狀態保存在一個表里面。

#include <iostream>

#include <string>



using namespace std;



// Token categories reported by Match().
enum TokenType
{
       BOOM_ERROR = -1, // the input is not a valid token
       NUMBER = 1,
       IDENTIFIER = 2,
       IF = 4
};

// Transition table of the DFA: DFA_Table[state][column] is the next state,
// or -1 when the character cannot follow the text read so far.
//
// Columns: 0..9   the digits '0'..'9'
//          10..35 the lower-case letters 'a'..'z'
//          36     every other character
//
// States:  s0 start state, nothing read yet
//          s1 one or more digits
//          s2 exactly "i" (may still become the keyword "if")
//          s3 any other identifier
//          s4 exactly "if"
//
// Every row must spell out all 37 entries: a missing initializer is silently
// zero-filled, which would send that column back to the start state.
int DFA_Table[][37] = {
       { // s0 -- start state
               1, 1, 1, 1, 1,  1, 1, 1, 1, 1,            // 0-9
               3, 3, 3, 3, 3,  3, 3, 3, 2, 3,  3, 3, 3,  // a-m ('i' -> s2)
               3, 3, 3, 3, 3,  3, 3, 3, 3, 3,  3, 3, 3,  // n-z
              -1                                         // other
       },
       { // s1 -- number
               1, 1, 1, 1, 1,  1, 1, 1, 1, 1,            // 0-9
              -1,-1,-1,-1,-1, -1,-1,-1,-1,-1, -1,-1,-1,  // a-m
              -1,-1,-1,-1,-1, -1,-1,-1,-1,-1, -1,-1,-1,  // n-z
              -1                                         // other
       },
       { // s2 -- identifier consisting of just "i"
               3, 3, 3, 3, 3,  3, 3, 3, 3, 3,            // 0-9
               3, 3, 3, 3, 3,  4, 3, 3, 3, 3,  3, 3, 3,  // a-m ('f' -> s4)
               3, 3, 3, 3, 3,  3, 3, 3, 3, 3,  3, 3, 3,  // n-z
              -1                                         // other
       },
       { // s3 -- identifier
               3, 3, 3, 3, 3,  3, 3, 3, 3, 3,            // 0-9
               3, 3, 3, 3, 3,  3, 3, 3, 3, 3,  3, 3, 3,  // a-m
               3, 3, 3, 3, 3,  3, 3, 3, 3, 3,  3, 3, 3,  // n-z
              -1                                         // other
       },
       { // s4 -- the keyword "if"; any further letter or digit makes it an identifier
               3, 3, 3, 3, 3,  3, 3, 3, 3, 3,            // 0-9
               3, 3, 3, 3, 3,  3, 3, 3, 3, 3,  3, 3, 3,  // a-m
               3, 3, 3, 3, 3,  3, 3, 3, 3, 3,  3, 3, 3,  // n-z
              -1                                         // other
       }
};

//
// Match:
// Classifies the whole string str by running it through DFA_Table.
//
// Returns:
//   IF          for exactly "if"
//   NUMBER      for one or more digits
//   IDENTIFIER  for a lower-case letter followed by lower-case letters/digits
//   BOOM_ERROR  for anything else, including the empty string
//
// Only '0'..'9' and 'a'..'z' have their own table columns; every other
// character (upper-case letters and '_' included) uses the "other" column
// and is therefore rejected.
//
TokenType Match(std::string str)
{
       // Token reported when the input ends in each state. The start state
       // accepts nothing, so an empty input is an error.
       static const TokenType kTokenOfState[] = {
              BOOM_ERROR, // s0
              NUMBER,     // s1
              IDENTIFIER, // s2
              IDENTIFIER, // s3
              IF          // s4
       };
       const int kStateCount = sizeof(kTokenOfState) / sizeof(kTokenOfState[0]);

       int state = 0;

       for (std::string::const_iterator iter = str.begin();
                                        iter != str.end();
                                        ++iter)
       {
              const char c = *iter;

              int column = 36; // "other" column: no transition exists
              if (c >= '0' && c <= '9')
              {
                     column = c - '0';
              }
              else if (c >= 'a' && c <= 'z')
              {
                     column = c - 'a' + 10; // column of 'a' in DFA_Table
              }

              state = DFA_Table[state][column];

              // Stop at the first dead transition. The range check also keeps a
              // corrupted table entry from being used as a row index.
              if (state < 0 || state >= kStateCount)
                     return BOOM_ERROR;
       }

       return kTokenOfState[state];
}



int g_line = 0;

void PRint(TokenType type)

{

       switch (type)

       {

              case BOOM_ERROR:

                     cout << ++g_line << ": BOOM_ERROR/n" <<>

                     break;



              case IF:

                     cout << ++g_line << ": IF/n" <<>

                     break;



              case NUMBER:

                     cout << ++g_line << ": NUMBER/n" <<>

                     break;



              case IDENTIFIER:

                     cout << ++g_line << ": IDENTIFIER/n" <<>

                     break;



              default:

                     cout << ++g_line << ": Error/n" <<>

                     break;

       }

}



int main()

{

       Print(Match("if"));

       Print(Match("iff"));

       Print(Match("if0"));

       Print(Match("0if"));

       Print(Match("i0f"));

       Print(Match("ia"));

       Print(Match("01"));

       Print(Match("123"));

       Print(Match("1f"));

       Print(Match("abcd"));

       Print(Match("ab"));

       Print(Match("a"));

       Print(Match("0"));

       Print(Match("i"));

       Print(Match("_"));



       return 0;

}


例子1:一個簡單的DFA表驅動匹配程序

上面的例子里,字符串的匹配或者說是分類是通過有限自動機來完成的,有限自動機在代碼里面的表示就是那個二維數組 DFA_Table。DFA_Table的每一行(DFA_Table[i])表示有限自動機的狀態,而列表示從當前狀態可以執行的狀態轉換(Transition)。例如在匹配的時候,程序先從DFA_Table[0],也就是起始狀態開始,如果第一個字符是i,則根據DFA_Table[0]['i']指定的轉換規則跳轉到下一個狀態(State)去,這里下一個狀態是2,也就是DFA_Table的第三行,再根據str的下一個字符來確定要轉換的狀態。匹配過程一直循環到字符串被全部處理掉,這時程序判斷當前的狀態是不是一個可以接受的狀態(Accepting State),也就是說這個狀態是否在TokenType中定義,如果狀態在TokenType中定義,那好,我們給出的字符串匹配成功,否則……BOOM。

我在Match函數的for循環中用了if判斷來根據當前的字符選擇正確的索引,其實如果你不嫌麻煩的話,你的Match函數中的for循環可以簡化成這樣:

for (string::iterator iter = str.begin();

                             iter != str.end();

                             ++iter )

{

       state = DFA_Table[state][*iter];

}



前提是愿意把DFA_Table擴展成一個 5 行 × 128 列的二維表格(ASCII 字符 0~127 每個字符一列)。

知道了有限自動機是如何匹配代碼里的關鍵字以后,接下來要做的就是生成保存有限自動機里面的狀態的狀態表了。生成狀態表的工作就由MPLex來完成了,因為手工寫實在是太復雜了。下面這個詞法定義文件就是告訴MPLex有哪些元素需要進行特殊處理,例如將注釋、字符串、數字和關鍵字與其他普通代碼文本區分開來。

%namespace Coder.LexScanner

/* COMMENT is an exclusive start condition: it is entered when a block comment
   is opened but not closed on the same line, and left again by the rule that
   matches the closing delimiter. */
%x COMMENT

/* Named patterns used by the rules below:
     White0     a blank other than newline
     White      any blank
     CmntStart  the block-comment opener
     CmntEnd    the block-comment closer
     ABStar     any run of characters containing neither '*' nor newline
   Every rule returns one of the integer constants declared in the
   TokenType class. */
White0          [ \t\r\f\v]
White           {White0}|\n
CmntStart       \/\*
CmntEnd         \*\/
ABStar          [^\*\n]*

%%

\'[^\'\n]{0,3}\'                  { return (int)TokenType.LEX_STRINGLITERAL; }
\"[^\"\n]*\"                      { return (int)TokenType.LEX_STRINGLITERAL; }
\/\/{ABStar}\n                    { return (int)TokenType.LEX_COMMENT; }
\'[^\'\n]*\n                      { return (int)TokenType.LEX_COMMENT; }
{CmntStart}{ABStar}\**{CmntEnd}   { return (int)TokenType.LEX_COMMENT; }
{CmntStart}{ABStar}\**            { BEGIN(COMMENT); return (int)TokenType.LEX_MULTILINECOMMENT_BEGIN; }
<COMMENT>[^\n]*\**{CmntEnd}       { BEGIN(INITIAL); return (int)TokenType.LEX_MULTILINECOMMENT_END; }

if                                { return (int)TokenType.LEX_IF; }
while                             { return (int)TokenType.LEX_WHILE; }
do                                { return (int)TokenType.LEX_DO; }
abstract                          { return (int)TokenType.LEX_ABSTRACT; }
as                                { return (int)TokenType.LEX_AS; }
base                              { return (int)TokenType.LEX_BASE; }
bool                              { return (int)TokenType.LEX_BOOL; }
break                             { return (int)TokenType.LEX_BREAK; }
byte                              { return (int)TokenType.LEX_BYTE; }
case                              { return (int)TokenType.LEX_CASE; }
catch                             { return (int)TokenType.LEX_CATCH; }
char                              { return (int)TokenType.LEX_CHAR; }
checked                           { return (int)TokenType.LEX_CHECKED; }
class                             { return (int)TokenType.LEX_CLASS; }
const                             { return (int)TokenType.LEX_CONST; }
continue                          { return (int)TokenType.LEX_CONTINUE; }
decimal                           { return (int)TokenType.LEX_DECIMAL; }
default                           { return (int)TokenType.LEX_DEFAULT; }
delegate                          { return (int)TokenType.LEX_DELEGATE; }
double                            { return (int)TokenType.LEX_DOUBLE; }
else                              { return (int)TokenType.LEX_ELSE; }
enum                              { return (int)TokenType.LEX_ENUM; }
event                             { return (int)TokenType.LEX_EVENT; }
explicit                          { return (int)TokenType.LEX_EXPLICIT; }
extern                            { return (int)TokenType.LEX_EXTERN; }
false                             { return (int)TokenType.LEX_FALSE; }
finally                           { return (int)TokenType.LEX_FINALLY; }
fixed                             { return (int)TokenType.LEX_FIXED; }
float                             { return (int)TokenType.LEX_FLOAT; }
for                               { return (int)TokenType.LEX_FOR; }
foreach                           { return (int)TokenType.LEX_FOREACH; }
goto                              { return (int)TokenType.LEX_GOTO; }
implicit                          { return (int)TokenType.LEX_IMPLICIT; }
in                                { return (int)TokenType.LEX_IN; }
int                               { return (int)TokenType.LEX_INT; }
interface                         { return (int)TokenType.LEX_INTERFACE; }
internal                          { return (int)TokenType.LEX_INTERNAL; }
is                                { return (int)TokenType.LEX_IS; }
lock                              { return (int)TokenType.LEX_LOCK; }
long                              { return (int)TokenType.LEX_LONG; }
namespace                         { return (int)TokenType.LEX_NAMESPACE; }
new                               { return (int)TokenType.LEX_NEW; }
null                              { return (int)TokenType.LEX_NULL; }
object                            { return (int)TokenType.LEX_OBJECT; }
operator                          { return (int)TokenType.LEX_OPERATOR; }
out                               { return (int)TokenType.LEX_OUT; }
override                          { return (int)TokenType.LEX_OVERRIDE; }
params                            { return (int)TokenType.LEX_PARAMS; }
private                           { return (int)TokenType.LEX_PRIVATE; }
protected                         { return (int)TokenType.LEX_PROTECTED; }
public                            { return (int)TokenType.LEX_PUBLIC; }
readonly                          { return (int)TokenType.LEX_READONLY; }
ref                               { return (int)TokenType.LEX_REF; }
return                            { return (int)TokenType.LEX_RETURN; }
sbyte                             { return (int)TokenType.LEX_SBYTE; }
sealed                            { return (int)TokenType.LEX_SEALED; }
short                             { return (int)TokenType.LEX_SHORT; }
sizeof                            { return (int)TokenType.LEX_SIZEOF; }
stackalloc                        { return (int)TokenType.LEX_STACKALLOC; }
static                            { return (int)TokenType.LEX_STATIC; }
string                            { return (int)TokenType.LEX_STRING; }
struct                            { return (int)TokenType.LEX_STRUCT; }
switch                            { return (int)TokenType.LEX_SWITCH; }
this                              { return (int)TokenType.LEX_THIS; }
throw                             { return (int)TokenType.LEX_THROW; }
true                              { return (int)TokenType.LEX_TRUE; }
try                               { return (int)TokenType.LEX_TRY; }
typeof                            { return (int)TokenType.LEX_TYPEOF; }
uint                              { return (int)TokenType.LEX_UINT; }
ulong                             { return (int)TokenType.LEX_ULONG; }
unchecked                         { return (int)TokenType.LEX_UNCHECKED; }
unsafe                            { return (int)TokenType.LEX_UNSAFE; }
ushort                            { return (int)TokenType.LEX_USHORT; }
using                             { return (int)TokenType.LEX_USING; }
virtual                           { return (int)TokenType.LEX_VIRTUAL; }
volatile                          { return (int)TokenType.LEX_VOLATILE; }
void                              { return (int)TokenType.LEX_VOID; }

[0-9]+                            { return (int)TokenType.LEX_NUMBER; }
[a-zA-Z_][a-zA-Z0-9_]*            { return (int)TokenType.LEX_INDENTIFIER; }

{White0}+                         { return (int)TokenType.LEX_WHITE; }
\n                                { return (int)TokenType.LEX_WHITE; }
.                                 { return (int)TokenType.LEX_ERROR; }

%%



Scanner.lex文件

上面代碼里面的TokenType枚舉需要在另外的C#文件里面定義:

using System;



namespace Coder.LexScanner

{

    public class TokenType

    {

        public const int LEX_NUMBER = 1;

        public const int LEX_INDENTIFIER = 2;

        public const int LEX_KEYWord = 1 << 30;

        public const int LEX_IF = 3 | LEX_KEYWORD;

        public const int LEX_WHILE = 4 | LEX_KEYWORD;

        public const int LEX_DO = 5 | LEX_KEYWORD;

        public const int LEX_ABSTRACT = 6 | LEX_KEYWORD;

        public const int LEX_AS = 7 | LEX_KEYWORD;

        public const int LEX_BASE = 8 | LEX_KEYWORD;

        public const int LEX_BOOL = 9 | LEX_KEYWORD;

        public const int LEX_BREAK = 10 | LEX_KEYWORD;

        public const int LEX_BYTE = 11 | LEX_KEYWORD;

        public const int LEX_CASE = 12 | LEX_KEYWORD;

        public const int LEX_CATCH = 13 | LEX_KEYWORD;

        public const int LEX_CHAR = 14 | LEX_KEYWORD;

        public const int LEX_CHECKED = 15 | LEX_KEYWORD;

        public const int LEX_CLASS = 16 | LEX_KEYWORD;

        public const int LEX_CONST = 17 | LEX_KEYWORD;

        public const int LEX_CONTINUE = 18 | LEX_KEYWORD;

        public const int LEX_DECIMAL = 19 | LEX_KEYWORD;

        public const int LEX_DEFAULT = 20 | LEX_KEYWORD;

        public const int LEX_DELEGATE = 21 | LEX_KEYWORD;

        public const int LEX_DOUBLE = 22 | LEX_KEYWORD;

        public const int LEX_ELSE = 23 | LEX_KEYWORD;

        public const int LEX_ENUM = 24 | LEX_KEYWORD;

        public const int LEX_EVENT = 25 | LEX_KEYWORD;

       public const int LEX_EXPLICIT = 26 | LEX_KEYWORD;

        public const int LEX_EXTERN = 27 | LEX_KEYWORD;

        public const int LEX_FALSE = 28 | LEX_KEYWORD;

        public const int LEX_FINALLY = 29 | LEX_KEYWORD;

        public const int LEX_FIXED = 30 | LEX_KEYWORD;

        public const int LEX_FLOAT = 31 | LEX_KEYWORD;

        public const int LEX_FOR = 32 | LEX_KEYWORD;

        public const int LEX_FOREACH = 33 | LEX_KEYWORD;

        public const int LEX_GOTO = 34 | LEX_KEYWORD;

        public const int LEX_IMPLICIT = 35 | LEX_KEYWORD;

        public const int LEX_IN = 36 | LEX_KEYWORD;

        public const int LEX_INT = 37 | LEX_KEYWORD;

        public const int LEX_INTERFACE = 38 | LEX_KEYWORD;

        public const int LEX_INTERNAL = 39 | LEX_KEYWORD;

        public const int LEX_IS = 40 | LEX_KEYWORD;

        public const int LEX_LOCK = 41 | LEX_KEYWORD;

        public const int LEX_LONG = 42 | LEX_KEYWORD;

        public const int LEX_NAMESPACE = 43 | LEX_KEYWORD;

        public const int LEX_NEW = 44 | LEX_KEYWORD;

        public const int LEX_NULL = 45 | LEX_KEYWORD;

        public const int LEX_OBJECT = 46 | LEX_KEYWORD;

        public const int LEX_OPERATOR = 47 | LEX_KEYWORD;

        public const int LEX_OUT = 48 | LEX_KEYWORD;

        public const int LEX_OVERRIDE = 49 | LEX_KEYWORD;

        public const int LEX_PARAMS = 50 | LEX_KEYWORD;

        public const int LEX_PRIVATE = 51 | LEX_KEYWORD;

        public const int LEX_PROTECTED = 52 | LEX_KEYWORD;

        public const int LEX_PUBLIC = 53 | LEX_KEYWORD;

        public const int LEX_READONLY = 54 | LEX_KEYWORD;

        public const int LEX_REF = 55 | LEX_KEYWORD;

        public const int LEX_RETURN = 56 | LEX_KEYWORD;

        public const int LEX_SBYTE = 57 | LEX_KEYWORD;

        public const int LEX_SEALED = 58 | LEX_KEYWORD;

        public const int LEX_SHORT = 59 | LEX_KEYWORD;

        public const int LEX_SIZEOF = 60 | LEX_KEYWORD;

        public const int LEX_STACKALLOC = 61 | LEX_KEYWORD;

        public const int LEX_STATIC = 62 | LEX_KEYWORD;

        public const int LEX_STRING = 63 | LEX_KEYWORD;

        public const int LEX_STRUCT = 64 | LEX_KEYWORD;

        public const int LEX_SWITCH = 65 | LEX_KEYWORD;

        public const int LEX_THIS = 66 | LEX_KEYWORD;

        public const int LEX_THROW = 67 | LEX_KEYWORD;

        public const int LEX_TRUE = 68 | LEX_KEYWORD;

        public const int LEX_TRY = 69 | LEX_KEYWORD;

        public const int LEX_TYPEOF = 70 | LEX_KEYWORD;

        public const int LEX_UINT = 71 | LEX_KEYWORD;

        public const int LEX_ULONG = 72 | LEX_KEYWORD;

        public const int LEX_UNCHECKED = 73 | LEX_KEYWORD;

        public const int LEX_UNSAFE = 74 | LEX_KEYWORD;

        public const int LEX_USHORT = 75 | LEX_KEYWORD;

        public const int LEX_USING = 76 | LEX_KEYWORD;

        public const int LEX_VIRTUAL = 77 | LEX_KEYWORD;

        public const int LEX_VOLATILE = 78 | LEX_KEYWORD;

        public const int LEX_VOID = 79 | LEX_KEYWORD;



        public const int LEX_MULTILINECOMMENT = 80;

        public const int LEX_MULTILINECOMMENT_BEGIN = 81;

        public const int LEX_MULTILINECOMMENT_END = 82;

        public const int LEX_COMMENT = 25;

        public const int LEX_WHITE = 26;

        public const int LEX_ERROR = 27;

        public const int LEX_STRINGLITERAL = 28;

    }



    // Error-reporting interface declared next to the token codes.
    // NOTE(review): presumably this is the handler type the MPLex-generated
    // scanner expects to find in its namespace - confirm against the generated code.
    public interface IErrorHandler

    {

        // Read-only count; by its name, the number of errors reported so far (assumption).
        int ErrNum { get; }



        // Read-only count; by its name, the number of warnings reported so far (assumption).
        int WrnNum { get; }



        // Reports one diagnostic. Judging by the parameter names: msg is the
        // message text, lin/col/len the line, column and length of the offending
        // text, severity its level - TODO confirm units and value ranges.
        void AddError(string msg, int lin, int col, int len, int severity);

    }

}





然后使用命令根據詞法文件生成詞法匹配的C#代碼:

Mplex.exe scanner.lex

最后為了判斷生成的C#代碼是否有用,我寫了一個小程序調用詞法匹配函數測試了一下:

using System;

using Coder.LexScanner;



/// <summary>
/// Small console driver for the generated scanner: reads one line from
/// standard input and prints the code, scanner state and start/end position
/// of every token the scanner returns for it.
/// </summary>
public class TestClass
{
    public static void Main()
    {
        // Console.ReadLine returns null when the input stream is already at
        // its end (for example an empty redirected file).
        string text = Console.ReadLine();
        Scanner scnr = new Scanner();
        int state = 0;
        int result = 0;
        int start, end;

        // Nothing to scan for a missing or blank line.
        if (text == null || text.Trim().Length == 0)
        {
            return;
        }

        scnr.SetSource(text, 0);

        // Pull tokens until the scanner reports end of input.
        result = scnr.GetNext(ref state, out start, out end);
        while (result != (int)Tokens.EOF)
        {
            Console.WriteLine(
                "result: {0}, state: {1}, start: {2}, end: {3}",
                result, state, start, end);

            result = scnr.GetNext(ref state, out start, out end);
        }
    }
}





實際上,Visual Studio的代碼高亮顯示功能也是通過Mplex和Mppg實現,這樣做的好處是,新的編程語言可以以插件的形式加入到Visual Studio里面來,而Visual Studio照樣能夠在編輯新編程語言的程序時,實現高亮顯示以及其他,例如變量和函數定義查找、智能提示框之類的功能。


發(fā)表評(píng)論 共有條評(píng)論
用戶名: 密碼:
驗(yàn)證碼: 匿名發(fā)表
主站蜘蛛池模板: 普安县| 临沂市| 庆元县| 临澧县| 天水市| 盘山县| 游戏| 浦县| 沽源县| 庆云县| 蒲城县| 汪清县| 龙泉市| 新龙县| 措美县| 汉沽区| 隆尧县| 汉沽区| 屏山县| 奉节县| 永顺县| 汶上县| 宜昌市| 苏州市| 台北市| 罗定市| 克什克腾旗| 永顺县| 宁南县| 双城市| 靖宇县| 阳江市| 鹿泉市| 黄大仙区| 教育| 甘孜| 美姑县| 霍山县| 昌黎县| 株洲市| 响水县|