国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 編程 > C# > 正文

C#實現將HTML轉換成純文本的方法

2020-01-24 01:36:50
字體:
供稿:網友

本文實例講述了C#實現將HTML轉換成純文本的方法。分享給大家供大家參考。具體如下:

使用方法:

復制代碼 代碼如下:
// Convert the HTML held in textBox1 and display the plain-text result in textBox2.
var converter = new HtmlToText();
string plainText = converter.Convert(textBox1.Text);
textBox2.Text = plainText;

C#代碼如下:

/// <summary>
/// Converts HTML to plain text.
/// </summary>
/// <remarks>
/// The converter walks the input one character at a time. Tags are removed;
/// a fixed table of block-level tags is translated into line breaks (or a tab
/// for table cells); the content of script, noscript, style and object
/// elements is dropped; and HTML entities are decoded at the very end.
/// An instance keeps its parsing state in fields, so a single instance must
/// not run <see cref="Convert"/> concurrently from several threads.
/// </remarks>
class HtmlToText
{
    // Static data tables
    protected static Dictionary<string, string> _tags;
    protected static HashSet<string> _ignoreTags;

    // Instance variables
    protected TextBuilder _text;
    protected string _html;
    protected int _pos;

    // Static constructor (one time only)
    static HtmlToText()
    {
        // Tag name (lower case, "/" prefix for closing tags) -> text emitted
        // in place of the tag.
        _tags = new Dictionary<string, string>
        {
            { "address", "\n" },
            { "blockquote", "\n" },
            { "div", "\n" },
            { "dl", "\n" },
            { "fieldset", "\n" },
            { "form", "\n" },
            { "h1", "\n" },
            { "/h1", "\n" },
            { "h2", "\n" },
            { "/h2", "\n" },
            { "h3", "\n" },
            { "/h3", "\n" },
            { "h4", "\n" },
            { "/h4", "\n" },
            { "h5", "\n" },
            { "/h5", "\n" },
            { "h6", "\n" },
            { "/h6", "\n" },
            { "p", "\n" },
            { "/p", "\n" },
            { "table", "\n" },
            { "/table", "\n" },
            { "ul", "\n" },
            { "/ul", "\n" },
            { "ol", "\n" },
            { "/ol", "\n" },
            { "/li", "\n" },
            { "br", "\n" },
            { "/td", "\t" },
            { "/tr", "\n" },
            { "/pre", "\n" }
        };

        // Elements whose entire inner content is discarded.
        _ignoreTags = new HashSet<string>
        {
            "script",
            "noscript",
            "style",
            "object"
        };
    }

    /// <summary>
    /// Converts the given HTML to plain text and returns the result.
    /// </summary>
    /// <param name="html">HTML to be converted; null is treated as empty.</param>
    /// <returns>Resulting plain text</returns>
    public string Convert(string html)
    {
        // Initialize state variables
        _text = new TextBuilder();
        _html = html ?? String.Empty;
        _pos = 0;

        // Process input
        while (!EndOfText)
        {
            if (Peek() == '<')
            {
                // HTML tag
                bool selfClosing;
                string tag = ParseTag(out selfClosing);

                // Handle special tag cases
                switch (tag)
                {
                    case "body":
                        // Discard content before <body>
                        _text.Clear();
                        break;
                    case "/body":
                        // Discard content after </body>
                        _pos = _html.Length;
                        break;
                    case "pre":
                        // Enter preformatted mode
                        _text.Preformatted = true;
                        EatWhitespaceToNextLine();
                        break;
                    case "/pre":
                        // Exit preformatted mode
                        _text.Preformatted = false;
                        break;
                }

                string value;
                if (_tags.TryGetValue(tag, out value))
                    _text.Write(value);

                if (_ignoreTags.Contains(tag))
                    EatInnerContent(tag);
            }
            else if (Char.IsWhiteSpace(Peek()))
            {
                // Whitespace (treat all as space unless preformatted)
                _text.Write(_text.Preformatted ? Peek() : ' ');
                MoveAhead();
            }
            else
            {
                // Other text
                _text.Write(Peek());
                MoveAhead();
            }
        }

        // Entities are decoded last, so "&lt;" in the input is never
        // mistaken for the start of a tag while parsing.
        return HttpUtility.HtmlDecode(_text.ToString());
    }

    // Eats all characters that are part of the current tag and returns the
    // lower-cased tag name (with a leading "/" for closing tags).
    // selfClosing is set when a "/" appears outside of quoted attribute
    // values after the tag name.
    protected string ParseTag(out bool selfClosing)
    {
        string tag = String.Empty;
        selfClosing = false;

        if (Peek() == '<')
        {
            MoveAhead();

            // Parse tag name
            EatWhitespace();
            int start = _pos;
            if (Peek() == '/')
                MoveAhead();
            while (!EndOfText && !Char.IsWhiteSpace(Peek()) &&
                Peek() != '/' && Peek() != '>')
                MoveAhead();

            // Invariant lower-casing: the lookup tables hold ASCII names and
            // must match regardless of the current culture (e.g. Turkish I).
            tag = _html.Substring(start, _pos - start).ToLowerInvariant();

            // Parse rest of tag
            while (!EndOfText && Peek() != '>')
            {
                if (Peek() == '"' || Peek() == '\'')
                    EatQuotedValue();
                else
                {
                    if (Peek() == '/')
                        selfClosing = true;
                    MoveAhead();
                }
            }
            MoveAhead();
        }
        return tag;
    }

    // Consumes inner content from the current tag, up to and including the
    // matching end tag (or the end of the input if there is none).
    protected void EatInnerContent(string tag)
    {
        string endTag = "/" + tag;

        // script/style bodies are not markup: a '<' inside them (e.g. "a<b")
        // must not be parsed as a tag, so look for the end tag textually.
        if (tag == "script" || tag == "style")
        {
            EatRawText(endTag);
            return;
        }

        // Other elements may legitimately nest inside themselves; count the
        // open instances so we stop at the end tag that balances ours.
        int depth = 1;
        while (!EndOfText)
        {
            if (Peek() == '<')
            {
                // Consume a tag
                bool selfClosing;
                string inner = ParseTag(out selfClosing);
                if (inner == endTag)
                {
                    depth--;
                    if (depth == 0)
                        return;
                }
                else if (inner == tag && !selfClosing)
                {
                    depth++;
                }
            }
            else MoveAhead();
        }
    }

    // Skips forward to just past the next occurrence of the given end tag
    // (e.g. "/script"), ignoring everything in between.
    private void EatRawText(string endTag)
    {
        string marker = "<" + endTag;
        while (!EndOfText)
        {
            int index = _html.IndexOf(marker, _pos, StringComparison.OrdinalIgnoreCase);
            if (index < 0)
            {
                // Unterminated element: discard the rest of the input
                _pos = _html.Length;
                return;
            }

            _pos = index;
            bool selfClosing;
            // ParseTag confirms the full name (rejects e.g. "</scripts>")
            // and always advances, so this loop terminates.
            if (ParseTag(out selfClosing) == endTag)
                return;
        }
    }

    // Returns true if the current position is at the end of
    // the string
    protected bool EndOfText
    {
        get { return (_pos >= _html.Length); }
    }

    // Safely returns the character at the current position
    // ((char)0 when past the end)
    protected char Peek()
    {
        return (_pos < _html.Length) ? _html[_pos] : (char)0;
    }

    // Safely advances the current position to the next character
    protected void MoveAhead()
    {
        _pos = Math.Min(_pos + 1, _html.Length);
    }

    // Moves the current position to the next non-whitespace
    // character.
    protected void EatWhitespace()
    {
        while (Char.IsWhiteSpace(Peek()))
            MoveAhead();
    }

    // Moves the current position to the next non-whitespace
    // character or the start of the next line, whichever
    // comes first
    protected void EatWhitespaceToNextLine()
    {
        while (Char.IsWhiteSpace(Peek()))
        {
            char c = Peek();
            MoveAhead();
            if (c == '\n')
                break;
        }
    }

    // Moves the current position past a quoted value. The value ends at the
    // matching quote or at a line break, whichever comes first.
    protected void EatQuotedValue()
    {
        char c = Peek();
        if (c == '"' || c == '\'')
        {
            // Opening quote
            MoveAhead();

            // Find end of value
            _pos = _html.IndexOfAny(new char[] { c, '\r', '\n' }, _pos);
            if (_pos < 0)
                _pos = _html.Length;
            else
                MoveAhead();  // Closing quote
        }
    }

    /// <summary>
    /// A StringBuilder class that helps eliminate excess whitespace.
    /// </summary>
    protected class TextBuilder
    {
        private StringBuilder _text;
        private StringBuilder _currLine;
        private int _emptyLines;
        private bool _preformatted;

        // Construction
        public TextBuilder()
        {
            _text = new StringBuilder();
            _currLine = new StringBuilder();
            _emptyLines = 0;
            _preformatted = false;
        }

        /// <summary>
        /// Normally, extra whitespace characters are discarded.
        /// If this property is set to true, they are passed
        /// through unchanged.
        /// </summary>
        public bool Preformatted
        {
            get
            {
                return _preformatted;
            }
            set
            {
                if (value)
                {
                    // Clear line buffer if changing to
                    // preformatted mode
                    if (_currLine.Length > 0)
                        FlushCurrLine();
                    _emptyLines = 0;
                }
                _preformatted = value;
            }
        }

        /// <summary>
        /// Clears all current text.
        /// </summary>
        public void Clear()
        {
            _text.Length = 0;
            _currLine.Length = 0;
            _emptyLines = 0;
        }

        /// <summary>
        /// Writes the given string to the output buffer.
        /// </summary>
        /// <param name="s">String to write</param>
        public void Write(string s)
        {
            foreach (char c in s)
                Write(c);
        }

        /// <summary>
        /// Writes the given character to the output buffer.
        /// </summary>
        /// <param name="c">Character to write</param>
        public void Write(char c)
        {
            if (_preformatted)
            {
                // Write preformatted character
                _text.Append(c);
            }
            else
            {
                if (c == '\r')
                {
                    // Ignore carriage returns. We'll process
                    // '\n' if it comes next
                }
                else if (c == '\n')
                {
                    // Flush current line
                    FlushCurrLine();
                }
                else if (Char.IsWhiteSpace(c))
                {
                    // Write single space character (collapse runs)
                    int len = _currLine.Length;
                    if (len == 0 || !Char.IsWhiteSpace(_currLine[len - 1]))
                        _currLine.Append(' ');
                }
                else
                {
                    // Add character to current line
                    _currLine.Append(c);
                }
            }
        }

        // Appends the current line to output buffer. At most one blank line
        // is kept between lines of text, and none at the start of the output.
        protected void FlushCurrLine()
        {
            // Get current line
            string line = _currLine.ToString().Trim();

            // After Trim() a line made only of whitespace is empty
            if (line.Length == 0)
            {
                // An empty line
                _emptyLines++;
                if (_emptyLines < 2 && _text.Length > 0)
                    _text.AppendLine(line);
            }
            else
            {
                // A non-empty line
                _emptyLines = 0;
                _text.AppendLine(line);
            }

            // Reset current line
            _currLine.Length = 0;
        }

        /// <summary>
        /// Returns the current output as a string.
        /// </summary>
        public override string ToString()
        {
            if (_currLine.Length > 0)
                FlushCurrLine();
            return _text.ToString();
        }
    }
}

希望本文所述對大家的C#程序設計有所幫助。

發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 当雄县| 奉贤区| 惠安县| 西青区| 河北区| 额尔古纳市| 西平县| 泰来县| 民权县| 桐梓县| 峨眉山市| 宁河县| 高陵县| 定南县| 新和县| 资溪县| 台东县| 崇文区| 白玉县| 高淳县| 南川市| 珲春市| 二手房| 田林县| 双江| 霍林郭勒市| 噶尔县| 孟连| 新闻| 张家界市| 教育| 湘乡市| 安乡县| 西平县| 枣强县| 株洲市| 乌鲁木齐市| 阜新市| 鄂托克前旗| 博爱县| 宜良县|