国产探花免费观看_亚洲丰满少妇自慰呻吟_97日韩有码在线_资源在线日韩欧美_一区二区精品毛片,辰东完美世界有声小说,欢乐颂第一季,yy玄幻小说排行榜完本

首頁 > 編程 > Java > 正文

Java 字符串詞頻統計實例代碼

2019-11-26 16:12:55
字體:
供稿:網友

複製代碼 代碼如下:

package com.gpdi.action;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class WordsStatistics {

    class Obj {
        int count ;
        Obj(int count){
            this.count = count;
        }
    }

    public List<WordCount> statistics(String word) {
        List<WordCount> rs = new ArrayList<WordCount>();
        Map <String,Obj> map = new HashMap<String,Obj>();

        if(word == null ) {
            return null;
        }
        word = word.toLowerCase();
        word = word.replaceAll("'s", "");
        word = word.replaceAll(",", "");
        word = word.replaceAll("-", "");
        word = word.replaceAll("http://.", "");
        word = word.replaceAll("'", "");
        word = word.replaceAll(":", "");
        word = word.replaceAll("!", "");
        word = word.replaceAll("/n", "");

        String [] wordArray = word.split(" ");
        for(String simpleWord : wordArray) {
            simpleWord = simpleWord.trim(); 
            if (simpleWord != null && !simpleWord.equalsIgnoreCase("")) {
                Obj cnt = map.get(simpleWord);
                if ( cnt!= null ) {
                    cnt.count++;
                }else {
                    map.put(simpleWord, new Obj(1));
                }
            }
        }

        for(String key : map.keySet()) {
            WordCount wd = new WordCount(key,map.get(key).count);
            rs.add(wd);
        }

        Collections.sort(rs, new java.util.Comparator<WordCount>(){
            @Override
            public int compare(WordCount o1, WordCount o2) {
                int result = 0 ;
                if (o1.getCount() > o2.getCount() ) {
                    result = -1;
                }else if (o1.getCount() < o2.getCount()) {
                    result = 1;
                }else {
                    int strRs = o1.getWord().compareToIgnoreCase(o2.getWord());
                    if ( strRs > 0 ) {
                        result = 1;
                    }else {
                        result = -1 ;
                    }
                }
                return result;
            }

        });
        return rs;
    }

     
    public static void main(String args[]) {
        String word = "Pinterest is might be aa ab aa ab marketer's dream  - ths site is largely used to curate products " ;
        WordsStatistics s = new WordsStatistics();
        List<WordCount> rs = s.statistics(word);
        for(WordCount word1 : rs) {
            System.out.println(word1.getWord()+"*"+word1.getCount());
        }
    }

}

發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 蒙自县| 定襄县| 米脂县| 临安市| 舒兰市| 齐河县| 栾城县| 桃源县| 乐亭县| 凤阳县| 尉氏县| 驻马店市| 边坝县| 库伦旗| 通许县| 阿荣旗| 楚雄市| 陇川县| 布尔津县| 交口县| 扎囊县| 论坛| 义马市| 哈密市| 肇源县| 同仁县| 两当县| 江源县| 江口县| 肃北| 伊宁县| 榆树市| 霍城县| 安顺市| 五莲县| 双江| 曲阳县| 都昌县| 夹江县| 张家口市| 临沂市|