今天看到xuejun的一個取漢字拼音首字母的函數,試用了一下,感覺很好用,不敢獨享,拿出來跟大家分享:
$pbexportheader$uf_getfirstletter.srf
$pbexportcomments$ 返回給定漢字串的首字母串, xuejun , 19990821
global type uf_getfirstletter from function_object
end type
forward prototypes
global function string uf_getfirstletter (string as_inputstring)
end prototypes
global function string uf_getfirstletter (string as_inputstring);// Function : uf_getfirstletter
// Purpose  : Return the pinyin initial letter of every Chinese character in a string.
// Argument : as_inputstring - string, the text to convert.
// Returns  : string - one letter per recognised Hanzi; the whole result is passed
//            through lower() before being returned.
// Notes    : 1. The lookup is driven by the GB2312 sector/position ("qu-wei") code,
//               obtained by subtracting 160 from each of the two bytes of a character.
//               NOTE(review): this assumes mid()/len()/asc() work on single bytes
//               (ANSI/DBCS build); confirm before using in a Unicode build.
//            2. Anything that is not a recognised Hanzi (single-byte characters,
//               double-byte symbols, pairs with an out-of-range trail byte, a dangling
//               lead byte at the end of the string) is copied to the result unchanged.
// Sample   : ls_rtn = uf_getfirstletter("中華人民共和國")   // header example: zhrmghg
char lc_firstletter[23]       // initial letter for each level-1 pronunciation group
string ls_ch                  // character currently being processed (1 or 2 bytes)
string ls_secondsectable      // initial letters of the level-2 Hanzi, indexed by offset
string ls_returnstr           // result being built
integer li_secposvalue[23]    // first sector/position code of each level-1 group
long ll_pos                   // 1-based read position in as_inputstring
long ll_len                   // length of as_inputstring
integer j
integer li_sectorcode         // sector code (lead byte - 160)
integer li_positioncode       // position code (trail byte - 160)
integer li_secposcode         // sector * 100 + position
integer li_offset             // zero-based offset into ls_secondsectable
boolean lb_mapped             // true once a letter has been emitted for ls_ch

// Lookup tables.
li_secposvalue[]={1601,1637,1833,2078,2274,2302,2433,2594,2787,3106,3212,3472,3635,3722,3730,3858,4027,4086,4390,4558,4684,4925,5249 }
lc_firstletter[] = {"a", "b","c","d","e","f","g","h","j","k","l","m","n","o","p","q","r","s","t","w","x","y","z"}
// The table is built from two literals joined with "+" so that no line break ends up
// inside the string. It is indexed from sector 56, position 1 onwards.
// NOTE(review): the range check below expects 32 sectors x 94 positions = 3008
// entries; confirm the concatenated length. Entries written as "[" look like
// placeholders for characters without a known letter - TODO confirm.
ls_secondsectable = "cjwgnspgcgne[y[btyyzdxykygt[jnnjqmbsgzscyjsyy[pgkbzgy[ywjkgkljywkpjqhy[w[dzlsgmrypywwcckznkyygttnjjnykkzytcjnmcylqlypyqfqrpzslwbtgkjfyxjwzltbncxjjjjtxdttsqzycdxxhgck[phffss[ybgxlppbyll[hlxs[zm[jhsojnghdzqyklgjhsgqzhxqgkezzwyscscjxyeyxadzpmdssmzjzqjyzc[j[wqjbyzpxgznzcpwhkxhqkmwfbpbydtjzzkqhylygxfptyjyyzpszlfchmqshgmxxsxj[[dcsbbqbefsjyhxwgzkpylqbgldlcctnmayddkssngycsgxlyzaybnptsdkdylhgymylcxpy[jndqjwxqxfyyfjlejpzrxccqwqqsbnkymgplbmjrqcflnymyqmsqyrbcjthztqfrxqhxmjjcjlxqgjmshzkbswyemyltxfsydswlycjqxsjnqbsctyhbftdcyzdjwyghqfrxwckqkxebptlpxjzsrmebwhjlbjslyysmdxlclqkxlhxjrzjmfqhxhwywsbhtrxxglhqhfnm[ykldyxzpylgg[mtcfpajjzyljtyanjgbjplqgdzyqyaxbkysecjsznslyzhsxlzcghpxzhznytdsbcjkdlzayfmydlebbgqyzkxgldndnyskjshdlyxbcghxypkdjmmzngmmclgwzszxzjfznmlzzthcsydbdllscddnlkjykjsycjlkwhqasdknhcsganhdaashtcplcpqybsdmpjlpzjoqlcdhjjysprchn[nnlhlyyqyhwzptczgwwmzffjqqqqyxaclbhkdjxdgmmydjxzllsygxgkjrywzwyclzmssjzldbyd[fcxyhlxchyzjq[[qagmnyxpfrkssbjlyxysyglnscmhzwwmnzjjlxxhchsy[[ttxrycyxbyhcsmxjsznpwgpxxtaybgajcxly[dccwzocwkccsbnhcpdyznfcyytyckxkybsqkkytqqxfcwchcykelzqbsqyjqcclmthsywhmktlkjlycxwheqqhtqh[pq[qscfymndmgbwhwlgsllysdlmlxpthmjhwljzyhzjxhtxjlhxrswlwzjcbxmhzqxsdzpmgfcsglsxymjshxpjxwmyqksmyplrthbxftpmhyxlchlhlzylxgsssstclsldclrpbhzhxyyfhb[gdmycnqqwlqhjj[ywjzyejjdhpblqxtqkwhlchqxagtlxljxmsl[htzkzjecxjcjnmfby[sfywybjzgnysdzsqyrsljpclpwxsdwejbjcbcnaytwgmpapclyqpclzxsbnmsggfnzjjbzsfzyndxhplqkzczwalsbccjx[yzgwkypsgxfzfcdkhjgxdlqfsgdslqwzkxtmhsbgzmjzrglyjbpmlmsxlzjqqhzyjczydjwbmyklddpmjegxyhylxhlqyqhkycwcjmyyxnatjhyccxzpcqlbzwwytwbqcmlpmyrjcccxfpznzzljplxxyztzlgdldcklyrzzgqtgjhhgjljaxfgfjzslcfdqzlclgjdjcsnzlljpjqdcclcjxmyzftsxgcgsbrzxjqqctzhgyqtjqqlzxjylylbcyamcstylpdjbyregklzyzhlyszqlznwczcllwjqjjjkdgjzolbbzppglghtgzxyghzmycnqsycyhbhgxkamtxyxnbskyzzgjzlqjdfcjxdygjqjjpmgwgjjjpkqsbgbmmcjssclpqpdxcdyyky[cjddyygywrhjrtgznyqldkljszzgzqzjgdykshpzmtlcpwnjafyzdjcnmwescyglbtzcgmssllyxqsxsbsjsbbsgghfjlypmzjnlyywdqshzxtyywhmzyhywdbxbtlmsyyyfsxjc[dxxlhjhf[sxzqhfzmzcztqcxzxrttdjhnnyz" + &
	"qqmnqdmmg[ydxmjgdhcdyzbffallztdltfxmxqzdngwqdbdczjdxbzgsqqddjcmbkzffxmkdmdsyyszcmljdsynsbrskmkmpcklgdbqtfzswtfgglyplljzhgj[gypzltcsmcnbtjbqfkthbyzgkpbbymtdssxtbnpdkleycjnyddykzddhqhsdzsctarlltkzlgecllkjlqjaqnbdkkghpjtzqksecshalqfmmgjnlyjbbtmlyzxdcjpldlpcqdhzycbzsczbzmsljflkrzjsnfrgjhxpdhyjybzgdlqcsezgxlblgyxtwmabchecmwyjyzlljjyhlg[djlslygkdzpzxjyyzlwcxszfgwyydlyhcljscmbjhblyzlycblydpdqysxqzbytdkyxjy[cnrjmpdjgklcljbctbjddbblblczqrppxjcjlzcshltoljnmdddlngkaqhqhjgykheznmshrp[qqjchgmfprxhjgdychghlyrzqlcyqjnzsqtkqjymszswlcfqqqxyfggyptqwlmcrnfkkfsyylqbmqammmyxctpshcptxxzzsmphpshmclmldqfyqxszyydyjzzhqpdszglstjbckbxyqzjsgpsxqzqzrqtbdkyxzkhhgflbcsmdldgdzdblzyycxnncsybzbfglzzxswmsccmqnjqsbdqsjtxxmbltxzclzshzcxrqjgjylxzfjphymzqqydfqjjlzznzjcdgzygctxmzysctlkphtxhtlbjxjlxscdqxcbbtjfqzfsltjbtkqbxxjjljchczdbzjdczjdcprnpqcjpfczlclzxzdmxmphjsgzgszzqlylwtjpfsyasmcjbtzkycwmytcsjjljcqlwzmalbxyfbpnlsfhtgjwejjxxglljstgshjqlzfkcgnnnszfdeqfhbsaqtgylbxmmygszldydqmjjrgbjtkgdhgkblqkbdmbylxwcxyttybkmrtjzxqjbhlmhmjjzmqasldcyxyqdlqcafywyxqhz"

// Walk the input one character (1 or 2 bytes) at a time.
ls_returnstr = ""
ll_len = len(as_inputstring)
ll_pos = 1
do while ll_pos <= ll_len
	ls_ch = mid(as_inputstring, ll_pos, 1)
	if asc(ls_ch) < 128 then
		// Single-byte character: copy as is.
		ls_returnstr = ls_returnstr + ls_ch
		ll_pos = ll_pos + 1
		continue
	end if

	// Lead byte of a double-byte character: read the pair and step past it.
	ls_ch = mid(as_inputstring, ll_pos, 2)
	ll_pos = ll_pos + 2
	if len(ls_ch) < 2 then
		// Dangling lead byte at the very end of the string: nothing to decode.
		ls_returnstr = ls_returnstr + ls_ch
		continue
	end if

	li_sectorcode = asc(left(ls_ch, 1)) - 160
	li_positioncode = asc(right(ls_ch, 1)) - 160
	lb_mapped = false

	// A position code outside 1..94 would alias into a neighbouring sector when
	// combined below, so such pairs are never looked up.
	if li_positioncode >= 1 and li_positioncode <= 94 then
		li_secposcode = li_sectorcode * 100 + li_positioncode
		if li_secposcode > 1600 and li_secposcode < 5590 then
			// Level-1 Hanzi are ordered by pronunciation: take the last group
			// whose starting code is not above this character's code.
			for j = 23 to 1 step -1
				if li_secposcode >= li_secposvalue[j] then
					ls_returnstr = ls_returnstr + lc_firstletter[j]
					lb_mapped = true
					exit
				end if
			next
		else
			// Level-2 Hanzi: direct table lookup, counted from sector 56.
			li_offset = (li_sectorcode - 56) * 94 + li_positioncode - 1
			if li_offset >= 0 and li_offset <= 3007 then
				// li_offset is zero-based while mid() positions start at 1.
				ls_returnstr = ls_returnstr + mid(ls_secondsectable, li_offset + 1, 1)
				lb_mapped = true
			end if
		end if
	end if

	if not lb_mapped then
		// Not a recognised Hanzi (symbol, extension character, ...): keep it.
		ls_returnstr = ls_returnstr + ls_ch
	end if
loop

return lower(ls_returnstr)
end function