打包下载地址【包含v1和v2示例】:
代码片段:
; Demo: segment a few Chinese sentences with the cppJieba wrapper class defined below.
SetBatchLines -1
; Relative paths (Jieba.dll and the ./dict/ files) are resolved from the script's folder.
SetWorkingDir %A_ScriptDir%

; Preload the DLL so that later "Jieba.dll\<function>" calls reuse the loaded module.
; The "ptr" return type keeps the module handle intact on 64-bit builds
; (DllCall's default return type is a 32-bit Int).
if !DllCall("LoadLibrary", "str", "Jieba.dll", "ptr") {
    MsgBox % "Failed to load Jieba.dll from " A_WorkingDir " (error " A_LastError ")"
    ExitApp
}

jieba := New cppJieba()
MsgBox % jieba.cut("我来到北京清华大学") ; => 我/来到/北京/清华大学
MsgBox % jieba.cut("我来到北京清华大学", true) ; => 我/来到/北京/清华/清华大学/华大/大学
MsgBox % jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造") ; => 小明/硕士/毕业/于/中国/科学/学院/科学院/中国科学院/计算/计算所/后/在/日本/京都/大学/日本京都大学/深造
MsgBox % jieba.cut("行走的银行") ; => 行走/的/银行
; AutoHotkey v1 wrapper around the functions exported by Jieba.dll
; (init, cut, cut_all, cut_for_search, load_userdict, free_jieba).
; The native instance is created lazily on the first call that needs it.
class cppJieba {
    ; Remembers the dictionary file paths; the DLL is not called here.
    __new(DICT_PATH := "./dict/jieba.dict.utf8", HMM_PATH := "./dict/hmm_model.utf8", USER_DICT_PATH := "./dict/user.dict.utf8", IDF_PATH := "./dict/idf.utf8", STOP_WORD_PATH := "./dict/stop_words.utf8") {
        this.DICT_PATH := DICT_PATH
        this.HMM_PATH := HMM_PATH
        this.USER_DICT_PATH := USER_DICT_PATH
        this.IDF_PATH := IDF_PATH
        this.STOP_WORD_PATH := STOP_WORD_PATH
        this.loadFlag := false
        this.cppJieba := 0 ; native handle returned by Jieba.dll\init
    }

    ; Creates the native instance from the stored paths (passed as UTF-8 strings).
    ; Calling load() again releases the previous handle first instead of leaking it.
    ; Throws an Exception when init yields no handle (DLL/function missing, or null result).
    load() {
        if this.cppJieba
            this.free()
        cppJieba.strToUtf8(this.DICT_PATH, buf_DICT_PATH)
        cppJieba.strToUtf8(this.HMM_PATH, buf_HMM_PATH)
        cppJieba.strToUtf8(this.USER_DICT_PATH, buf_USER_DICT_PATH)
        cppJieba.strToUtf8(this.IDF_PATH, buf_IDF_PATH)
        cppJieba.strToUtf8(this.STOP_WORD_PATH, buf_STOP_WORD_PATH)
        ; "ptr" return type: DllCall defaults to Int, which would truncate the
        ; handle on 64-bit AutoHotkey.
        handle := DllCall("Jieba.dll\init", "ptr", &buf_DICT_PATH, "ptr", &buf_HMM_PATH, "ptr", &buf_USER_DICT_PATH, "ptr", &buf_IDF_PATH, "ptr", &buf_STOP_WORD_PATH, "ptr")
        if !handle
            throw Exception("Jieba.dll\init did not return a handle (ErrorLevel: " ErrorLevel ")", -1)
        this.cppJieba := handle
        this.loadFlag := true
    }

    ; Segments str and returns the words joined by segFlag.
    ;   cut_all  - true: call Jieba.dll\cut_all; false: call Jieba.dll\cut
    ;   hmm_flag - forwarded to Jieba.dll\cut as an int (not used by cut_all)
    ;   segFlag  - separator handed to the DLL and used between pieces
    cut(str, cut_all := false, hmm_flag := true, segFlag := "/") {
        return this._segment(str, cut_all ? "cut_all" : "cut", hmm_flag, segFlag)
    }

    ; Same as cut(), but calls Jieba.dll\cut_for_search.
    cut_for_search(str, hmm_flag := true, segFlag := "/") {
        return this._segment(str, "cut_for_search", hmm_flag, segFlag)
    }

    ; Private helper shared by cut() and cut_for_search().
    ; Splits str at punctuation, sends every non-empty piece to the DLL function
    ; named by mode, and joins the per-piece results with segFlag.
    _segment(str, mode, hmm_flag, segFlag) {
        if !this.loadFlag
            this.load()
        ; The separator is loop-invariant, so encode it once.
        cppJieba.strToUtf8(segFlag, buf_segFlag)
        joined := ""
        ; Punctuation is turned into "," and then used as the split point, so
        ; punctuation itself never reaches the DLL or the output.
        for _, piece in StrSplit(RegExReplace(str, "[\pP‘’“”]", ","), ",") {
            ; Compare against "" explicitly: a plain truth test would also drop
            ; pieces such as "0", which AutoHotkey treats as false.
            if (piece = "")
                continue
            cppJieba.strToUtf8(piece, buf_piece)
            ; "ptr" return type keeps the returned string pointer intact on 64-bit.
            if (mode = "cut_all")
                pResult := DllCall("Jieba.dll\cut_all", "ptr", this.cppJieba, "ptr", &buf_piece, "ptr", &buf_segFlag, "ptr")
            else
                pResult := DllCall("Jieba.dll\" mode, "ptr", this.cppJieba, "ptr", &buf_piece, "int", hmm_flag, "ptr", &buf_segFlag, "ptr")
            if !pResult
                continue
            ; NOTE(review): the returned buffer is never released here; whether the
            ; DLL expects the caller to free it is not visible from this file.
            joined .= StrGet(pResult, , "utf-8") segFlag
        }
        ; Remove the trailing separator appended after the last piece.
        return SubStr(joined, 1, StrLen(joined) - StrLen(segFlag))
    }

    ; Releases the native instance, if any. Safe to call repeatedly; the next
    ; cut()/load_userdict() call creates a fresh instance.
    free() {
        if !this.cppJieba
            return
        ; Best effort: errors from the DLL call are deliberately suppressed.
        try DllCall("Jieba.dll\free_jieba", "ptr", this.cppJieba)
        this.cppJieba := 0
        this.loadFlag := false
    }

    ; Passes the user dictionary file at path (as UTF-8) to Jieba.dll\load_userdict,
    ; creating the native instance first if that has not happened yet.
    load_userdict(path) {
        if !this.loadFlag
            this.load()
        cppJieba.strToUtf8(path, buf_path)
        DllCall("Jieba.dll\load_userdict", "ptr", this.cppJieba, "ptr", &buf_path)
    }

    ; Releases the native instance when the last reference to the object goes away.
    __delete() {
        this.free()
    }

    ; Writes str into buf as a null-terminated UTF-8 string.
    ; Returns StrPut's result (number of bytes written, terminator included).
    strToUtf8(str, ByRef buf) {
        ; StrPut without a target address reports the required size.
        VarSetCapacity(buf, StrPut(str, "utf-8"))
        return StrPut(str, &buf, "utf-8")
    }
}
声明:站内资源均为整理优化后的代码,上传仅供分享与学习研究;开源代码基本都会标明出处,方便大家扩展学习路径。请不要恶意搬运,破坏站长辛苦整理维护的劳动成果。本站为爱好者分享站点,所有内容均不用于商业用途。如若本站上传内容侵犯了原作者的合法权益,请联系我们进行删除下架。

评论(0)