config = $config;
    }

    /**
     * Convert a Chinese string to pinyin with the configured (or an explicitly named) parser.
     *
     * @param string $hanzi Text to convert.
     * @param string|false $method Name of the parser method to call on this object. When falsy,
     *        the name is built from $this->config['parser'] with the suffix 'Parse'.
     * @return mixed Whatever the selected parser method returns.
     * @throws Exception When the resolved method is not callable on this object.
     */
    public function parse($hanzi, $method = false){
        if(!$method){
            $method = $this->config['parser'] . 'Parse';
        }
        if(!is_callable([$this, $method])){
            throw new Exception('Cannot find pinyin parser: ' . $this->config['parser']);
        }
        return call_user_func([$this, $method], $hanzi);
    }

    /**
     * Flatten a tagged segment list into a list of words.
     *
     * Segments tagged 'uv' or 'ud' (labelled "preposition" by the original author) are not kept
     * as separate words; instead the literal '的' is appended to the preceding word.
     *
     * @param array $segList List of segments, each with 'word' and 'tag' keys.
     * @return array List of word strings.
     */
    private function filteJiebaTag($segList){
        $ret = [];
        foreach($segList as $seg){
            $isParticle = $seg['tag'] === 'uv' || $seg['tag'] === 'ud';
            if(!$isParticle){
                $ret[] = $seg['word'];
                continue;
            }
            $last = count($ret) - 1;
            if($last >= 0){
                $ret[$last] .= '的';
            } else {
                // No preceding word to attach to (particle is the first segment). Writing to
                // index -1 would raise a notice and shift every later "last index" lookup by
                // one, so start a new entry instead.
                $ret[] = '的';
            }
        }
        return $ret;
    }

    /**
     * Convert Chinese text to pinyin using the bundled PHP libraries.
     *
     * The input is split on '/'. Each part is either segmented into words (when
     * $this->config['cutWord'] is truthy) or kept whole, and every resulting piece is passed to
     * the pinyin converter. The '/' separators are preserved as plain string items between parts.
     *
     * @param string $hanzi Text to convert.
     * @return array List whose items are either the string '/' or an array holding one
     *         converter result per word of that part.
     */
    public function innerParse($hanzi){
        if(!self::$libLoaded){
            require_once(dirname(__DIR__) . '/vendor/autoload.php');
            self::$libLoaded = true;
        }

        // Word segmentation is optional; initialise the segmenter only once per process.
        $cutWord = !empty($this->config['cutWord']);
        if($cutWord && !self::$jiebaLoaded){
            ini_set('memory_limit', '1024M');
            Jieba::init(['test' => true]);
            Finalseg::init();
            Posseg::init();
            Jieba::loadUserDict(dirname(__DIR__) . '/data/userDict.txt');
            self::$jiebaLoaded = true;
        }

        // Build the list of parts, re-inserting a '/' marker between consecutive parts.
        $sentenceList = [];
        foreach(explode('/', $hanzi) as $i => $sentence){
            if($i > 0){
                $sentenceList[] = '/';
            }
            $sentenceList[] = $cutWord
                ? $this->filteJiebaTag(Posseg::cut($sentence))
                : [$sentence];
        }

        // After segmentation, annotate every word with its pinyin.
        if(!self::$pinyinParser){
            self::$pinyinParser = new Pinyin();
        }
        $flags = PINYIN_NO_TONE | PINYIN_UMLAUT_V | PINYIN_KEEP_PUNCTUATION | PINYIN_KEEP_ENGLISH | PINYIN_KEEP_NUMBER;

        $ret = [];
        foreach($sentenceList as $segList){
            if(!is_array($segList)){
                // '/' separator: passed through untouched.
                $ret[] = $segList;
                continue;
            }
            $segPinyin = [];
            foreach($segList as $seg){
                $segPinyin[] = self::$pinyinParser->convert($seg, $flags);
            }
            $ret[] = $segPinyin;
        }
        return $ret;
    }

    /**
     * Convert Chinese text to pinyin by delegating to the 'Pinyin2Hanzi' hook.
     *
     * @param string $hanzi Text to convert.
     * @return mixed The value a hook handler stored in the by-reference result, or the result of
     *         the configured fallback parser when no handler produced one.
     * @throws Exception When the hook produced nothing and no fallback is configured.
     */
    public function hookParse($hanzi){
        $pinyinList = null;
        \Hooks::run('Pinyin2Hanzi', [$hanzi, &$pinyinList]);
        if(!$pinyinList){
            // Shared helper: it also rejects a falsy 'fallback' setting, which would otherwise
            // make parse() pick this same parser again and recurse without end.
            return $this->fallbackOrException($hanzi, 'Hook Pinyin2Hanzi never handled.');
        }
        return $pinyinList;
    }

    /**
     * Run the configured fallback parser, or fail when none is configured.
     *
     * @param string $hanzi Text to convert.
     * @param string $message Exception message used when there is no usable fallback.
     * @return mixed Result of parse() using $this->config['fallback'] as the method.
     * @throws Exception When $this->config['fallback'] is unset or falsy.
     */
    private function fallbackOrException($hanzi, $message){
        if(empty($this->config['fallback'])){
            throw new Exception($message);
        }
        return $this->parse($hanzi, $this->config['fallback']);
    }

    /**
     * Convert Chinese text to pinyin through the remote HTTP API at $this->config['url'].
     *
     * The text is POSTed as the 'sentence' field. The response is decoded as JSON and is
     * accepted only when it has no 'error' key and its 'status' is exactly 1; otherwise the
     * fallback parser is used (or an exception is thrown when there is none).
     *
     * @param string $hanzi Text to convert.
     * @return mixed The 'data' member of the API response, or the fallback parser's result.
     * @throws Exception When the url is not configured, or the request fails and no fallback
     *         is configured.
     */
    public function apiParse($hanzi){
        if(!isset($this->config['url'])){
            throw new Exception('LatinizeUrl remote api url not set.');
        }

        $factory = new HttpRequestFactory();
        $req = $factory->create($this->config['url'], [
            'method' => 'POST',
            'postData' => [
                'sentence' => $hanzi
            ],
        ], __METHOD__);

        $status = \Status::wrap($req->execute());
        if(!$status->isOK()){
            // The fallback result must be returned; otherwise it is thrown away and the code
            // below goes on to read the failed response.
            return $this->fallbackOrException($hanzi, 'Cannot use LatinizeUrl remote api.');
        }

        $json = \FormatJson::decode($req->getContent(), true);
        if(isset($json["error"])){
            return $this->fallbackOrException($hanzi, 'LatinizeUrl remote api error: ' . $json["error"]);
        }
        if(!isset($json["status"]) || $json["status"] !== 1){
            return $this->fallbackOrException($hanzi, 'Cannot use LatinizeUrl remote api.');
        }
        return $json["data"];
    }

    /**
     * Render a parsed pinyin structure as a single hyphen-separated string.
     *
     * Each top-level item is either a plain string (emitted as is) or an array of groups. A
     * group that is an array has each of its entries capitalised and concatenated; a group that
     * is a string is emitted as is. Groups and top-level items are joined with '-', and finally
     * the hyphens on both sides of a single ASCII punctuation character are removed.
     *
     * @param array $sentenceList Structure such as the one returned by innerParse().
     * @return string The assembled string.
     */
    public function pinyin2String($sentenceList){
        $strBuilder = [];
        foreach($sentenceList as $pinyinList){
            if(!is_array($pinyinList)){
                $strBuilder[] = $pinyinList;
                continue;
            }
            $segStrBuilder = [];
            foreach($pinyinList as $pinyinGroup){
                if(is_array($pinyinGroup)){
                    $segStrBuilder[] = implode('', array_map([$this, 'initialCapital'], $pinyinGroup));
                } else {
                    $segStrBuilder[] = $pinyinGroup;
                }
            }
            $strBuilder[] = implode('-', $segStrBuilder);
        }
        $str = implode('-', $strBuilder);
        // Character class = the four ASCII punctuation/space ranges. The last range starts at
        // 0x7b ('{'); starting it at 0x7a would also match the letter 'z'.
        $str = preg_replace('/-([\x20-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f])-/', '$1', $str);
        return $str;
    }

    /**
     * Upper-case the first byte of a string, leaving the rest unchanged.
     *
     * @param string $text Input text.
     * @return string Text with its first character upper-cased.
     */
    public function initialCapital($text){
        return ucfirst($text);
    }
}