getMainConfig(); $wgLatinizeUrlChineseConvertorConfig = $config->get('LatinizeUrlChineseConvertorConfig'); self::$standalone = new self($wgLatinizeUrlChineseConvertorConfig); } return self::$standalone; } public static function onGetConvertor($langCode, &$convertor){ if(in_array($langCode, ['zh-cn', 'zh-hans'])){ $convertor = self::standalone(); } return true; } public function __construct($config){ $this->config = $config; } public function parse($hanzi){ $method = $this->config['parser'] . 'Parse'; if(is_callable([$this, $method])){ return call_user_func([$this, $method], $hanzi); } else { throw new Exception('Cannot find pinyin parser: ' . $this->config['parser']); } } private function filteJiebaTag($segList){ $ret = []; foreach($segList as $seg){ if($seg['tag'] === 'uv' || $seg['tag'] === 'ud'){ //介词 $index = count($ret) - 1; $ret[$index] .= '的'; } else { $ret[] = $seg['word']; } } return $ret; } /** * 使用php内部方法实现汉字转拼音 */ public function innerParse($hanzi){ $ret = []; if(!self::$libLoaded){ require_once(dirname(__DIR__) . '/vendor/autoload.php'); self::$libLoaded = true; } $originalSentenceList = explode('/', $hanzi); $sentenceList = []; if(isset($this->config['cutWord']) && $this->config['cutWord']){ //需要分词 if(!self::$jiebaLoaded){ ini_set('memory_limit', '1024M'); Jieba::init(['test' => true]); Finalseg::init(); Posseg::init(); Jieba::loadUserDict(dirname(__DIR__) . '/data/userDict.txt'); self::$jiebaLoaded = true; } $length = count($originalSentenceList); for($i = 0; $i < $length; $i ++){ $sentence = $originalSentenceList[$i]; $sentenceList[] = $this->filteJiebaTag(Posseg::cut($sentence)); if($i + 1 < $length){ $sentenceList[] = '/'; } } } else { $length = count($originalSentenceList); for($i = 0; $i < $length; $i ++){ $sentence = $originalSentenceList[$i]; $sentenceList[] = [$sentence]; if($i + 1 < $length){ $sentenceList[] = '/'; } } } //分词后,进行拼音标注 if(!self::$pinyinParser){ self::$pinyinParser = new Pinyin(); } foreach($sentenceList as $segList){ if(is_array($segList)){ $segPinyin = []; foreach($segList as $seg){ $segPinyin[] = self::$pinyinParser->convert($seg, PINYIN_NO_TONE | PINYIN_UMLAUT_V | PINYIN_KEEP_PUNCTUATION | PINYIN_KEEP_ENGLISH | PINYIN_KEEP_NUMBER); } $ret[] = $segPinyin; } else { $ret[] = $segList; } } return $ret; } /** * 使用hook进行汉字转拼音 */ public function hookParse($hanzi){ $pinyinList = null; MediaWikiServices::getInstance()->getHookContainer()->run('Pinyin2Hanzi', [$hanzi, &$pinyinList]); if(!$pinyinList){ if(isset($this->config['fallback'])){ return $this->parse($hanzi, $this->config['fallback']); } else { throw new Exception('Hook Pinyin2Hanzi never handled.'); } } } private function fallbackOrException($hanzi, $message){ if(isset($this->config['fallback']) && $this->config['fallback'] != false){ return $this->parse($hanzi, $this->config['fallback']); } else { throw new Exception($message); } } public function apiParse($hanzi){ if(!isset($this->config['url'])){ throw new Exception('LatinizeUrl remote api url not set.'); } $factory = MediaWikiServices::getInstance()->getHttpRequestFactory(); $req = $factory->create($this->config['url'], [ 'method' => 'POST', 'postData' => [ 'sentence' => $hanzi ], ], __METHOD__); $status = \Status::wrap($req->execute()); if(!$status->isOK()){ $this->fallbackOrException($hanzi, 'Cannot use LatinizeUrl remote api.'); } $json = \FormatJson::decode($req->getContent(), true); if(isset($json["error"])){ $this->fallbackOrException($hanzi, 'LatinizeUrl remote api error: ' . $json["error"]); } if(!isset($json["status"]) || $json["status"] !== 1){ $this->fallbackOrException($hanzi, 'Cannot use LatinizeUrl remote api.'); } return $json["data"]; } }