From f96ab136e7b39ef96e6500d26960d1619b1a9768 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=87=8F=E5=AD=90=E5=A4=8D=E5=90=88=E6=80=81?= Date: Sat, 23 May 2020 21:08:48 +0800 Subject: [PATCH] Init project --- .gitignore | 4 ++ IsekaiAIReview.alias.php | 0 LICENSE | 2 +- README-zh.md | 30 +++++++++++++ README.md | 29 ++++++++++++ composer.json | 14 ++++++ extension.json | 57 ++++++++++++++++++++++++ i18n/zh-hans.json | 21 +++++++++ includes/AIReviewJob.php | 68 +++++++++++++++++++++++++++++ includes/AliyunAIReview.php | 85 ++++++++++++++++++++++++++++++++++++ includes/Hooks.php | 13 ++++++ includes/LogFormatter.php | 70 +++++++++++++++++++++++++++++ includes/SectionSplitter.php | 78 +++++++++++++++++++++++++++++++++ includes/Utils.php | 56 ++++++++++++++++++++++++ 14 files changed, 526 insertions(+), 1 deletion(-) create mode 100644 .gitignore create mode 100644 IsekaiAIReview.alias.php create mode 100644 README-zh.md create mode 100644 README.md create mode 100644 composer.json create mode 100644 extension.json create mode 100644 i18n/zh-hans.json create mode 100644 includes/AIReviewJob.php create mode 100644 includes/AliyunAIReview.php create mode 100644 includes/Hooks.php create mode 100644 includes/LogFormatter.php create mode 100644 includes/SectionSplitter.php create mode 100644 includes/Utils.php diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..71702de --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.vs/ +vendor/ +composer.lock +test.php diff --git a/IsekaiAIReview.alias.php b/IsekaiAIReview.alias.php new file mode 100644 index 0000000..e69de29 diff --git a/LICENSE b/LICENSE index a5cd869..a5c938e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2020 量子复合态 +Copyright (c) 2020 Hyperzlib Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README-zh.md b/README-zh.md new file mode 100644 index 
0000000..b72fcd4 --- /dev/null +++ b/README-zh.md @@ -0,0 +1,30 @@ +# Isekai AI Review +[English](README.md) + +这个扩展必须和Moderation扩展一起用。 + +通过AI审核的编辑会自动在Moderation中通过审核。 + +如果你想要增加新的API接口,可以提交issue给我。 + +## 使用方法 +先在阿里云注册:[https://www.aliyun.com/product/lvwang](https://www.aliyun.com/product/lvwang) + +然后安装Moderation扩展:[https://github.com/edwardspec/mediawiki-moderation](https://github.com/edwardspec/mediawiki-moderation) + +安装composer包:(如果你是在release页面下载的,可以忽略这一项) +```php +composer update +``` + +在```LocalSettings.php```中添加相关配置: +```php +wfLoadExtension('IsekaiAIReview'); + +//配置部分 +$wgAIReviewEndpoint = 'cn-shanghai'; +$wgAIReviewAccessKeyId = '阿里云的Access key id'; +$wgAIReviewAccessKeySecret = '阿里云的Access key secret'; +$wgAIReviewBizType = 'isekaiwiki'; +$wgAIReviewRobotUID = 0; //在Moderation里显示的,执行审核操作的机器人账号的UID +``` \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..9ea43e5 --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ +# Isekai AI Review +[中文文档](README-zh.md) + +This extension requires the MediaWiki Moderation extension. +It uses AI to automatically review pending revisions in Moderation. + +If you want support for more AI review APIs, you can submit an issue. 
+ +## Usage +First, register at Aliyun: [https://www.aliyun.com/product/lvwang](https://www.aliyun.com/product/lvwang) + +Then install the Moderation extension: [https://github.com/edwardspec/mediawiki-moderation](https://github.com/edwardspec/mediawiki-moderation) + +Install the composer packages (skip this step if you downloaded a release) +```php +composer update +``` + +Finally, add the configuration to ```LocalSettings.php```: +```php +wfLoadExtension('IsekaiAIReview'); + +//config +$wgAIReviewEndpoint = 'cn-shanghai'; +$wgAIReviewAccessKeyId = 'Your Aliyun Access key id'; +$wgAIReviewAccessKeySecret = 'Your Aliyun Access key secret'; +$wgAIReviewBizType = 'isekaiwiki'; +$wgAIReviewRobotUID = 0; //UID of the bot account shown in Moderation as the user that approves revisions +``` \ No newline at end of file diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..a68717a --- /dev/null +++ b/composer.json @@ -0,0 +1,14 @@ +{ + "name": "hyperzlib/isekai-ai-review", + "type": "mediawiki-extension", + "require": { + "alibabacloud/sdk": "^1.8", + "paquettg/php-html-parser": "^2.2" + }, + "authors": [ + { + "name": "量子复合态", + "email": "hyperzlib@outlook.com" + } + ] +} diff --git a/extension.json b/extension.json new file mode 100644 index 0000000..ae8b4c3 --- /dev/null +++ b/extension.json @@ -0,0 +1,57 @@ +{ + "name": "IsekaiAIReview", + "author": "hyperzlib", + "url": "https://www.isekai.cn", + "descriptionmsg": "isekai-aireview-desc", + "version": "1.0.0", + "license-name": "MIT", + "type": "other", + "requires": { + "MediaWiki": ">= 1.31.0", + "extensions": { + "Moderation": ">= 1.5.0" + } + }, + "ExtensionMessagesFiles": { + "IsekaiAIReviewAlias": "IsekaiAIReview.alias.php" + }, + "MessagesDirs": { + "IsekaiAIReview": [ + "i18n" + ] + }, + "AutoloadClasses": { + "Isekai\\AIReview\\Hooks": "includes/Hooks.php", + "Isekai\\AIReview\\SectionSplitter": "includes/SectionSplitter.php", + "Isekai\\AIReview\\Utils": "includes/Utils.php", + "Isekai\\AIReview\\AliyunAIReview": 
"includes/AliyunAIReview.php", + "Isekai\\AIReview\\AIReviewJob": "includes/AIReviewJob.php", + "Isekai\\AIReview\\LogFormatter": "includes/LogFormatter.php" + }, + "Hooks": { + "ModerationPending": [ + "Isekai\\AIReview\\Hooks::onModerationPending" + ] + }, + "JobClasses": { + "IsekaiAIReview": "Isekai\\AIReview\\AIReviewJob" + }, + "LogTypes": [ + "aireview" + ], + "LogActionsHandlers": { + "aireview/*": "Isekai\\AIReview\\LogFormatter" + }, + "LogRestrictions": { + "aireview": "moderation" + }, + "config": { + "AIReviewEndpoint": "cn-shanghai", + "AIReviewAccessKeyId": "", + "AIReviewAccessKeySecret": "", + "AIReviewBizType": null, + "AIReviewRobotUID": 1 + }, + "load_composer_autoloader": true, + "manifest_version": 1 +} \ No newline at end of file diff --git a/i18n/zh-hans.json b/i18n/zh-hans.json new file mode 100644 index 0000000..cbdcb22 --- /dev/null +++ b/i18n/zh-hans.json @@ -0,0 +1,21 @@ +{ + "isekai-aireview-desc": "通过AI实现自动审核", + "log-name-aireview": "机器审核记录", + "log-description-aireview": "这里会显示经过机器审核的记录", + "logentry-aireview-approve": "AI已{{GENDER:$2|通过}}$4对页面$3的$5", + "logentry-aireview-reject": "AI已{{GENDER:$2|阻止}}$4对页面$3的$5,原因:$6", + + "isekai-aireview-aliyun-reason-normal": "正常(不知道为什么会被拦截)", + "isekai-aireview-aliyun-reason-spam": "垃圾信息", + "isekai-aireview-aliyun-reason-ad": "广告", + "isekai-aireview-aliyun-reason-politics": "政治敏感", + "isekai-aireview-aliyun-reason-terrorism": "暴恐", + "isekai-aireview-aliyun-reason-abuse": "辱骂", + "isekai-aireview-aliyun-reason-porn": "色情", + "isekai-aireview-aliyun-reason-flood": "灌水", + "isekai-aireview-aliyun-reason-contraband": "违禁", + "isekai-aireview-aliyun-reason-meaningless": "无意义", + "isekai-aireview-aliyun-reason-customized": "违禁词", + "isekai-aireview-aliyun-reason-unknow": "未知: $1", + "isekai-aireview-aliyun-server-error": "服务器返回: $1" +} \ No newline at end of file diff --git a/includes/AIReviewJob.php b/includes/AIReviewJob.php new file mode 100644 index 0000000..65b31d4 --- /dev/null +++ 
b/includes/AIReviewJob.php @@ -0,0 +1,68 @@ +params['mod_id']; + $modUser = $dbr->selectField('moderation', 'mod_user', ['mod_id' => $modid], __METHOD__); + + $services = MediaWikiServices::getInstance(); + $entryFactory = $services->getService('Moderation.EntryFactory'); + $consequenceManager = $services->getService('Moderation.ConsequenceManager'); + + /** @var ModerationViewableEntry $contentEntry */ + $contentEntry = $entryFactory->findViewableEntry($modid); + $title = $contentEntry->getTitle(); + + $context = RequestContext::getMain(); + $context->setTitle($title); + //获取diff内容 + $diffHtml = $contentEntry->getDiffHTML($context); + //取出增加的文本内容 + $addedText = Utils::getDiffAddedLines($diffHtml); + if(strlen($addedText) > 0){ + //开始进行AI审核 + $reviewer = new AliyunAIReview(); + $result = $reviewer->reviewText($addedText); + if(!$result['pass']){ //审核不通过 + wfDebugLog( + 'isekai-aireview', + 'Reject revision on: ' . $title->getText() . ', reason: ' . Utils::getReadableReason($result['reason']) + ); + Utils::addAIReviewLog('reject', $robotUser, $modUser, $title, $modid, $result['reason']); + return true; + } + } + + //审核通过 + wfDebugLog( + 'isekai-aireview', + 'Approve revision on: ' . 
$title->getText() + ); + Utils::addAIReviewLog('approve', $robotUser, $modUser, $title, $modid); + $approveEntry = $entryFactory->findApprovableEntry($modid); + $approveEntry->approve($robotUser); + return true; + } +} \ No newline at end of file diff --git a/includes/AliyunAIReview.php b/includes/AliyunAIReview.php new file mode 100644 index 0000000..7a5e954 --- /dev/null +++ b/includes/AliyunAIReview.php @@ -0,0 +1,85 @@ +regionId($wgAIReviewEndpoint) + ->asDefaultClient(); + } + + public function reviewText($text){ + $reqData = $this->buildRequestData($text); + $response = $this->doRequest($reqData); + return $response; + } + + public function buildRequestData($text){ + global $wgAIReviewBizType; + + $reqData = [ + 'scenes' => ['antispam'], + 'tasks' => $this->buildTasks($text), + ]; + + if($wgAIReviewBizType) $reqData['bizType'] = $wgAIReviewBizType; + return $reqData; + } + + public function buildTasks($text){ + $splitter = new SectionSplitter($text, self::MAX_LENGTH); + $chunkList = $splitter->getChunkList(); + $taskList = []; + foreach($chunkList as $chunk){ + $task = [ + 'dataId' => uniqid(), + 'content' => $chunk, + ]; + $taskList[] = $task; + }; + unset($chunkList); + return $taskList; + } + + public function doRequest($requestData){ + $textScan = Green::v20180509()->textScan(); + $response = $textScan->setMethod('POST')->setAcceptFormat('JSON')->setContent(json_encode($requestData))->request(); + + if($response->getReasonPhrase() === 'OK'){ + return $this->parseResponse($response->toArray()); + } else { + return ['pass' => false, 'reason' => wfMessage('isekai-aireview-aliyun-server-error', $response->getStatusCode())->escaped()]; + } + } + + public function parseResponse($response){ + if($response['code'] !== 200) + return ['pass' => false, 'reason' => wfMessage('isekai-aireview-aliyun-server-error', $response['code'])->escaped()]; + + $pass = true; + $reasons = []; + foreach($response['data'] as $task){ + if(is_array($task['results'])){ + 
foreach($task['results'] as $result){ + if($result['suggestion'] !== 'pass'){ + $pass = false; + foreach($result['details'] as $detail){ + $reason = $detail['label']; + if(!in_array($reason, $reasons)){ + $reasons[] = $reason; + } + } + } + } + } + } + return ['pass' => $pass, 'reason' => $reasons]; + } +} \ No newline at end of file diff --git a/includes/Hooks.php b/includes/Hooks.php new file mode 100644 index 0000000..e23f1c4 --- /dev/null +++ b/includes/Hooks.php @@ -0,0 +1,13 @@ + $modid]); + JobQueueGroup::singleton()->push($job); + } +} \ No newline at end of file diff --git a/includes/LogFormatter.php b/includes/LogFormatter.php new file mode 100644 index 0000000..bc386e3 --- /dev/null +++ b/includes/LogFormatter.php @@ -0,0 +1,70 @@ +entry->getSubtype(); + $entryParams = $this->entry->getParameters(); + $linkRenderer = $this->getLinkRenderer(); + + switch($type){ + case 'approve': + $modId = $entryParams['modid']; + + $user = User::newFromId($entryParams['moduser']); + $userLink = Linker::userLink( $user->getId(), $user->getName() ); + $params[3] = Message::rawParam( $userLink ); + + $link = $linkRenderer->makeKnownLink( + SpecialPage::getTitleFor( 'Moderation' ), + $this->msg( 'moderation-log-change' )->params( $modId )->text(), + [ 'title' => $this->msg( 'tooltip-moderation-rejected-change' )->plain() ], + [ 'modaction' => 'show', 'modid' => $modId ] + ); + $params[4] = Message::rawParam( $link ); + + break; + case 'reject': + $modId = $entryParams['modid']; + + $user = User::newFromId($entryParams['moduser']); + $userLink = Linker::userLink( $user->getId(), $user->getName() ); + $params[3] = Message::rawParam( $userLink ); + + $link = $linkRenderer->makeKnownLink( + SpecialPage::getTitleFor( 'Moderation' ), + $this->msg( 'moderation-log-change' )->params( $modId )->text(), + [ 'title' => $this->msg( 'tooltip-moderation-rejected-change' )->plain() ], + [ 'modaction' => 'show', 'modid' => $modId ] + ); + $params[4] = Message::rawParam( $link ); + + 
+ $params[5] = Utils::getReadableReason($entryParams['reason']); + break; + } + return $params; + } + + public function getPreloadTitles() { + $type = $this->entry->getSubtype(); + $params = $this->entry->getParameters(); + + $titles = []; + + if ( $params['moduser'] ) { # Not anonymous + $user = User::newFromId($params['moduser']); + $titles[] = Title::makeTitle( NS_USER, $user->getName() ); + } + + return $titles; + } +} \ No newline at end of file diff --git a/includes/SectionSplitter.php b/includes/SectionSplitter.php new file mode 100644 index 0000000..a79b53f --- /dev/null +++ b/includes/SectionSplitter.php @@ -0,0 +1,78 @@ +maxLength = $maxLength; + $this->splitLine($text); + } + + /* Push a piece of text onto the chunk list */ + public function push($chunk){ + $chunkLength = mb_strlen($chunk, 'UTF-8'); + if($this->bufferLength + $chunkLength > $this->maxLength){ //Current chunk would exceed maxLength: start a new chunk + $this->chunkListSeek ++; + $this->chunkList[$this->chunkListSeek] = $chunk; + $this->bufferLength = $chunkLength; + } else { //Still fits: append to the current chunk + $this->chunkList[$this->chunkListSeek] .= $chunk; + $this->bufferLength += $chunkLength; + } + } + + /** + * Split the text by line + */ + public function splitLine($text){ + $text = str_replace("\r\n", "\n", $text); + $lines = explode("\n", $text); + foreach($lines as $line){ + if(empty($line)) continue; + + $line .= "\n"; + if(mb_strlen($line, 'UTF-8') > $this->maxLength){ //A single line longer than maxLength: fall back to splitting by sentence + $this->splitSentence($line); + } else { + $this->push($line); + } + } + } + + /** + * Split the text by sentence + */ + public function splitSentence($text){ //Sentences end at a full stop (。 or .) + $sentences = explode("\0", preg_replace('/(。|\\.)/', "$1\0", $text)); + foreach($sentences as $sentence){ + if(mb_strlen($sentence, 'UTF-8') > $this->maxLength){ //Even one sentence is longer than maxLength: force-split it 
+ $this->forceSplit($sentence); + } else { + $this->push($sentence); + } + } + } + + /** + * Force-split the text into consecutive pieces of at most maxLength characters (not bytes) + */ + public function forceSplit($text){ + $len = mb_strlen($text, 'UTF-8'); + $times = ceil($len / $this->maxLength); + for($i = 0; $i < $times; $i ++){ + $startPos = $i * $this->maxLength; + $sentenceLen = min($len - $startPos, $this->maxLength); //characters left from this offset, capped at maxLength + $sentence = mb_substr($text, $startPos, $sentenceLen, 'UTF-8'); //character-based slice so multibyte characters are never cut in half + + $this->push($sentence); + } + } + + public function getChunkList(){ + return $this->chunkList; + } +} \ No newline at end of file diff --git a/includes/Utils.php b/includes/Utils.php new file mode 100644 index 0000000..cd05ce1 --- /dev/null +++ b/includes/Utils.php @@ -0,0 +1,56 @@ +load($diffHtml); + $lines = []; + + if($addedLineDomList = $dom->find('.diff-addedline')){ + /** @var \PHPHtmlParser\Dom\HtmlNode $addedLineDom */ + foreach($addedLineDomList as $addedLineDom){ + $lines[] = strip_tags($addedLineDom->innerHtml); + } + } + + return trim(implode("\n", $lines)); + } + + public static function getReadableReason($reasons){ + $allowedReasons = ['spam', 'ad', 'politics', 'terrorism', 'abuse', 'porn', 'flood', 'contraband', 'meaningless', 'customized', 'normal']; + + if(is_string($reasons)) return $reasons; + + $readableReasons = []; + foreach($reasons as $reason){ + if(in_array($reason, $allowedReasons)){ + $readableReasons[] = wfMessage('isekai-aireview-aliyun-reason-' . 
$reason)->escaped(); + } else { + $readableReasons[] = wfMessage('isekai-aireview-aliyun-reason-unknow', $reason)->escaped(); + } + } + + return implode(', ', $readableReasons); + } + + public static function addAIReviewLog($event, $robotUser, $modUser, $title, $modid, $reason = null){ + $entry = new ManualLogEntry('aireview', $event); + $entry->setPerformer($robotUser); + $entry->setTarget($title); + + $param = [ + 'modid' => $modid, + 'moduser' => $modUser, + ]; + if($reason){ + $param['reason'] = $reason; + } + + $entry->setParameters($param); + $entry->insert(); + } +} \ No newline at end of file