[prev in list] [next in list] [prev in thread] [next in thread]
List: kde-commits
Subject: [websites/wiki-kde-org/develop] extensions/Translate/ttmserver: add missing files, stupid gitignore
From: Ingo Malchow <imalchow () kde ! org>
Date: 2012-07-24 20:03:17
Message-ID: 20120724200317.905E1A6094 () git ! kde ! org
[Download RAW message or body]
Git commit f719f9c77765d608935ee8d2d768b15e5d29d836 by Ingo Malchow.
Committed on 24/07/2012 at 21:59.
Pushed by imalchow into branch 'develop'.
add missing files, stupid gitignore
A +283 -0 extensions/Translate/ttmserver/DatabaseTTMServer.php
A +39 -0 extensions/Translate/ttmserver/FakeTTMServer.php
A +94 -0 extensions/Translate/ttmserver/Interfaces.php
A +32 -0 extensions/Translate/ttmserver/RemoteTTMServer.php
A +53 -0 extensions/Translate/ttmserver/SharedDatabaseTTMServer.php
A +227 -0 extensions/Translate/ttmserver/SolrTTMServer.php
A +112 -0 extensions/Translate/ttmserver/TTMServer.php
A +60 -0 extensions/Translate/ttmserver/schema.xml
http://commits.kde.org/websites/wiki-kde-org/f719f9c77765d608935ee8d2d768b15e5d29d836
diff --git a/extensions/Translate/ttmserver/DatabaseTTMServer.php \
b/extensions/Translate/ttmserver/DatabaseTTMServer.php new file mode 100644
index 0000000..5455a27
--- /dev/null
+++ b/extensions/Translate/ttmserver/DatabaseTTMServer.php
@@ -0,0 +1,283 @@
+<?php
+/**
+ * TTMServer - The Translate extension translation memory interface
+ *
+ * @file
+ * @author Niklas Laxström
+ * @copyright Copyright © 2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public \
License 2.0 or later + * @ingroup TTMServer
+ */
+
+/**
+ * Mysql based backend.
+ * @ingroup TTMServer
+ * @since 2012-06-27
+ */
+class DatabaseTTMServer extends TTMServer implements WritableTTMServer, \
ReadableTTMServer { + protected $sids;
+
+ /**
+ * @param $mode int DB_SLAVE|DB_MASTER
+ * @return DatabaseBase
+ */
+ protected function getDB( $mode = DB_SLAVE ) {
+ return wfGetDB( $mode, 'ttmserver', $this->config['database'] );
+ }
+
+ public function update( MessageHandle $handle, $targetText ) {
+ if ( !$handle->isValid() || $handle->getCode() === '' ) {
+ return false;
+ }
+
+ $mkey = $handle->getKey();
+ $group = $handle->getGroup();
+ $targetLanguage = $handle->getCode();
+ $sourceLanguage = $group->getSourceLanguage();
+
+ // Skip definitions to not slow down mass imports etc.
+ // These will be added when the first translation is made
+ if ( $targetLanguage === $sourceLanguage ) {
+ return false;
+ }
+
+ $definition = $group->getMessage( $mkey, $sourceLanguage );
+ if ( !is_string( $definition ) || !strlen( trim( $definition ) ) ) {
+ return false;
+ }
+
+ $context = Title::makeTitle( $handle->getTitle()->getNamespace(), $mkey \
); + $dbw = $this->getDB( DB_MASTER );
+ /* Check that the definition exists and fetch the sid. If not, add
+ * the definition and retrieve the sid. If the definition changes,
+ * we will create a new entry - otherwise we could at some point
+ * get suggestions which do not match the original definition any
+ * longer. The old translations are still kept until purged by
+ * rerunning the bootstrap script. */
+ $conds = array(
+ 'tms_context' => $context->getPrefixedText(),
+ 'tms_text' => $definition,
+ );
+
+ $extra = $this->getExtraConditions();
+ $conds = array_merge( $conds, $extra );
+
+ $sid = $dbw->selectField( 'translate_tms', 'tms_sid', $conds, __METHOD__ \
); + if ( $sid === false ) {
+ $sid = $this->insertSource( $context, $sourceLanguage, $definition );
+ }
+
+ // Delete old translations for this message if any. Could also use \
replace + $deleteConds = array(
+ 'tmt_sid' => $sid,
+ 'tmt_lang' => $targetLanguage,
+ );
+ $dbw->delete( 'translate_tmt', $deleteConds, __METHOD__ );
+
+ // Insert the new translation
+ $row = $deleteConds + array(
+ 'tmt_text' => $targetText,
+ );
+
+ $dbw->insert( 'translate_tmt', $row, __METHOD__ );
+
+ return true;
+ }
+
+ /// For subclasses
+ protected function getExtraConditions() {
+ return array();
+ }
+
+ protected function insertSource( Title $context, $sourceLanguage, $text ) \
{ + wfProfileIn( __METHOD__ );
+ $row = array(
+ 'tms_lang' => $sourceLanguage,
+ 'tms_len' => mb_strlen( $text ),
+ 'tms_text' => $text,
+ 'tms_context' => $context->getPrefixedText(),
+ );
+
+ $extra = $this->getExtraConditions();
+ $row = array_merge( $row, $extra );
+
+ $dbw = $this->getDB( DB_MASTER );
+ $dbw->insert( 'translate_tms', $row, __METHOD__ );
+ $sid = $dbw->insertId();
+
+ $fulltext = $this->filterForFulltext( $sourceLanguage, $text );
+ if ( count( $fulltext ) ) {
+ $row = array(
+ 'tmf_sid' => $sid,
+ 'tmf_text' => implode( ' ', $fulltext ),
+ );
+ $dbw->insert( 'translate_tmf', $row, __METHOD__ );
+ }
+
+ wfProfileOut( __METHOD__ );
+ return $sid;
+ }
+
+ /**
+ * Tokenizes the text for fulltext search.
+ * Tries to find the most useful tokens.
+ */
+ protected function filterForFulltext( $language, $input ) {
+ wfProfileIn( __METHOD__ );
+ $lang = Language::factory( $language );
+
+ $text = preg_replace( '/[^[:alnum:]]/u', ' ', $input );
+ $text = $lang->segmentByWord( $text );
+ $text = $lang->lc( $text );
+ $segments = preg_split( '/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY );
+ if ( count( $segments ) < 4 ) {
+ wfProfileOut( __METHOD__ );
+ return array();
+ }
+
+ foreach ( $segments as $i => $segment ) {
+ // Yes strlen
+ $len = strlen( $segment );
+ if ( $len < 4 || $len > 15 ) {
+ unset( $segments[$i] );
+ }
+ }
+
+ $segments = array_unique( $segments );
+ $segments = array_slice( $segments, 0, 10 );
+ wfProfileOut( __METHOD__ );
+ return $segments;
+ }
+
+ public function beginBootstrap() {
+ $dbw = $this->getDB( DB_MASTER );
+ $dbw->delete( 'translate_tms', '*', __METHOD__ );
+ $dbw->delete( 'translate_tmt', '*', __METHOD__ );
+ $dbw->delete( 'translate_tmf', '*', __METHOD__ );
+ $table = $dbw->tableName( 'translate_tmf' );
+ $dbw->ignoreErrors( true );
+ $dbw->query( "DROP INDEX tmf_text ON $table" );
+ $dbw->ignoreErrors( false );
+ }
+
+ public function beginBatch() {
+ $this->sids = array();
+ }
+
+ public function batchInsertDefinitions( array $batch ) {
+ foreach ( $batch as $key => $item ) {
+ list( $title, $language, $text ) = $item;
+ $handle = new MessageHandle( $title );
+ $context = Title::makeTitle( $handle->getTitle()->getNamespace(), \
$handle->getKey() ); + $this->sids[$key] = $this->insertSource( $context, \
$language, $text ); + }
+ wfWaitForSlaves( 10 );
+ }
+
+ public function batchInsertTranslations( array $batch ) {
+ $rows = array();
+ foreach ( $batch as $key => $data ) {
+ list( $title, $language, $text ) = $data;
+ $rows[] = array(
+ 'tmt_sid' => $this->sids[$key],
+ 'tmt_lang' => $language,
+ 'tmt_text' => $text,
+ );
+ }
+
+ $dbw = $this->getDB( DB_MASTER );
+ $dbw->insert( 'translate_tmt', $rows, __METHOD__ );
+ wfWaitForSlaves( 10 );
+ }
+
+ public function endBatch() {}
+
+ public function endBootstrap() {
+ $dbw = $this->getDB( DB_MASTER );
+ $table = $dbw->tableName( 'translate_tmf' );
+ $dbw->query( "CREATE FULLTEXT INDEX tmf_text ON $table (tmf_text)" );
+ }
+
+ /* Reading interface */
+
+ public function isLocalSuggestion( array $suggestion ) {
+ return true;
+ }
+
+ public function expandLocation( array $suggestion ) {
+ $title = Title::newFromText( $suggestion['location'] );
+ return $title->getCanonicalUrl();
+ }
+
+	/**
+	 * Fetches translation memory suggestions for the given text.
+	 *
+	 * Candidate rows are narrowed in SQL by language pair, by the length of
+	 * the stored source text and, when filterForFulltext() yields any tokens,
+	 * by a fulltext match. The remaining rows are scored and filtered in
+	 * processQueryResults().
+	 *
+	 * @param $sourceLanguage String: language code of $text
+	 * @param $targetLanguage String: language code of the wanted suggestions
+	 * @param $text String: the text for which to search suggestions
+	 * @return Array: the return value of processQueryResults() for the matched rows
+	 */
+	public function query( $sourceLanguage, $targetLanguage, $text ) {
+		wfProfileIn( __METHOD__ );
+		// Calculate the bounds of the string length which are able
+		// to satisfy the cutoff percentage in edit distance.
+		$len = mb_strlen( $text );
+		$min = (int)ceil( max( $len * $this->config['cutoff'], 2 ) );
+		$max = (int)floor( $len / $this->config['cutoff'] );
+
+		$dbr = $this->getDB( DB_SLAVE );
+		$tables = array( 'translate_tmt', 'translate_tms' );
+		$fields = array( 'tms_context', 'tms_text', 'tmt_lang', 'tmt_text' );
+
+		$conds = array(
+			'tms_lang' => $sourceLanguage,
+			'tmt_lang' => $targetLanguage,
+			"tms_len BETWEEN $min AND $max",
+			'tms_sid = tmt_sid',
+		);
+
+		// Subclasses may restrict the query further; the columns they
+		// filter on are also selected so that they end up in the result rows.
+		$extra = $this->getExtraConditions();
+		$fields = array_merge( $fields, array_keys( $extra ) );
+		$conds = array_merge( $conds, $extra );
+
+		// Use the fulltext index to narrow the results when the text
+		// produces enough usable tokens.
+		$fulltext = $this->filterForFulltext( $sourceLanguage, $text );
+		if ( $fulltext ) {
+			$tables[] = 'translate_tmf';
+			$list = implode( ' ', $fulltext );
+			$conds[] = 'tmf_sid = tmt_sid';
+			// Let the database layer quote the search string instead of
+			// interpolating it into the SQL by hand.
+			$conds[] = 'MATCH(tmf_text) AGAINST( ' . $dbr->addQuotes( $list ) . ' )';
+		}
+
+		$res = $dbr->select( $tables, $fields, $conds, __METHOD__ );
+		wfProfileOut( __METHOD__ );
+		return $this->processQueryResults( $res, $text, $sourceLanguage, $targetLanguage );
+	}
+
+ protected function processQueryResults( $res, $text, $sourceLanguage, \
$targetLanguage ) { + wfProfileIn( __METHOD__ );
+ $lenA = mb_strlen( $text );
+ $results = array();
+ foreach ( $res as $row ) {
+ $a = $text;
+ $b = $row->tms_text;
+ $lenB = mb_strlen( $b );
+ $len = min( $lenA, $lenB );
+ if ( $len > 1000 ) {
+ // two strings of length 1500 ~ 10s
+ // two strings of length 2250 ~ 30s
+ $dist = $len;
+ } else {
+ $dist = self::levenshtein( $a, $b, $lenA, $lenB );
+ }
+ $quality = 1 - ( $dist * 0.9 / $len );
+
+ if ( $quality >= $this->config['cutoff'] ) {
+ $results[] = array(
+ 'source' => $row->tms_text,
+ 'target' => $row->tmt_text,
+ 'context' => $row->tms_context,
+ 'location' => $row->tms_context . '/' . $targetLanguage,
+ 'quality' => $quality,
+ 'wiki' => isset( $row->tms_wiki ) ? $row->tms_wiki : wfWikiId(),
+ );
+ }
+ }
+ $results = TTMServer::sortSuggestions( $results );
+ wfProfileOut( __METHOD__ );
+ return $results;
+ }
+
+}
diff --git a/extensions/Translate/ttmserver/FakeTTMServer.php \
b/extensions/Translate/ttmserver/FakeTTMServer.php new file mode 100644
index 0000000..a9cf1ea
--- /dev/null
+++ b/extensions/Translate/ttmserver/FakeTTMServer.php
@@ -0,0 +1,39 @@
+<?php
+/**
+ * TTMServer - The Translate extension translation memory interface
+ *
+ * @file
+ * @author Niklas Laxström
+ * @copyright Copyright © 2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public \
License 2.0 or later + * @ingroup TTMServer
+ */
+
+/**
+ * NO-OP version of TTMServer when it is disabled.
+ * Keeps other code simpler when they can just do
+ * TTMServer::primary()->update( ... );
+ * @since 2012-01-28
+ * @ingroup TTMServer
+ */
+class FakeTTMServer implements ReadableTTMServer, WritableTTMServer {
+	/**
+	 * @return Array: always empty, the fake server has no suggestions
+	 */
+	public function query( $sourceLanguage, $targetLanguage, $text ) {
+		return array();
+	}
+
+	/**
+	 * @return Bool: always false
+	 */
+	public function isLocalSuggestion( array $suggestion ) {
+		// The bare statement "false;" used to be here, which made the
+		// method return null instead of a boolean.
+		return false;
+	}
+
+	/**
+	 * @return String: always the empty string
+	 */
+	public function expandLocation( array $suggestion ) {
+		return '';
+	}
+
+	// The whole writing interface is intentionally a no-op.
+	public function update( MessageHandle $handle, $targetText ) {}
+	public function beginBootstrap() {}
+	public function beginBatch() {}
+	public function batchInsertDefinitions( array $batch ) {}
+	public function batchInsertTranslations( array $batch ) {}
+	public function endBatch() {}
+	public function endBootstrap() {}
+}
diff --git a/extensions/Translate/ttmserver/Interfaces.php \
b/extensions/Translate/ttmserver/Interfaces.php new file mode 100644
index 0000000..f99ad51
--- /dev/null
+++ b/extensions/Translate/ttmserver/Interfaces.php
@@ -0,0 +1,94 @@
+<?php
+/**
+ * TTMServer - The Translate extension translation memory interface
+ *
+ * @file
+ * @author Niklas Laxström
+ * @copyright Copyright © 2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public \
License 2.0 or later + * @ingroup TTMServer
+ */
+
+/**
+ * Interface for TTMServer that can be queried (=all of them).
+ * @ingroup TTMServer
+ * @since 2012-06-27
+ */
+interface ReadableTTMServer {
+ /**
+ * Fetches all relevant suggestions for the given text.
+ *
+ * @param $sourceLanguage String: language code for the provided text
+ * @param $targetLanguage String: language code for the suggestions
+ * @param $text String: the text for which to search suggestions
+ * @return List: unordered suggestions, which each has fields:
+ * - source: String: the original text of the suggestion
+ * - target: String: the suggestion
+ * - context: String: title of the page where the suggestion comes from
+ * - quality: Float: the quality of suggestion, 1 is perfect match
+ */
+ public function query( $sourceLanguage, $targetLanguage, $text );
+
+ /**
+ * Determines if the suggestion returned by this TTMServer comes
+ * from this wiki or any other wiki.
+ * @param $suggestion Array: one suggestion as returned by query()
+ * @return Bool
+ */
+ public function isLocalSuggestion( array $suggestion );
+
+ /**
+ * Given a suggestion returned by this TTMServer, constructs a fully
+ * qualified URL to the location of the translation.
+ * @param $suggestion Array: one suggestion as returned by query()
+ * @return String URL
+ */
+ public function expandLocation( array $suggestion );
+
+}
+
+/**
+ * Interface for TTMServer that can be updated.
+ * @ingroup TTMServer
+ * @since 2012-06-27
+ */
+interface WritableTTMServer {
+ /**
+ * Shovels the new translation into translation memory.
+ * Use this for single updates (=after message edit).
+ *
+ * @param $handle MessageHandle
+ * @param $targetText String
+ */
+ public function update( MessageHandle $handle, $targetText );
+
+ /**
+ * Called when starting to fill the translation memory.
+ * Set up necessary variables and remove old content
+ * from the server.
+ */
+ public function beginBootstrap();
+
+ /**
+ * Called before every batch (MessageGroup).
+ */
+ public function beginBatch();
+
+ /**
+ * Called multiple times per batch if necessary.
+ * @param $batch Array: map of key => list( $title, $language, $text )
+ */
+ public function batchInsertDefinitions( array $batch );
+
+ /**
+ * Called multiple times per batch if necessary.
+ * @param $batch Array: map of key => list( $title, $language, $text ),
+ * using the same keys as the definitions of the batch
+ */
+ public function batchInsertTranslations( array $batch );
+
+ /**
+ * Called at the end of every batch (MessageGroup).
+ */
+ public function endBatch();
+
+ /**
+ * Do any cleanup, optimizing etc.
+ */
+ public function endBootstrap();
+}
diff --git a/extensions/Translate/ttmserver/RemoteTTMServer.php \
b/extensions/Translate/ttmserver/RemoteTTMServer.php new file mode 100644
index 0000000..fa1b907
--- /dev/null
+++ b/extensions/Translate/ttmserver/RemoteTTMServer.php
@@ -0,0 +1,32 @@
+<?php
+/**
+ * TTMServer - The Translate extension translation memory interface
+ *
+ * @file
+ * @author Niklas Laxström
+ * @copyright Copyright © 2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public \
License 2.0 or later + * @ingroup TTMServer
+ */
+
+/**
+ * Class for handling remote TTMServers over MediaWiki API.
+ * Currently querying is done in TranslationHelpers, and
+ * this class only handles location retrieval.
+ * @since 2012-06-27
+ * @ingroup TTMServer
+ */
+class RemoteTTMServer extends TTMServer implements ReadableTTMServer {
+ public function query( $sourceLanguage, $targetLanguage, $text ) {
+ // TODO: implement some day perhaps?
+ return array();
+ }
+
+ public function isLocalSuggestion( array $suggestion ) {
+ return false;
+ }
+
+ public function expandLocation( array $suggestion ) {
+ return $suggestion['location'];
+ }
+}
diff --git a/extensions/Translate/ttmserver/SharedDatabaseTTMServer.php \
b/extensions/Translate/ttmserver/SharedDatabaseTTMServer.php new file mode \
100644 index 0000000..be90916
--- /dev/null
+++ b/extensions/Translate/ttmserver/SharedDatabaseTTMServer.php
@@ -0,0 +1,53 @@
+<?php
+/**
+ * TTMServer - The Translate extension translation memory interface
+ *
+ * @file
+ * @author Niklas Laxström
+ * @copyright Copyright © 2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public \
License 2.0 or later + * @ingroup TTMServer
+ */
+
+/**
+ * MySQL based backend shared between multiple wikis.
+ * @ingroup TTMServer
+ * @since 2012-06-27
+ */
+class SharedDatabaseTTMServer extends DatabaseTTMServer {
+ protected function getExtraConditions() {
+ return array( 'tms_wiki' => wfWikiId() );
+ }
+
+ public function beginBootstrap() {
+ $dbw = $this->getDB( DB_MASTER );
+ $wiki = $this->getExtraConditions();
+
+ $dbw->deleteJoin(
+ 'translate_tmf', 'translate_tms',
+ 'tmf_sid', 'tms_sid',
+ $wiki, __METHOD__
+ );
+
+ $dbw->deleteJoin(
+ 'translate_tmt', 'translate_tms',
+ 'tmt_sid', 'tms_sid',
+ $wiki, __METHOD__
+ );
+ $dbw->delete( 'translate_tms', $wiki, __METHOD__ );
+ }
+
+ // Overwrite parent behaviour
+ public function endBootstrap() {}
+
+ /* Reading interface */
+
+ public function isLocalSuggestion( array $suggestion ) {
+ return $suggestion['wiki'] === wfWikiId();
+ }
+
+ public function expandLocation( array $suggestion ) {
+ $wiki = WikiMap::getWiki( $suggestion['wiki'] );
+ return $wiki->getCanonicalUrl( $suggestion['location'] );
+ }
+}
diff --git a/extensions/Translate/ttmserver/SolrTTMServer.php \
b/extensions/Translate/ttmserver/SolrTTMServer.php new file mode 100644
index 0000000..8522aae
--- /dev/null
+++ b/extensions/Translate/ttmserver/SolrTTMServer.php
@@ -0,0 +1,227 @@
+<?php
+/**
+ * TTMServer - The Translate extension translation memory interface
+ *
+ * @file
+ * @author Niklas Laxström
+ * @copyright Copyright © 2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public \
License 2.0 or later + * @ingroup TTMServer
+ */
+
+/**
+ * TTMServer backend based on a Solr instance. Depends on Solarium.
+ * @since 2012-06-27
+ * @ingroup TTMServer
+ */
+class SolrTTMServer extends TTMServer implements ReadableTTMServer, \
WritableTTMServer { + protected $client;
+ protected $updates;
+
+ public function __construct( $config ) {
+ wfProfileIn( __METHOD__ );
+ parent::__construct( $config );
+ if ( isset( $config['config'] ) ) {
+ $this->client = new Solarium_Client( $config['config'] );
+ } else {
+ $this->client = new Solarium_Client();
+ }
+ wfProfileOut( __METHOD__ );
+ }
+
+ public function isLocalSuggestion( array $suggestion ) {
+ return $suggestion['wiki'] === wfWikiId();
+ }
+
+ public function expandLocation( array $suggestion ) {
+ return $suggestion['uri'];
+ }
+
+	/**
+	 * Fetches suggestions for the given text from the Solr index.
+	 *
+	 * Solr is asked for at most 250 documents in the source language which
+	 * have a translation in the target language and whose charcount is
+	 * within the bounds derived from the cutoff. Each returned document is
+	 * then scored locally with the edit distance.
+	 *
+	 * @param $sourceLanguage String: language code of $text
+	 * @param $targetLanguage String: language code of the wanted suggestions
+	 * @param $text String: the text for which to search suggestions
+	 * @return Array: list of suggestions with keys source, target, context,
+	 *   quality, wiki, location and uri
+	 */
+	public function query( $sourceLanguage, $targetLanguage, $text ) {
+		wfProfileIn( __METHOD__ );
+		$len = mb_strlen( $text );
+		$min = ceil( max( $len * $this->config['cutoff'], 2 ) );
+		$max = floor( $len / $this->config['cutoff'] );
+		$languageField = "text_$targetLanguage";
+
+		$query = $this->client->createSelect();
+		$query->setFields( array( 'uri', 'wiki', 'content', $languageField, 'messageid' ) );
+		$query->setRows( 250 );
+		$helper = $query->getHelper();
+
+		$queryString = 'content:%P1%';
+		$query->setQuery( $queryString, array( $text ) );
+
+		$query->createFilterQuery( 'lang' )
+			->setQuery( 'language:%T1%', array( $sourceLanguage ) );
+		$query->createFilterQuery( 'trans' )
+			->setQuery( '%T1%:["" TO *]', array( $languageField ) );
+		$query->createFilterQuery( 'len' )
+			->setQuery( $helper->rangeQuery( 'charcount', $min, $max ) );
+
+		$sortFunction = 'strdist(' . $helper->escapePhrase( $text ) . ',text,edit)';
+		$query->addSort( $sortFunction, 'asc' );
+
+		$resultset = $this->client->select( $query );
+
+		// Quality per distinct candidate text, so that the edit distance is
+		// calculated only once for documents sharing the same content.
+		$qualityCache = array();
+		$suggestions = array();
+		foreach ( $resultset as $doc ) {
+			$candidate = $doc->content;
+
+			if ( !isset( $qualityCache[$candidate] ) ) {
+				$candidateLen = mb_strlen( $candidate );
+				$dist = TTMServer::levenshtein( $text, $candidate, $len, $candidateLen );
+				$qualityCache[$candidate] = 1 - ( $dist * 0.9 / min( $len, $candidateLen ) );
+			}
+			// Always read the quality of *this* candidate. Previously a cache
+			// hit left $quality at the value of an earlier candidate.
+			$quality = $qualityCache[$candidate];
+
+			// NOTE(review): stopping here assumes the sort above delivers the
+			// best matches first - confirm against the Solr strdist semantics.
+			if ( $quality < $this->config['cutoff'] ) {
+				break;
+			}
+
+			$suggestions[] = array(
+				'source' => $candidate,
+				'target' => $doc->$languageField,
+				'context' => $doc->messageid,
+				'quality' => $quality,
+				'wiki' => $doc->wiki,
+				'location' => $doc->messageid . '/' . $targetLanguage,
+				'uri' => $doc->uri . '/' . $targetLanguage,
+			);
+		}
+		wfProfileOut( __METHOD__ );
+		return $suggestions;
+	}
+
+ /* Write functions */
+
+ public function update( MessageHandle $handle, $targetText ) {
+ if ( !$handle->isValid() || $handle->getCode() === '' ) {
+ return false;
+ }
+
+ $mkey = $handle->getKey();
+ $group = $handle->getGroup();
+ $targetLanguage = $handle->getCode();
+ $sourceLanguage = $group->getSourceLanguage();
+
+ // Skip definitions to not slow down mass imports etc.
+ // These will be added when the first translation is made
+ if ( $targetLanguage === $sourceLanguage ) {
+ return false;
+ }
+
+ $definition = $group->getMessage( $mkey, $sourceLanguage );
+ if ( !is_string( $definition ) || !strlen( trim( $definition ) ) ) {
+ return false;
+ }
+
+ wfProfileIn( __METHOD__ );
+ $doc = $this->createDocument( $handle, $sourceLanguage, $definition );
+
+ $query = $this->client->createSelect();
+ $query->createFilterQuery( 'globalid' )->setQuery( 'globalid:%P1%', \
array( $doc->globalid ) ); + $resultset = $this->client->select( $query );
+
+ $found = count( $resultset );
+ if ( $found > 1 ) {
+ throw new MWException( "Found multiple documents with global id \
{$doc->globalid}" ); + }
+
+ // Fill in the fields from existing entry if it exists
+ if ( $found === 1 ) {
+ foreach ( $resultset as $resultdoc ) {
+ foreach( $resultdoc as $field => $value ) {
+ if ( $field !== 'score' && !isset( $doc->$field ) ) {
+ $doc->$field = $value;
+ }
+ }
+ }
+ }
+
+ $languageField = "text_$targetLanguage";
+ $doc->$languageField = $targetText;
+
+ $update = $this->client->createUpdate();
+ $update->addDocument( $doc );
+ $update->addCommit();
+ $this->client->update( $update );
+
+ wfProfileOut( __METHOD__ );
+ return true;
+ }
+
+ protected function createDocument( MessageHandle $handle, $language, \
$text ) { + $title = Title::makeTitle( \
$handle->getTitle()->getNamespace(), $handle->getKey() ); + $wiki = \
wfWikiId(); + $messageid = $title->getPrefixedText();
+ $globalid = "$wiki-$messageid-" . substr( sha1( $text ), 0, 8 );
+
+ $doc = new Solarium_Document_ReadWrite();
+ $doc->language = $language;
+ $doc->content = $text;
+ $doc->charcount = mb_strlen( $text );
+
+ $doc->uri = $title->getCanonicalUrl();
+ $doc->wiki = $wiki;
+ $doc->messageid = $messageid;
+ $doc->globalid = $globalid;
+ return $doc;
+ }
+
+	/**
+	 * Removes all documents owned by the current wiki from the Solr index,
+	 * as identified by the wiki field matching wfWikiId().
+	 */
+	public function beginBootstrap() {
+		$update = $this->client->createUpdate();
+		// Placeholder values are given as a list, like in the setQuery()
+		// calls elsewhere in this class; %T1% refers to its first item.
+		$update->addDeleteQuery( 'wiki:%T1%', array( wfWikiId() ) );
+		$this->client->update( $update );
+	}
+
+ public function beginBatch() {
+ $this->updates = array();
+ }
+
+ public function batchInsertDefinitions( array $batch ) {
+ foreach ( $batch as $key => $data ) {
+ $this->updates[$key]['*'] = $data;
+ }
+ }
+
+ public function batchInsertTranslations( array $batch ) {
+ foreach ( $batch as $key => $data ) {
+ list( $title, $language, $text ) = $data;
+ $this->updates[$key][$language] = $text;
+ }
+ }
+
+ public function endBatch() {
+ $update = $this->client->createUpdate();
+
+ foreach ( $this->updates as $key => $languages ) {
+ $definition = $languages['*'];
+ list( $title, $language, $text ) = $definition;
+ $handle = new MessageHandle( $title );
+ $doc = $this->createDocument( $handle, $language, $text );
+ unset( $languages['*'] );
+ $field = "text_$language";
+ $doc->$field = $text;
+
+ foreach ( $languages as $language => $text ) {
+ $field = "text_$language";
+ $doc->$field = $text;
+ }
+ $update->addDocument( $doc );
+
+ }
+
+ $this->client->update( $update );
+ }
+
+ public function endBootstrap() {
+ $update = $this->client->createUpdate();
+ $update->addOptimize( false, false, 2 );
+ $this->client->update( $update );
+ }
+
+}
diff --git a/extensions/Translate/ttmserver/TTMServer.php \
b/extensions/Translate/ttmserver/TTMServer.php new file mode 100644
index 0000000..879b17c
--- /dev/null
+++ b/extensions/Translate/ttmserver/TTMServer.php
@@ -0,0 +1,112 @@
+<?php
+/**
+ * TTMServer - The Translate extension translation memory interface
+ *
+ * @file
+ * @author Niklas Laxström
+ * @copyright Copyright © 2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public \
License 2.0 or later + * @defgroup TTMServer The Translate extension \
translation memory interface + */
+
+/**
+ * Some general static methods for instantiating TTMServer and helpers.
+ * @since 2012-01-28
+ * Rewritten in 2012-06-27.
+ * @ingroup TTMServer
+ */
+class TTMServer {
+ protected $config;
+
+ protected function __construct( $config ) {
+ $this->config = $config;
+ }
+
+	/**
+	 * Creates a server instance from a configuration array.
+	 *
+	 * @param $config Array: key 'class' (name of the class to instantiate)
+	 *   takes precedence; otherwise key 'type' must be one of ttmserver,
+	 *   shared-ttmserver or remote-ttmserver. The whole array is passed
+	 *   to the constructor of the chosen class.
+	 * @return Object|null: the server instance, or null for an unknown type
+	 * @throws MWException if neither 'class' nor 'type' is set
+	 */
+	public static function factory( $config ) {
+		if ( isset( $config['class'] ) ) {
+			$class = $config['class'];
+			return new $class( $config );
+		} elseif ( isset( $config['type'] ) ) {
+			$type = $config['type'];
+			switch( $type ) {
+				case 'ttmserver':
+					return new DatabaseTTMServer( $config );
+				case 'shared-ttmserver':
+					return new SharedDatabaseTTMServer( $config );
+				case 'remote-ttmserver':
+					return new RemoteTTMServer( $config );
+				default:
+					return null;
+			}
+		}
+
+		// Spell the class name exactly (was MWEXception), so that this does
+		// not depend on case-insensitive class lookup.
+		throw new MWException( "TTMServer with no type" );
+	}
+
+ /**
+ * Returns the primary server instance, useful for chaining.
+ * Primary one is defined as config with key TTMServer
+ * in $wgTranslateTranslationServices.
+ * @return WritableTTMServer
+ */
+ public static function primary() {
+ global $wgTranslateTranslationServices;
+ if ( isset( $wgTranslateTranslationServices['TTMServer'] ) ) {
+ $obj = self::factory( $wgTranslateTranslationServices['TTMServer'] );
+ if ( $obj instanceof WritableTTMServer ) {
+ return $obj;
+ }
+ }
+ return new FakeTTMServer();
+ }
+
+ public static function sortSuggestions( array $suggestions ) {
+ usort( $suggestions, array( __CLASS__, 'qualitySort' ) );
+ return $suggestions;
+ }
+
+ protected static function qualitySort( $a, $b ) {
+ list( $c, $d ) = array( $a['quality'], $b['quality'] );
+ if ( $c === $d ) {
+ return 0;
+ }
+ // Descending sort
+ return ( $c > $d ) ? -1 : 1;
+ }
+
+ /**
+ * PHP implementation of Levenshtein edit distance algorithm.
+ * Uses the native PHP implementation when possible for speed.
+ * The native levenshtein is limited to 255 bytes.
+ * @return int
+ */
+	public static function levenshtein( $str1, $str2, $length1, $length2 ) {
+		// $length1 and $length2 are the lengths of $str1 and $str2 in
+		// characters; the callers in this extension pass mb_strlen() results.
+		if ( $length1 == 0 ) return $length2;
+		if ( $length2 == 0 ) return $length1;
+		if ( $str1 === $str2 ) return 0;
+
+		// When the byte length equals the character length, the string has
+		// only single-byte characters and the native function can be used,
+		// as long as both strings are within its 255 byte limit.
+		$bytelength1 = strlen( $str1 );
+		$bytelength2 = strlen( $str2 );
+		if ( $bytelength1 === $length1 && $bytelength1 <= 255
+			&& $bytelength2 === $length2 && $bytelength2 <= 255
+		) {
+			return levenshtein( $str1, $str2 );
+		}
+
+		// Extract the characters of the second string only once instead of
+		// calling mb_substr() again for every cell of the matrix.
+		$chars2 = array();
+		for ( $j = 0; $j < $length2; $j++ ) {
+			$chars2[$j] = mb_substr( $str2, $j, 1 );
+		}
+
+		// Two-row dynamic programming: $prevRow holds the distances for the
+		// first $i characters of $str1, $currentRow those for $i + 1.
+		$prevRow = range( 0, $length2 );
+		for ( $i = 0; $i < $length1; $i++ ) {
+			$currentRow = array( $i + 1 );
+			$c1 = mb_substr( $str1, $i, 1 );
+			for ( $j = 0; $j < $length2; $j++ ) {
+				$insertions = $prevRow[$j + 1] + 1;
+				$deletions = $currentRow[$j] + 1;
+				$substitutions = $prevRow[$j] + ( ( $c1 !== $chars2[$j] ) ? 1 : 0 );
+				$currentRow[] = min( $insertions, $deletions, $substitutions );
+			}
+			$prevRow = $currentRow;
+		}
+		return $prevRow[$length2];
+	}
+}
diff --git a/extensions/Translate/ttmserver/schema.xml \
b/extensions/Translate/ttmserver/schema.xml new file mode 100644
index 0000000..065c5d4
--- /dev/null
+++ b/extensions/Translate/ttmserver/schema.xml
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!-- This is schema file for TTMServer using Solr as backend -->
+<schema name="ttmserver" version="1.5">
+ <types>
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="50" \
positionIncrementGap="0"/> + <fieldType name="text_en" \
class="solr.TextField" positionIncrementGap="100"> + <analyzer \
type="index"> + <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" \
protected="protwords.txt"/> + <!-- Optionally you may want to use this less \
aggressive stemmer instead of PorterStemFilterFactory: + <filter \
class="solr.EnglishMinimalStemFilterFactory"/> + -->
+ <filter class="solr.PorterStemFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" \
ignoreCase="true" expand="true"/> + <filter \
class="solr.LowerCaseFilterFactory"/> + <filter \
class="solr.EnglishPossessiveFilterFactory"/> + <filter \
class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> \
+ <filter class="solr.PorterStemFilterFactory"/> + </analyzer>
+ </fieldType>
+ <fieldType name="text_ws" class="solr.TextField" \
positionIncrementGap="100"> + <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ </analyzer>
+ </fieldType>
+ </types>
+
+ <fields>
+ <!-- If multiple wikis are using the same server, this will tell which \
one owns this document --> + <field name="wiki" type="string" \
indexed="true" stored="true" required="true" /> + <!-- URL or something to \
the source document. It is assumed that + translations can be found by \
appending /langcode to the url + -->
+ <field name="uri" type="string" indexed="true" stored="true" \
required="true" /> + <field name="messageid" type="string" indexed="true" \
stored="true" required="true" /> + <field name="globalid" type="string" \
indexed="true" stored="true" required="true" /> +
+ <!--Original language of the translation-->
+ <field name="language" type="string" indexed="true" stored="true" \
required="true" /> + <!-- For now assuming that content is always in \
English --> + <field name="content" type="text_en" indexed="true" \
stored="true" required="true" /> +
+ <!-- For filtering suggestions which are way too long or short -->
+ <field name="charcount" type="tint" indexed="true" stored="true" />
+ <!-- This is the field used for searching -->
+ <field name="text" type="string" indexed="true" stored="false" />
+ <copyField source="content" dest="text" />
+
+ <!-- Fields for translations. -->
+ <dynamicField name="text_*" type="string" indexed="true" stored="true" \
/> + </fields>
+
+ <uniqueKey>globalid</uniqueKey>
+</schema>
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic