[prev in list] [next in list] [prev in thread] [next in thread] 

List:       kde-commits
Subject:    [websites/wiki-kde-org/develop] extensions/Translate/ttmserver: add missing files, stupid gitignore
From:       Ingo Malchow <imalchow () kde ! org>
Date:       2012-07-24 20:03:17
Message-ID: 20120724200317.905E1A6094 () git ! kde ! org
[Download RAW message or body]

Git commit f719f9c77765d608935ee8d2d768b15e5d29d836 by Ingo Malchow.
Committed on 24/07/2012 at 21:59.
Pushed by imalchow into branch 'develop'.

add missing files, stupid gitignore

A  +283  -0    extensions/Translate/ttmserver/DatabaseTTMServer.php
A  +39   -0    extensions/Translate/ttmserver/FakeTTMServer.php
A  +94   -0    extensions/Translate/ttmserver/Interfaces.php
A  +32   -0    extensions/Translate/ttmserver/RemoteTTMServer.php
A  +53   -0    extensions/Translate/ttmserver/SharedDatabaseTTMServer.php
A  +227  -0    extensions/Translate/ttmserver/SolrTTMServer.php
A  +112  -0    extensions/Translate/ttmserver/TTMServer.php
A  +60   -0    extensions/Translate/ttmserver/schema.xml

http://commits.kde.org/websites/wiki-kde-org/f719f9c77765d608935ee8d2d768b15e5d29d836


diff --git a/extensions/Translate/ttmserver/DatabaseTTMServer.php \
b/extensions/Translate/ttmserver/DatabaseTTMServer.php new file mode 100644
index 0000000..5455a27
--- /dev/null
+++ b/extensions/Translate/ttmserver/DatabaseTTMServer.php
@@ -0,0 +1,283 @@
+<?php
+/**
+ * TTMServer - The Translate extension translation memory interface
+ *
+ * @file
+ * @author Niklas Laxström
+ * @copyright Copyright  © 2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ * @ingroup TTMServer
+ */
+
+/**
+ * Mysql based backend.
+ * @ingroup TTMServer
+ * @since 2012-06-27
+ */
+class DatabaseTTMServer extends TTMServer implements WritableTTMServer, \
ReadableTTMServer { +	protected $sids;
+
+	/**
+	 * Fetches the database connection used for the translation memory.
+	 *
+	 * The connection is obtained from wfGetDB(), passing 'ttmserver' and
+	 * the 'database' entry of this server's configuration along with the
+	 * requested mode.
+	 *
+	 * @param $mode int DB_SLAVE|DB_MASTER
+	 * @return DatabaseBase
+	 */
+	protected function getDB( $mode = DB_SLAVE ) {
+		$database = $this->config['database'];
+		$connection = wfGetDB( $mode, 'ttmserver', $database );
+		return $connection;
+	}
+
+	/**
+	 * Records a single translation in the translation memory tables.
+	 *
+	 * Nothing is stored when the handle is invalid or has no language
+	 * code, when the translation is in the group's source language, or
+	 * when the message has no non-blank string definition.
+	 *
+	 * @param $handle MessageHandle
+	 * @param $targetText String
+	 * @return Bool True if the translation was stored, false if skipped.
+	 */
+	public function update( MessageHandle $handle, $targetText ) {
+		if ( !$handle->isValid() ) {
+			return false;
+		}
+		if ( $handle->getCode() === '' ) {
+			return false;
+		}
+
+		$mkey = $handle->getKey();
+		$group = $handle->getGroup();
+		$targetLanguage = $handle->getCode();
+		$sourceLanguage = $group->getSourceLanguage();
+
+		// Definitions are skipped so that mass imports etc. stay fast.
+		// They get added when the first translation is made.
+		if ( $targetLanguage === $sourceLanguage ) {
+			return false;
+		}
+
+		$definition = $group->getMessage( $mkey, $sourceLanguage );
+		if ( !is_string( $definition ) ) {
+			return false;
+		}
+		if ( trim( $definition ) === '' ) {
+			return false;
+		}
+
+		$namespace = $handle->getTitle()->getNamespace();
+		$context = Title::makeTitle( $namespace, $mkey );
+		$dbw = $this->getDB( DB_MASTER );
+
+		/* Look up the sid of the definition, adding the definition first
+		 * if it is not there yet. A changed definition gets a new entry,
+		 * because otherwise suggestions could end up not matching the
+		 * original definition any longer. The old translations are kept
+		 * until purged by rerunning the bootstrap script. */
+		$lookup = array_merge(
+			array(
+				'tms_context' => $context->getPrefixedText(),
+				'tms_text' => $definition,
+			),
+			$this->getExtraConditions()
+		);
+
+		$sid = $dbw->selectField( 'translate_tms', 'tms_sid', $lookup, __METHOD__ );
+		if ( $sid === false ) {
+			$sid = $this->insertSource( $context, $sourceLanguage, $definition );
+		}
+
+		// Remove any old translation of this message. Could also use replace
+		$translationKey = array(
+			'tmt_sid' => $sid,
+			'tmt_lang' => $targetLanguage,
+		);
+		$dbw->delete( 'translate_tmt', $translationKey, __METHOD__ );
+
+		// Store the new translation
+		$translationRow = array(
+			'tmt_sid' => $sid,
+			'tmt_lang' => $targetLanguage,
+			'tmt_text' => $targetText,
+		);
+		$dbw->insert( 'translate_tmt', $translationRow, __METHOD__ );
+
+		return true;
+	}
+
+	/**
+	 * Extra column => value pairs for the translate_tms table.
+	 * They are merged into the source lookup conditions, into newly
+	 * inserted source rows and into the suggestion query. The base
+	 * class adds none; subclasses can override this.
+	 * @return array
+	 */
+	protected function getExtraConditions() {
+		return array();
+	}
+
+	/**
+	 * Adds a new source text (definition) to translate_tms and, when
+	 * filterForFulltext() yields any tokens, a matching translate_tmf row.
+	 *
+	 * @param $context Title Title identifying the message.
+	 * @param $sourceLanguage String Language code of $text.
+	 * @param $text String The source text.
+	 * @return int The insert id of the new translate_tms row.
+	 */
+	protected function insertSource( Title $context, $sourceLanguage, $text ) {
+		wfProfileIn( __METHOD__ );
+
+		$sourceRow = array_merge(
+			array(
+				'tms_lang' => $sourceLanguage,
+				'tms_len' => mb_strlen( $text ),
+				'tms_text' => $text,
+				'tms_context' => $context->getPrefixedText(),
+			),
+			$this->getExtraConditions()
+		);
+
+		$dbw = $this->getDB( DB_MASTER );
+		$dbw->insert( 'translate_tms', $sourceRow, __METHOD__ );
+		$sid = $dbw->insertId();
+
+		$tokens = $this->filterForFulltext( $sourceLanguage, $text );
+		if ( count( $tokens ) > 0 ) {
+			$fulltextRow = array(
+				'tmf_sid' => $sid,
+				'tmf_text' => implode( ' ', $tokens ),
+			);
+			$dbw->insert( 'translate_tmf', $fulltextRow, __METHOD__ );
+		}
+
+		wfProfileOut( __METHOD__ );
+		return $sid;
+	}
+
+	/**
+	 * Splits the text into tokens for the fulltext search and keeps
+	 * only the ones considered most useful.
+	 *
+	 * Texts with fewer than four words produce no tokens at all.
+	 * Otherwise words of 4 to 15 bytes are kept, duplicates are removed
+	 * and at most the first ten are returned.
+	 *
+	 * @param $language String Language code used for word segmentation
+	 *   and lower-casing.
+	 * @param $input String The text to tokenize.
+	 * @return array List of tokens, possibly empty.
+	 */
+	protected function filterForFulltext( $language, $input ) {
+		wfProfileIn( __METHOD__ );
+		$lang = Language::factory( $language );
+
+		$normalized = preg_replace( '/[^[:alnum:]]/u', ' ', $input );
+		$normalized = $lang->lc( $lang->segmentByWord( $normalized ) );
+		$words = preg_split( '/\s+/', $normalized, -1, PREG_SPLIT_NO_EMPTY );
+
+		if ( count( $words ) < 4 ) {
+			wfProfileOut( __METHOD__ );
+			return array();
+		}
+
+		$kept = array();
+		foreach ( $words as $word ) {
+			// Byte length on purpose, not character length
+			$bytes = strlen( $word );
+			if ( $bytes >= 4 && $bytes <= 15 ) {
+				$kept[] = $word;
+			}
+		}
+
+		$tokens = array_slice( array_unique( $kept ), 0, 10 );
+		wfProfileOut( __METHOD__ );
+		return $tokens;
+	}
+
+	/**
+	 * Prepares for refilling the translation memory: deletes all rows
+	 * from the translate_tms, translate_tmt and translate_tmf tables and
+	 * drops the tmf_text index, which endBootstrap() creates again.
+	 */
+	public function beginBootstrap() {
+		$dbw = $this->getDB( DB_MASTER );
+		$dbw->delete( 'translate_tms', '*', __METHOD__ );
+		$dbw->delete( 'translate_tmt', '*', __METHOD__ );
+		$dbw->delete( 'translate_tmf', '*', __METHOD__ );
+		$table = $dbw->tableName( 'translate_tmf' );
+		// Database errors are ignored for the DROP INDEX statement only;
+		// presumably because the index may not exist yet
+		$dbw->ignoreErrors( true );
+		$dbw->query( "DROP INDEX tmf_text ON $table" );
+		$dbw->ignoreErrors( false );
+	}
+
+	/**
+	 * Starts a new batch by resetting the map from batch keys to the
+	 * sids assigned in batchInsertDefinitions().
+	 */
+	public function beginBatch() {
+		$this->sids = array();
+	}
+
+	/**
+	 * Inserts the definitions of a batch and remembers the sid of each
+	 * one under its batch key for batchInsertTranslations().
+	 *
+	 * @param $batch array Keyed items, each array( title, language, text ).
+	 */
+	public function batchInsertDefinitions( array $batch ) {
+		foreach ( $batch as $key => $item ) {
+			list( $title, $language, $text ) = $item;
+			$handle = new MessageHandle( $title );
+			$namespace = $handle->getTitle()->getNamespace();
+			$context = Title::makeTitle( $namespace, $handle->getKey() );
+			$sid = $this->insertSource( $context, $language, $text );
+			$this->sids[$key] = $sid;
+		}
+		wfWaitForSlaves( 10 );
+	}
+
+	/**
+	 * Inserts the translations of a batch in a single insert call.
+	 * The sid of each translation is looked up by batch key from the
+	 * values stored by batchInsertDefinitions().
+	 *
+	 * @param $batch array Keyed items, each array( title, language, text ).
+	 */
+	public function batchInsertTranslations( array $batch ) {
+		$translationRows = array();
+		foreach ( $batch as $key => $item ) {
+			list( $title, $language, $text ) = $item;
+			$translationRow = array(
+				'tmt_sid' => $this->sids[$key],
+				'tmt_lang' => $language,
+				'tmt_text' => $text,
+			);
+			$translationRows[] = $translationRow;
+		}
+
+		$this->getDB( DB_MASTER )->insert( 'translate_tmt', $translationRows, __METHOD__ );
+		wfWaitForSlaves( 10 );
+	}
+
+	/**
+	 * Intentionally empty: this backend does nothing at the end of a batch.
+	 */
+	public function endBatch() {}
+
+	/**
+	 * Finishes the bootstrap by creating the fulltext index tmf_text
+	 * on the tmf_text column of the translate_tmf table.
+	 */
+	public function endBootstrap() {
+		$dbw = $this->getDB( DB_MASTER );
+		$table = $dbw->tableName( 'translate_tmf' );
+		$dbw->query( "CREATE FULLTEXT INDEX tmf_text ON $table (tmf_text)" );
+	}
+
+	/* Reading interface */
+
+	/**
+	 * This backend reports every suggestion as local.
+	 * @param $suggestion array
+	 * @return Bool Always true.
+	 */
+	public function isLocalSuggestion( array $suggestion ) {
+		return true;
+	}
+
+	/**
+	 * Builds the canonical URL of the page named by the suggestion.
+	 *
+	 * @param $suggestion array Suggestion whose 'location' entry holds
+	 *   the title text of the translation page.
+	 * @return String Canonical URL, or an empty string when the
+	 *   location cannot be turned into a title.
+	 */
+	public function expandLocation( array $suggestion ) {
+		$title = Title::newFromText( $suggestion['location'] );
+		if ( !$title ) {
+			// Title::newFromText() gives null for invalid title text;
+			// calling a method on it would be a fatal error
+			return '';
+		}
+		return $title->getCanonicalUrl();
+	}
+
+	/**
+	 * Finds translation memory suggestions for the given text.
+	 *
+	 * Candidate rows are narrowed in the database by language pair, by
+	 * source text length and, when filterForFulltext() yields tokens,
+	 * by a fulltext match. Scoring happens in processQueryResults().
+	 *
+	 * @param $sourceLanguage String: language code of $text
+	 * @param $targetLanguage String: language code for the suggestions
+	 * @param $text String: the text for which to search suggestions
+	 * @return array Suggestions as returned by processQueryResults().
+	 */
+	public function query( $sourceLanguage, $targetLanguage, $text ) {
+		wfProfileIn( __METHOD__ );
+		// Calculate the bounds of the string length which are able
+		// to satisfy the cutoff percentage in edit distance.
+		$len = mb_strlen( $text );
+		$min = ceil( max( $len * $this->config['cutoff'], 2 ) );
+		$max = floor( $len / $this->config['cutoff'] );
+
+		$dbr = $this->getDB( DB_SLAVE );
+		$tables = array( 'translate_tmt', 'translate_tms' );
+		$fields = array( 'tms_context', 'tms_text', 'tmt_lang', 'tmt_text' );
+
+		$conds = array(
+			'tms_lang' => $sourceLanguage,
+			'tmt_lang' => $targetLanguage,
+			"tms_len BETWEEN $min AND $max",
+			'tms_sid = tmt_sid',
+		);
+
+		// Subclass-specific columns are both selected and filtered on
+		$extra = $this->getExtraConditions();
+		$fields = array_merge( $fields, array_keys( $extra ) );
+		$conds = array_merge( $conds, $extra );
+
+		// Use the fulltext index to narrow the results further
+		$fulltext = $this->filterForFulltext( $sourceLanguage, $text );
+		if ( $fulltext ) {
+			$tables[] = 'translate_tmf';
+			$list = implode( ' ', $fulltext );
+			$conds[] = 'tmf_sid = tmt_sid';
+			// Let the database layer quote the search string instead of
+			// interpolating it into the SQL by hand
+			$conds[] = 'MATCH(tmf_text) AGAINST( ' . $dbr->addQuotes( $list ) . ' )';
+		}
+
+		$res = $dbr->select( $tables, $fields, $conds, __METHOD__ );
+		wfProfileOut( __METHOD__ );
+		return $this->processQueryResults( $res, $text, $sourceLanguage, $targetLanguage );
+	}
+
+	/**
+	 * Scores the candidate rows against the queried text and returns
+	 * the ones that reach the configured cutoff, sorted with
+	 * TTMServer::sortSuggestions().
+	 *
+	 * @param $res Iterable rows with tms_text, tmt_text and tms_context
+	 *   fields, and optionally tms_wiki.
+	 * @param $text String The text suggestions were searched for.
+	 * @param $sourceLanguage String Language code of $text.
+	 * @param $targetLanguage String Language code of the suggestions.
+	 * @return array List of suggestions with the keys source, target,
+	 *   context, location, quality and wiki.
+	 */
+	protected function processQueryResults( $res, $text, $sourceLanguage, $targetLanguage ) {
+		wfProfileIn( __METHOD__ );
+		$lenA = mb_strlen( $text );
+		$results = array();
+		foreach ( $res as $row ) {
+			$candidate = $row->tms_text;
+			$lenB = mb_strlen( $candidate );
+			$len = min( $lenA, $lenB );
+			if ( $len < 1 ) {
+				// An empty string on either side cannot be scored: the
+				// quality formula below would divide by zero
+				continue;
+			}
+
+			if ( $len > 1000 ) {
+				// Too slow to compute, so assume the worst distance:
+				// two strings of length 1500 ~ 10s
+				// two strings of length 2250 ~ 30s
+				$dist = $len;
+			} else {
+				$dist = self::levenshtein( $text, $candidate, $lenA, $lenB );
+			}
+			$quality = 1 - ( $dist * 0.9 / $len );
+
+			if ( $quality >= $this->config['cutoff'] ) {
+				$results[] = array(
+					'source' => $row->tms_text,
+					'target' => $row->tmt_text,
+					'context' => $row->tms_context,
+					'location' => $row->tms_context . '/' . $targetLanguage,
+					'quality' => $quality,
+					// Rows without a tms_wiki field are attributed to this wiki
+					'wiki' => isset( $row->tms_wiki ) ? $row->tms_wiki : wfWikiId(),
+				);
+			}
+		}
+		$results = TTMServer::sortSuggestions( $results );
+		wfProfileOut( __METHOD__ );
+		return $results;
+	}
+
+}
diff --git a/extensions/Translate/ttmserver/FakeTTMServer.php \
b/extensions/Translate/ttmserver/FakeTTMServer.php new file mode 100644
index 0000000..a9cf1ea
--- /dev/null
+++ b/extensions/Translate/ttmserver/FakeTTMServer.php
@@ -0,0 +1,39 @@
+<?php
+/**
+ * TTMServer - The Translate extension translation memory interface
+ *
+ * @file
+ * @author Niklas Laxström
+ * @copyright Copyright  © 2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ * @ingroup TTMServer
+ */
+
+/**
+ * NO-OP version of TTMServer when it is disabled.
+ * Keeps other code simpler when they can just do
+ * TTMServer::primary()->update( ... );
+ * @since 2012-01-28
+ * @ingroup TTMServer
+ */
+class FakeTTMServer implements ReadableTTMServer, WritableTTMServer {
+	/**
+	 * Never finds anything.
+	 * @return array Always an empty list.
+	 */
+	public function query( $sourceLanguage, $targetLanguage, $text ) {
+		return array();
+	}
+
+	/**
+	 * @return Bool Always false.
+	 */
+	public function isLocalSuggestion( array $suggestion ) {
+		// The value has to be returned explicitly; a bare "false;"
+		// statement makes the method return null instead of a Bool
+		return false;
+	}
+
+	/**
+	 * @return String Always an empty string.
+	 */
+	public function expandLocation( array $suggestion ) {
+		return '';
+	}
+
+	// The whole writing interface is a no-op
+	public function update( MessageHandle $handle, $targetText ) {}
+	public function beginBootstrap() {}
+	public function beginBatch() {}
+	public function batchInsertDefinitions( array $batch ) {}
+	public function batchInsertTranslations( array $batch ) {}
+	public function endBatch() {}
+	public function endBootstrap() {}
+}
diff --git a/extensions/Translate/ttmserver/Interfaces.php \
b/extensions/Translate/ttmserver/Interfaces.php new file mode 100644
index 0000000..f99ad51
--- /dev/null
+++ b/extensions/Translate/ttmserver/Interfaces.php
@@ -0,0 +1,94 @@
+<?php
+/**
+ * TTMServer - The Translate extension translation memory interface
+ *
+ * @file
+ * @author Niklas Laxström
+ * @copyright Copyright  © 2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ * @ingroup TTMServer
+ */
+
+/**
+ * Interface for TTMServer that can be queried (=all of them).
+ * @ingroup TTMServer
+ * @since 2012-06-27
+ */
+interface ReadableTTMServer {
+	/**
+	 * Fetches all relevant suggestions for given text.
+	 *
+	 * @param $sourceLanguage String: language code for the provided text
+	 * @param $targetLanguage String: language code for the suggestions
+	 * @param $text String: the text for which to search suggestions
+	 * @return List: unordered suggestions, which each has fields:
+	 *   - source: String: the original text of the suggestion
+	 *   - target: String: the suggestion
+	 *   - context: String: title of the page where the suggestion comes from
+	 *   - quality: Float: the quality of suggestion, 1 is perfect match
+	 */
+	public function query( $sourceLanguage, $targetLanguage, $text );
+
+	/**
+	 * Determines if the suggestion returned by this TTMServer comes
+	 * from this wiki or any other wiki.
+	 * @param $suggestion Array: one suggestion as returned by query()
+	 * @return Bool
+	 */
+	public function isLocalSuggestion( array $suggestion );
+
+	/**
+	 * Given suggestion returned by this TTMServer, constructs fully
+	 * qualified URL to the location of the translation.
+	 * @param $suggestion Array: one suggestion as returned by query()
+	 * @return String URL
+	 */
+	public function expandLocation( array $suggestion );
+
+}
+
+/**
+ * Interface for TTMServer that can be updated.
+ * @ingroup TTMServer
+ * @since 2012-06-27
+ */
+interface WritableTTMServer {
+	/**
+	 * Shovels the new translation into translation memory.
+	 * Use this for single updates (=after message edit).
+	 *
+	 * @param $handle MessageHandle
+	 * @param $targetText String
+	 */
+	public function update( MessageHandle $handle, $targetText );
+
+	/**
+	 * Called when starting to fill the translation memory.
+	 * Set up necessary variables and remove old content
+	 * from the server.
+	 */
+	public function beginBootstrap();
+
+	/**
+	 * Called before every batch (MessageGroup).
+	 */
+	public function beginBatch();
+
+	/**
+	 * Called multiple times per batch if necessary.
+	 * @param $batch Array: keyed items, each array( title, language, text )
+	 */
+	public function batchInsertDefinitions( array $batch );
+
+	/**
+	 * Called multiple times per batch if necessary.
+	 * @param $batch Array: keyed items, each array( title, language, text )
+	 */
+	public function batchInsertTranslations( array $batch );
+
+	/**
+	 * Called after every batch (MessageGroup).
+	 */
+	public function endBatch();
+
+	/**
+	 * Do any cleanup, optimizing etc.
+	 */
+	public function endBootstrap();
+}
diff --git a/extensions/Translate/ttmserver/RemoteTTMServer.php \
b/extensions/Translate/ttmserver/RemoteTTMServer.php new file mode 100644
index 0000000..fa1b907
--- /dev/null
+++ b/extensions/Translate/ttmserver/RemoteTTMServer.php
@@ -0,0 +1,32 @@
+<?php
+/**
+ * TTMServer - The Translate extension translation memory interface
+ *
+ * @file
+ * @author Niklas Laxström
+ * @copyright Copyright  © 2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ * @ingroup TTMServer
+ */
+
+/**
+ * Class for handling remote TTMServers over MediaWiki API.
+ * Currently querying is done in TranslationHelpers, and
+ * this class only handles location retrieval.
+ * @since 2012-06-27
+ * @ingroup TTMServer
+ */
+class RemoteTTMServer extends TTMServer implements ReadableTTMServer {
+	/**
+	 * Querying is not implemented in this class.
+	 * @return array Always an empty list.
+	 */
+	public function query( $sourceLanguage, $targetLanguage, $text ) {
+		// TODO: implement some day perhaps?
+		return array();
+	}
+
+	/**
+	 * Suggestions of a remote server are never reported as local.
+	 * @return Bool Always false.
+	 */
+	public function isLocalSuggestion( array $suggestion ) {
+		return false;
+	}
+
+	/**
+	 * Returns the 'location' entry of the suggestion unchanged.
+	 * @param $suggestion array Suggestion with a 'location' entry.
+	 * @return String
+	 */
+	public function expandLocation( array $suggestion ) {
+		return $suggestion['location'];
+	}
+}
diff --git a/extensions/Translate/ttmserver/SharedDatabaseTTMServer.php \
b/extensions/Translate/ttmserver/SharedDatabaseTTMServer.php new file mode \
100644 index 0000000..be90916
--- /dev/null
+++ b/extensions/Translate/ttmserver/SharedDatabaseTTMServer.php
@@ -0,0 +1,53 @@
+<?php
+/**
+ * TTMServer - The Translate extension translation memory interface
+ *
+ * @file
+ * @author Niklas Laxström
+ * @copyright Copyright  © 2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ * @ingroup TTMServer
+ */
+
+/**
+ * MySQL based backend shared with multiple wikis.
+ * @ingroup TTMServer
+ * @since 2012-06-27
+ */
+class SharedDatabaseTTMServer extends DatabaseTTMServer {
+	/**
+	 * Restricts source rows to the ones belonging to the current wiki.
+	 * @return array
+	 */
+	protected function getExtraConditions() {
+		return array( 'tms_wiki' => wfWikiId() );
+	}
+
+	/**
+	 * Removes the entries of the current wiki only. The dependent
+	 * translate_tmf and translate_tmt rows are deleted before the
+	 * translate_tms rows they are joined against.
+	 */
+	public function beginBootstrap() {
+		$dbw = $this->getDB( DB_MASTER );
+		$wiki = $this->getExtraConditions();
+
+		$dbw->deleteJoin(
+			'translate_tmf', 'translate_tms',
+			'tmf_sid', 'tms_sid',
+			$wiki, __METHOD__
+		);
+
+		$dbw->deleteJoin(
+			'translate_tmt', 'translate_tms',
+			'tmt_sid', 'tms_sid',
+			$wiki, __METHOD__
+		);
+		$dbw->delete( 'translate_tms', $wiki, __METHOD__ );
+	}
+
+	// Overwrite parent behaviour
+	public function endBootstrap() {}
+
+	/* Reading interface */
+
+	/**
+	 * @param $suggestion array Suggestion with a 'wiki' entry.
+	 * @return Bool Whether the suggestion comes from the current wiki.
+	 */
+	public function isLocalSuggestion( array $suggestion ) {
+		return $suggestion['wiki'] === wfWikiId();
+	}
+
+	/**
+	 * Builds the URL of the suggestion's page on the wiki it comes from.
+	 *
+	 * @param $suggestion array Suggestion with 'wiki' and 'location' entries.
+	 * @return String Canonical URL, or an empty string when the wiki
+	 *   of the suggestion is not known to WikiMap.
+	 */
+	public function expandLocation( array $suggestion ) {
+		$wiki = WikiMap::getWiki( $suggestion['wiki'] );
+		if ( !$wiki ) {
+			// WikiMap::getWiki() gives null for unknown wiki ids;
+			// calling a method on it would be a fatal error
+			return '';
+		}
+		return $wiki->getCanonicalUrl( $suggestion['location'] );
+	}
+}
diff --git a/extensions/Translate/ttmserver/SolrTTMServer.php \
b/extensions/Translate/ttmserver/SolrTTMServer.php new file mode 100644
index 0000000..8522aae
--- /dev/null
+++ b/extensions/Translate/ttmserver/SolrTTMServer.php
@@ -0,0 +1,227 @@
+<?php
+/**
+ * TTMServer - The Translate extension translation memory interface
+ *
+ * @file
+ * @author Niklas Laxström
+ * @copyright Copyright  © 2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ * @ingroup TTMServer
+ */
+
+/**
+ * TTMServer backend based on a Solr instance. Depends on Solarium.
+ * @since 2012-06-27
+ * @ingroup TTMServer
+ */
+class SolrTTMServer extends TTMServer implements ReadableTTMServer, \
WritableTTMServer  { +	protected $client;
+	protected $updates;
+
+	/**
+	 * Stores the configuration and creates the Solarium client. The
+	 * 'config' entry, when present, is handed to the client constructor.
+	 *
+	 * @param $config array Server configuration.
+	 */
+	public function __construct( $config ) {
+		wfProfileIn( __METHOD__ );
+		parent::__construct( $config );
+		$this->client = isset( $config['config'] )
+			? new Solarium_Client( $config['config'] )
+			: new Solarium_Client();
+		wfProfileOut( __METHOD__ );
+	}
+
+	/**
+	 * @param $suggestion array Suggestion with a 'wiki' entry.
+	 * @return Bool Whether the suggestion comes from the current wiki.
+	 */
+	public function isLocalSuggestion( array $suggestion ) {
+		return $suggestion['wiki'] === wfWikiId();
+	}
+
+	/**
+	 * Returns the 'uri' entry which query() stores in each suggestion.
+	 * @param $suggestion array Suggestion with a 'uri' entry.
+	 * @return String
+	 */
+	public function expandLocation( array $suggestion ) {
+		return $suggestion['uri'];
+	}
+
+	/**
+	 * Finds translation memory suggestions for the given text from Solr.
+	 *
+	 * At most 250 documents are requested, sorted by the edit distance
+	 * Solr calculates. The quality of each candidate is then computed
+	 * locally, and collecting stops at the first candidate that falls
+	 * below the configured cutoff.
+	 *
+	 * @param $sourceLanguage String: language code of $text
+	 * @param $targetLanguage String: language code for the suggestions
+	 * @param $text String: the text for which to search suggestions
+	 * @return array List of suggestions with the keys source, target,
+	 *   context, quality, wiki, location and uri.
+	 */
+	public function query( $sourceLanguage, $targetLanguage, $text ) {
+		wfProfileIn( __METHOD__ );
+		$len = mb_strlen( $text );
+		$min = ceil( max( $len * $this->config['cutoff'], 2 ) );
+		$max = floor( $len / $this->config['cutoff'] );
+		$languageField = "text_$targetLanguage";
+
+		$query = $this->client->createSelect();
+		$query->setFields( array( 'uri', 'wiki', 'content', $languageField, 'messageid' ) );
+		$query->setRows( 250 );
+		$helper = $query->getHelper();
+
+		$queryString = 'content:%P1%';
+		$query->setQuery( $queryString, array( $text ) );
+
+		$query->createFilterQuery( 'lang' )
+			->setQuery( 'language:%T1%', array( $sourceLanguage ) );
+		$query->createFilterQuery( 'trans' )
+			->setQuery( '%T1%:["" TO *]', array( $languageField ) );
+		$query->createFilterQuery( 'len' )
+			->setQuery( $helper->rangeQuery( 'charcount', $min, $max ) );
+
+		$sortExpression = $helper->escapePhrase( $text );
+		$sortExpression = "strdist($sortExpression,text,edit)";
+		$query->addSort( $sortExpression, 'asc' );
+
+		$resultset = $this->client->select( $query );
+
+		// Several documents can share the same source text. The quality
+		// itself is cached per source text, so that a cache hit never
+		// reuses the quality of a different, previously seen candidate.
+		$qualityCache = array();
+		$suggestions = array();
+		foreach ( $resultset as $doc ) {
+			$candidate = $doc->content;
+
+			if ( isset( $qualityCache[$candidate] ) ) {
+				$quality = $qualityCache[$candidate];
+			} else {
+				$candidateLen = mb_strlen( $candidate );
+				$dist = TTMServer::levenshtein( $text, $candidate, $len, $candidateLen );
+				$quality = 1 - ( $dist * 0.9 / min( $len, $candidateLen ) );
+				$qualityCache[$candidate] = $quality;
+			}
+			if ( $quality < $this->config['cutoff'] ) {
+				break;
+			}
+
+			$suggestions[] = array(
+				'source' => $candidate,
+				'target' => $doc->$languageField,
+				'context' => $doc->messageid,
+				'quality' => $quality,
+				'wiki' => $doc->wiki,
+				'location' => $doc->messageid . '/' . $targetLanguage,
+				'uri' => $doc->uri . '/' . $targetLanguage,
+			);
+		}
+		wfProfileOut( __METHOD__ );
+		return $suggestions;
+	}
+
+	/* Write functions */
+
+	/**
+	 * Stores a single translation in the Solr document of its message.
+	 *
+	 * Nothing is stored when the handle is invalid or has no language
+	 * code, when the translation is in the group's source language, or
+	 * when the message has no non-blank string definition. Fields of an
+	 * already existing document with the same global id are carried
+	 * over to the document that is sent to Solr.
+	 *
+	 * @param $handle MessageHandle
+	 * @param $targetText String
+	 * @return Bool True if the translation was sent, false if skipped.
+	 * @throws MWException If several documents share the global id.
+	 */
+	public function update( MessageHandle $handle, $targetText ) {
+		if ( !$handle->isValid() || $handle->getCode() === '' ) {
+			return false;
+		}
+
+		$mkey  = $handle->getKey();
+		$group = $handle->getGroup();
+		$targetLanguage = $handle->getCode();
+		$sourceLanguage = $group->getSourceLanguage();
+
+		// Skip definitions to not slow down mass imports etc.
+		// These will be added when the first translation is made
+		if ( $targetLanguage === $sourceLanguage ) {
+			return false;
+		}
+
+		$definition = $group->getMessage( $mkey, $sourceLanguage );
+		if ( !is_string( $definition ) || !strlen( trim( $definition ) ) ) {
+			return false;
+		}
+
+		wfProfileIn( __METHOD__ );
+		$doc = $this->createDocument( $handle, $sourceLanguage, $definition );
+
+		$query = $this->client->createSelect();
+		$query->createFilterQuery( 'globalid' )->setQuery( 'globalid:%P1%', array( $doc->globalid ) );
+		$resultset = $this->client->select( $query );
+
+		$found = count( $resultset );
+		if ( $found > 1 ) {
+			// Close the profiling section on this exit path as well
+			wfProfileOut( __METHOD__ );
+			throw new MWException( "Found multiple documents with global id {$doc->globalid}" );
+		}
+
+		// Fill in the fields from existing entry if it exists
+		if ( $found === 1 ) {
+			foreach ( $resultset as $resultdoc ) {
+				foreach( $resultdoc as $field => $value ) {
+					if ( $field !== 'score' && !isset( $doc->$field ) ) {
+						$doc->$field = $value;
+					}
+				}
+			}
+		}
+
+		$languageField = "text_$targetLanguage";
+		$doc->$languageField = $targetText;
+
+		$update = $this->client->createUpdate();
+		$update->addDocument( $doc );
+		$update->addCommit();
+		$this->client->update( $update );
+
+		wfProfileOut( __METHOD__ );
+		return true;
+	}
+
+	/**
+	 * Builds the Solr document for a source text, without translations.
+	 *
+	 * The global id consists of the wiki id, the prefixed message title
+	 * and the first eight characters of the SHA-1 hash of the text.
+	 *
+	 * @param $handle MessageHandle Handle of the message.
+	 * @param $language String Language code of $text.
+	 * @param $text String The source text.
+	 * @return Solarium_Document_ReadWrite
+	 */
+	protected function createDocument( MessageHandle $handle, $language, $text ) {
+		$namespace = $handle->getTitle()->getNamespace();
+		$title = Title::makeTitle( $namespace, $handle->getKey() );
+		$wiki = wfWikiId();
+		$messageid = $title->getPrefixedText();
+		$textHash = substr( sha1( $text ), 0, 8 );
+		$globalid = sprintf( '%s-%s-%s', $wiki, $messageid, $textHash );
+
+		$document = new Solarium_Document_ReadWrite();
+		$document->language = $language;
+		$document->content = $text;
+		$document->charcount = mb_strlen( $text );
+
+		$document->uri = $title->getCanonicalUrl();
+		$document->wiki = $wiki;
+		$document->messageid = $messageid;
+		$document->globalid = $globalid;
+
+		return $document;
+	}
+
+	/**
+	 * Prepares for refilling the translation memory by sending a
+	 * delete query for the documents of the current wiki.
+	 */
+	public function beginBootstrap() {
+		$update = $this->client->createUpdate();
+		// Bind values go in an array, as in the setQuery() calls of this
+		// class; with a plain string the %T1% placeholder would not be
+		// replaced with the whole wiki id
+		$update->addDeleteQuery( 'wiki:%T1%', array( wfWikiId() ) );
+		$this->client->update( $update );
+	}
+
+	/**
+	 * Starts a new batch by emptying the list of pending updates,
+	 * which endBatch() turns into documents.
+	 */
+	public function beginBatch() {
+		$this->updates = array();
+	}
+
+	/**
+	 * Queues the definitions of a batch; nothing is sent to Solr here.
+	 * @param $batch array Keyed items, each array( title, language, text )
+	 *   as unpacked in endBatch().
+	 */
+	public function batchInsertDefinitions( array $batch ) {
+		foreach ( $batch as $key => $data ) {
+			// The definition is kept under the special key '*'
+			$this->updates[$key]['*'] = $data;
+		}
+	}
+
+	/**
+	 * Queues the translations of a batch under the same keys as their
+	 * definitions; nothing is sent to Solr here.
+	 * @param $batch array Keyed items, each array( title, language, text ).
+	 */
+	public function batchInsertTranslations( array $batch ) {
+		foreach ( $batch as $key => $data ) {
+			list( $title, $language, $text ) = $data;
+			// Only the text is needed, indexed by its language code
+			$this->updates[$key][$language] = $text;
+		}
+	}
+
+	/**
+	 * Turns the queued definitions and translations of the batch into
+	 * one document per message and sends them in a single update.
+	 */
+	public function endBatch() {
+		$update = $this->client->createUpdate();
+
+		foreach ( $this->updates as $entries ) {
+			// The definition was queued under the special key '*'
+			list( $title, $sourceLanguage, $sourceText ) = $entries['*'];
+			$handle = new MessageHandle( $title );
+			$doc = $this->createDocument( $handle, $sourceLanguage, $sourceText );
+
+			// The source text is stored in a text_ field as well
+			$doc->{"text_$sourceLanguage"} = $sourceText;
+
+			$translations = $entries;
+			unset( $translations['*'] );
+			foreach ( $translations as $code => $translation ) {
+				$doc->{"text_$code"} = $translation;
+			}
+
+			$update->addDocument( $doc );
+		}
+
+		$this->client->update( $update );
+	}
+
+	/**
+	 * Finishes the bootstrap by sending an optimize command to Solr.
+	 */
+	public function endBootstrap() {
+		$update = $this->client->createUpdate();
+		$update->addOptimize( false, false, 2 );
+		$this->client->update( $update );
+	}
+
+}
diff --git a/extensions/Translate/ttmserver/TTMServer.php \
b/extensions/Translate/ttmserver/TTMServer.php new file mode 100644
index 0000000..879b17c
--- /dev/null
+++ b/extensions/Translate/ttmserver/TTMServer.php
@@ -0,0 +1,112 @@
+<?php
+/**
+ * TTMServer - The Translate extension translation memory interface
+ *
+ * @file
+ * @author Niklas Laxström
+ * @copyright Copyright  © 2012, Niklas Laxström
+ * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
+ * @defgroup TTMServer The Translate extension translation memory interface
+ */
+
+/**
+ * Some general static methods for instantiating TTMServer and helpers.
+ * @since 2012-01-28
+ * Rewritten in 2012-06-27.
+ * @ingroup TTMServer
+ */
+class TTMServer  {
+	protected $config;
+
+	/**
+	 * Stores the server configuration for later use.
+	 * @param $config array Configuration of this server.
+	 */
+	protected function __construct( $config ) {
+		$this->config = $config;
+	}
+
+	/**
+	 * Creates the server object described by the configuration.
+	 *
+	 * A 'class' entry names the class to instantiate directly and takes
+	 * precedence over 'type', which selects one of the built-in classes.
+	 *
+	 * @param $config array Server configuration.
+	 * @return TTMServer|null Null if the type is not known.
+	 * @throws MWException If neither 'class' nor 'type' is set.
+	 */
+	public static function factory( $config ) {
+		if ( isset( $config['class'] ) ) {
+			$class = $config['class'];
+			return new $class( $config );
+		} elseif ( isset( $config['type'] ) ) {
+			$type = $config['type'];
+			switch( $type ) {
+			case 'ttmserver':
+				return new DatabaseTTMServer( $config );
+			case 'shared-ttmserver':
+				return new SharedDatabaseTTMServer( $config );
+			case 'remote-ttmserver':
+				return new RemoteTTMServer( $config );
+			default:
+				return null;
+			}
+		}
+
+		throw new MWException( "TTMServer with no type" );
+	}
+
+	/**
+	 * Gives the primary server instance, which is handy for chaining.
+	 * The primary server is the one configured under the key TTMServer
+	 * in $wgTranslateTranslationServices. A FakeTTMServer is returned
+	 * when there is no such configuration or when the configured server
+	 * is not writable.
+	 * @return WritableTTMServer
+	 */
+	public static function primary() {
+		global $wgTranslateTranslationServices;
+
+		if ( !isset( $wgTranslateTranslationServices['TTMServer'] ) ) {
+			return new FakeTTMServer();
+		}
+
+		$server = self::factory( $wgTranslateTranslationServices['TTMServer'] );
+		return ( $server instanceof WritableTTMServer ) ? $server : new FakeTTMServer();
+	}
+
+	/**
+	 * Sorts suggestions with qualitySort(), best quality first.
+	 * @param $suggestions array List of suggestions with a 'quality' entry.
+	 * @return array The sorted list, reindexed by usort().
+	 */
+	public static function sortSuggestions( array $suggestions ) {
+		usort( $suggestions, array( __CLASS__, 'qualitySort' ) );
+		return $suggestions;
+	}
+
+	/**
+	 * Comparison callback ordering suggestions by descending quality.
+	 * @param $a array Suggestion with a 'quality' entry.
+	 * @param $b array Suggestion with a 'quality' entry.
+	 * @return int 0 for identical quality, -1 if $a is better, else 1.
+	 */
+	protected static function qualitySort( $a, $b ) {
+		$qualityA = $a['quality'];
+		$qualityB = $b['quality'];
+
+		if ( $qualityA === $qualityB ) {
+			return 0;
+		}
+		if ( $qualityA > $qualityB ) {
+			// The better suggestion sorts first
+			return -1;
+		}
+		return 1;
+	}
+
+	/**
+	 * Levenshtein edit distance between two strings, counted in
+	 * characters. The native PHP function is used for speed when both
+	 * strings are at most 255 bytes long and their byte lengths equal
+	 * the given character lengths; the native levenshtein is limited
+	 * to 255 bytes. Otherwise the distance is computed here row by row.
+	 *
+	 * @param $str1 String
+	 * @param $str2 String
+	 * @param $length1 int Length of $str1 in characters.
+	 * @param $length2 int Length of $str2 in characters.
+	 * @return int
+	 */
+	public static function levenshtein( $str1, $str2, $length1, $length2 ) {
+		if ( $length1 == 0 ) {
+			return $length2;
+		}
+		if ( $length2 == 0 ) {
+			return $length1;
+		}
+		if ( $str1 === $str2 ) {
+			return 0;
+		}
+
+		$bytes1 = strlen( $str1 );
+		$bytes2 = strlen( $str2 );
+		$nativeOk1 = ( $bytes1 === $length1 && $bytes1 <= 255 );
+		$nativeOk2 = ( $bytes2 === $length2 && $bytes2 <= 255 );
+		if ( $nativeOk1 && $nativeOk2 ) {
+			return levenshtein( $str1, $str2 );
+		}
+
+		// Only the previous row of the distance matrix is kept around
+		$previous = range( 0, $length2 );
+		for ( $row = 0; $row < $length1; $row++ ) {
+			$char1 = mb_substr( $str1, $row, 1 );
+			$current = array( $row + 1 );
+			for ( $col = 0; $col < $length2; $col++ ) {
+				$char2 = mb_substr( $str2, $col, 1 );
+				$cost = ( $char1 != $char2 ) ? 1 : 0;
+				$current[] = min(
+					$previous[$col + 1] + 1,
+					$current[$col] + 1,
+					$previous[$col] + $cost
+				);
+			}
+			$previous = $current;
+		}
+
+		return $previous[$length2];
+	}
+}
diff --git a/extensions/Translate/ttmserver/schema.xml \
b/extensions/Translate/ttmserver/schema.xml new file mode 100644
index 0000000..065c5d4
--- /dev/null
+++ b/extensions/Translate/ttmserver/schema.xml
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!-- This is schema file for TTMServer using Solr as backend -->
+<schema name="ttmserver" version="1.5">
+	<types>
+		<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+		<fieldType name="tint" class="solr.TrieIntField" precisionStep="50" \
positionIncrementGap="0"/> +		<fieldType name="text_en" \
class="solr.TextField" positionIncrementGap="100"> +			<analyzer \
type="index"> +				<tokenizer class="solr.StandardTokenizerFactory"/>
+				<filter class="solr.LowerCaseFilterFactory"/>
+				<filter class="solr.EnglishPossessiveFilterFactory"/>
+				<filter class="solr.KeywordMarkerFilterFactory" \
protected="protwords.txt"/> +	<!-- Optionally you may want to use this less \
aggressive stemmer instead of PorterStemFilterFactory: +				<filter \
class="solr.EnglishMinimalStemFilterFactory"/> +	-->
+				<filter class="solr.PorterStemFilterFactory"/>
+			</analyzer>
+			<analyzer type="query">
+				<tokenizer class="solr.StandardTokenizerFactory"/>
+				<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" \
ignoreCase="true" expand="true"/> +				<filter \
class="solr.LowerCaseFilterFactory"/> +				<filter \
class="solr.EnglishPossessiveFilterFactory"/> +				<filter \
class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> \
+				<filter class="solr.PorterStemFilterFactory"/> +			</analyzer>
+		</fieldType>
+		<fieldType name="text_ws" class="solr.TextField" \
positionIncrementGap="100"> +			<analyzer>
+				<tokenizer class="solr.WhitespaceTokenizerFactory"/>
+			</analyzer>
+		</fieldType>
+	</types>
+
+	<fields>
+		<!-- If multiple wikies are using the same server, this will tell which \
one owns this document --> +		<field name="wiki" type="string" \
indexed="true" stored="true" required="true" /> +		<!-- URL or something to \
the source document. It is assumed that +				 translations can be found by \
appending /langcode to the url +				 -->
+		<field name="uri" type="string" indexed="true" stored="true" \
required="true" /> +		<field name="messageid" type="string" indexed="true" \
stored="true" required="true" /> +		<field name="globalid" type="string" \
indexed="true" stored="true" required="true" /> +
+		<!--Original language of the translation-->
+		<field name="language" type="string" indexed="true" stored="true" \
required="true" /> +		<!-- For now assuming that content is always in \
English --> +		<field name="content" type="text_en" indexed="true" \
stored="true" required="true" /> +
+		<!-- For filtering suggestions which are way too long or short -->
+		<field name="charcount" type="tint" indexed="true" stored="true" />
+		<!-- This is the field used for searching -->
+		<field name="text" type="string" indexed="true" stored="false" />
+		<copyField source="content" dest="text" />
+
+		<!-- Fields for translations. -->
+		<dynamicField name="text_*" type="string" indexed="true" stored="true" \
/> +	</fields>
+
+	<uniqueKey>globalid</uniqueKey>
+</schema>


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic