[prev in list] [next in list] [prev in thread] [next in thread] 

List:       xine-cvslog
Subject:    [xine-cvs] HG: xine-lib-1.2: external text subtitles: detect utf-8
From:       Petri_Hintukainen <phintuka () users ! sourceforge ! net>
Date:       2014-05-20 9:34:21
Message-ID: d64d0469396cf326fc4e.1400577605 () hg ! debian ! org
[Download RAW message or body]

# HG changeset patch
# User Petri Hintukainen <phintuka@users.sourceforge.net>
# Date 1400577605 -10800
# Node ID d64d0469396cf326fc4e2204bb020ae696e5b3bf
# Branch  default
# Parent  ed21a95f043b1f23e79c7601d4b6827d94ca5a07
external text subtitles: detect utf-8

diff --git a/src/spu_dec/sputext_demuxer.c b/src/spu_dec/sputext_demuxer.c
--- a/src/spu_dec/sputext_demuxer.c
+++ b/src/spu_dec/sputext_demuxer.c
@@ -94,6 +94,8 @@
   int                format;         /* constants see below        */
   char               next_line[SUB_BUFSIZE]; /* a buffer for next line read from file */
 
+  char              *encoding; /* charset. NULL if unknown. currently only "utf-8" autodetected. */
+
 } demux_sputext_t;
 
 typedef struct demux_sputext_class_s {
@@ -1105,6 +1107,44 @@
   return FORMAT_UNKNOWN;  /* too many bad lines */
 }
 
+/* True when byte b lies in the inclusive range [lo, hi]. */
+static int utf8_in_range(uint8_t b, uint8_t lo, uint8_t hi)
+{
+  return b >= lo && b <= hi;
+}
+
+/*
+ * Return the length (2, 3 or 4) of the multi-byte UTF-8 sequence that
+ * starts at c, or 0 if the bytes at c do not form an accepted sequence.
+ *
+ * c must point into a NUL-terminated string. Every byte is range-checked
+ * (and therefore known to be non-NUL) before the following byte is read,
+ * so the terminator is never overrun.
+ */
+static int utf8_sequence_length(const uint8_t *c)
+{
+  /* second-byte bounds; narrowed below for some lead bytes */
+  uint8_t lo = 0x80, hi = 0xBF;
+
+  /* 2-byte: C2..DF 80..BF (C0 and C1 could only start overlong forms) */
+  if (utf8_in_range(c[0], 0xC2, 0xDF))
+    return utf8_in_range(c[1], 0x80, 0xBF) ? 2 : 0;
+
+  /* 3-byte: E0..EF xx 80..BF */
+  if (utf8_in_range(c[0], 0xE0, 0xEF)) {
+    switch (c[0]) {
+      case 0xE0: lo = 0xA0; break;  /* below A0 would be an overlong form */
+      case 0xED: hi = 0x9F; break;  /* A0..BF would encode UTF-16 surrogates */
+      case 0xEE:
+      case 0xEF:
+        /* NOTE(review): stricter than plain well-formedness, which allows
+           80..BF here. Kept as in the original; presumably meant to skip
+           private-use code points - confirm. */
+        lo = 0xA4;
+        break;
+      default:
+        break;
+    }
+    /* the third byte is checked against both bounds; the original compared
+       c[1] a second time, so any third byte >= 0x80 was accepted */
+    return (utf8_in_range(c[1], lo, hi) &&
+            utf8_in_range(c[2], 0x80, 0xBF)) ? 3 : 0;
+  }
+
+  /* 4-byte: F0 90..BF, F1..F3 80..BF, F4 80..8F, then two 80..BF bytes */
+  if (utf8_in_range(c[0], 0xF0, 0xF4)) {
+    if (c[0] == 0xF0)
+      lo = 0x90;                    /* below 90 would be an overlong form */
+    else if (c[0] == 0xF4)
+      hi = 0x8F;                    /* above 8F would exceed U+10FFFF */
+    return (utf8_in_range(c[1], lo, hi) &&
+            utf8_in_range(c[2], 0x80, 0xBF) &&
+            utf8_in_range(c[3], 0x80, 0xBF)) ? 4 : 0;
+  }
+
+  return 0;
+}
+
+/*
+ * Scan the text lines of num_subs subtitles and classify their encoding.
+ *
+ * For each subtitle, text[0 .. lines-1] is examined, stopping at the first
+ * NULL entry.
+ *
+ * return:
+ *   -1: unknown (no byte with the high bit set was seen - ASCII?)
+ *    0: not valid utf-8 (scanning stops at the first bad sequence)
+ *    1: valid utf-8 (at least one multi-byte sequence, none invalid)
+ */
+static int detect_utf8(subtitle_t *subs, int num_subs)
+{
+  int i, l, utf8 = -1;
+
+  for (i = 0; i < num_subs; i++) {
+    for (l = 0; l < subs[i].lines && subs[i].text[l]; l++) {
+      /* inspect raw bytes as unsigned so the range comparisons work */
+      const uint8_t *c = (const uint8_t *)subs[i].text[l];
+
+      while (*c) {
+        int len = 1;
+
+        if (*c & 0x80) {
+          len = utf8_sequence_length(c);
+          if (!len)
+            return 0;
+          utf8 = 1;
+        }
+        c += len;
+      }
+    }
+  }
+
+  return utf8;
+}
+
 static subtitle_t *sub_read_file (demux_sputext_t *this) {
 
   int n_max;
@@ -1196,6 +1236,11 @@
       first[this->num-1].end = first[this->num-1].start + timeout;
     }
 
+  if (detect_utf8(first, this->num) > 0) {
+    xprintf (this->stream->xine, XINE_VERBOSITY_LOG, "detected utf-8 subtitles\n");
+    this->encoding = strdup("utf-8");
+  }
+
   if(this->stream->xine->verbosity >= XINE_VERBOSITY_DEBUG) {
     char buffer[1024];
 
@@ -1240,6 +1285,13 @@
     str[SUB_BUFSIZE-1] = '\0';
   }
 
+  if (this->encoding) {
+    buf->decoder_flags |= BUF_FLAG_SPECIAL;
+    buf->decoder_info[1] = BUF_SPECIAL_CHARSET_ENCODING;
+    buf->decoder_info_ptr[2] = this->encoding;
+    buf->decoder_info[2] = strlen(buf->decoder_info_ptr[2]);
+  }
+
   this->stream->video_fifo->put(this->stream->video_fifo, buf);
   this->cur++;
 
@@ -1255,6 +1307,7 @@
       free(this->subtitles[i].text[l]);
   }
   free(this->subtitles);
+  free(this->encoding);
   free(this);
 }
 

------------------------------------------------------------------------------
"Accelerate Dev Cycles with Automated Cross-Browser Testing - For FREE
Instantly run your Selenium tests across 300+ browser/OS combos.
Get unparalleled scalability from the best Selenium testing platform available
Simple to use. Nothing to install. Get started now for free."
http://p.sf.net/sfu/SauceLabs
_______________________________________________
Xine-cvslog mailing list
Xine-cvslog@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/xine-cvslog


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic