[prev in list] [next in list] [prev in thread] [next in thread]
List: kde-commits
Subject: extragear/office/tellico/src/fetch
From: Robby Stephenson <robby () periapsis ! org>
Date: 2011-02-19 21:51:24
Message-ID: 20110219215124.0BC4BAC8BF () svn ! kde ! org
[Download RAW message or body]
SVN commit 1221646 by rstephenson:
improve some imdb regexp
M +39 -7 imdbfetcher.cpp
--- trunk/extragear/office/tellico/src/fetch/imdbfetcher.cpp #1221645:1221646
@@ -878,6 +878,8 @@
if(plotRx.cap(0).endsWith(QLatin1String("(more)</")) || \
plotURLRx.indexIn(plotRx.cap(0)) > -1) { useUserSummary = true;
}
+ } else {
+ useUserSummary = true;
}
if(useUserSummary) {
@@ -936,7 +938,7 @@
const QString& imdbHeader_, const QString& fieldName_) {
QRegExp br2Rx(QLatin1String("<br[\\s/]*>\\s*<br[\\s/]*>"), Qt::CaseInsensitive);
br2Rx.setMinimal(true);
- QRegExp divRx(QLatin1String("<div\\s[^>]*class\\s*=\\s*\"info\"[^>]*>(.*)</div"), \
Qt::CaseInsensitive); + QRegExp \
divRx(QLatin1String("<div\\s[^>]*class\\s*=\\s*\"(?:info|txt-block)\"[^>]*>(.*)</div"), \
Qt::CaseInsensitive); divRx.setMinimal(true);
QString name = QLatin1String("/name/");
@@ -1080,7 +1082,6 @@
}
void IMDBFetcher::doCover(const QString& str_, Tellico::Data::EntryPtr entry_, const \
KUrl& baseURL_) {
- // cover is the img with the "cover" alt text
QRegExp imgRx(QLatin1String("<img\\s+[^>]*src\\s*=\\s*\"([^\"]*)\"[^>]*>"), \
Qt::CaseInsensitive); imgRx.setMinimal(true);
@@ -1096,30 +1097,58 @@
QString id = ImageFactory::addImage(u, true);
if(!id.isEmpty()) {
entry_->setField(cover, id);
- }
return;
}
+ }
pos = posterRx.indexIn(str_, pos+posterRx.matchedLength());
}
// didn't find the cover, IMDb also used to put "cover" inside the url
+ // cover is the img with the "cover" alt text
+
pos = imgRx.indexIn(str_);
while(pos > -1) {
- if(imgRx.cap(0).contains(cover, Qt::CaseInsensitive)) {
+ const QString url = imgRx.cap(0).toLower();
+ if(url.contains(cover)) {
KUrl u(baseURL_, imgRx.cap(1));
QString id = ImageFactory::addImage(u, true);
if(!id.isEmpty()) {
entry_->setField(cover, id);
- }
return;
}
+ }
pos = imgRx.indexIn(str_, pos+imgRx.matchedLength());
}
+
+ // also check for <link rel='image_src'
+ QRegExp linkRx(QLatin1String("<link (.*)>"), Qt::CaseInsensitive);
+ linkRx.setMinimal(true);
+
+ const QString src = QLatin1String("image_src");
+
+ pos = linkRx.indexIn(str_);
+ while(pos > -1) {
+ const QString tag = linkRx.cap(1);
+ if(tag.contains(src, Qt::CaseInsensitive)) {
+ QRegExp hrefRx(QLatin1String("href=['\"](.*)['\"]"), Qt::CaseInsensitive);
+ hrefRx.setMinimal(true);
+ if(hrefRx.indexIn(tag) > -1) {
+ KUrl u(baseURL_, hrefRx.cap(1));
+ QString id = ImageFactory::addImage(u, true);
+ if(!id.isEmpty()) {
+ entry_->setField(cover, id);
+ return;
}
+ }
+ }
+ pos = linkRx.indexIn(str_, pos+linkRx.matchedLength());
+ }
+}
// look at every anchor tag in the string
void IMDBFetcher::doLists(const QString& str_, Tellico::Data::EntryPtr entry_) {
const QString genre = QLatin1String("/Genres/");
+ const QString genre2 = QLatin1String("/genre/");
const QString country = QLatin1String("/country/");
const QString lang = QLatin1String("/language/");
const QString colorInfo = QLatin1String("colors=");
@@ -1130,7 +1159,7 @@
// if we reach faqs or user comments, we can stop
const QString faqs = QLatin1String("/faq");
const QString users = QLatin1String("/user/");
- // IIMdb also has links with the word "sections" in them, remove that
+ // IMdb also has links with the word "sections" in them, remove that
// for genres and nationalities
int startPos = str_.indexOf(QLatin1String("<div id=\"pagecontent\">"));
@@ -1141,7 +1170,7 @@
QStringList genres, countries, langs, certs, tracks;
for(int pos = s_anchorRx->indexIn(str_, startPos); pos > -1; pos = \
s_anchorRx->indexIn(str_, pos+s_anchorRx->matchedLength())) { const QString cap1 = \
s_anchorRx->cap(1);
- if(cap1.contains(genre)) {
+ if(cap1.contains(genre) || cap1.contains(genre2)) {
if(!s_anchorRx->cap(2).contains(QLatin1String(" section"), \
Qt::CaseInsensitive)) { genres += s_anchorRx->cap(2).trimmed();
}
@@ -1167,6 +1196,9 @@
}
}
+ // since we have multiple genre search strings
+ genres.removeDuplicates();
+
entry_->setField(QLatin1String("genre"), \
genres.join(FieldFormat::delimiterString())); \
entry_->setField(QLatin1String("nationality"), \
countries.join(FieldFormat::delimiterString())); \
entry_->setField(QLatin1String("language"), \
langs.join(FieldFormat::delimiterString()));
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic