diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1d3e474 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +SOURCES/strigi-0.7.7.tar.xz diff --git a/.strigi.metadata b/.strigi.metadata new file mode 100644 index 0000000..4060a9f --- /dev/null +++ b/.strigi.metadata @@ -0,0 +1 @@ +eea57998206050aca779f1fbcb326a0a3d69f153 SOURCES/strigi-0.7.7.tar.xz diff --git a/SOURCES/0001-Fix-xpm-and-xbm-index.patch b/SOURCES/0001-Fix-xpm-and-xbm-index.patch new file mode 100644 index 0000000..b199ea2 --- /dev/null +++ b/SOURCES/0001-Fix-xpm-and-xbm-index.patch @@ -0,0 +1,70 @@ +From 35cf4a4818a0d1bc7cda07e29f271360e06443a0 Mon Sep 17 00:00:00 2001 +From: Weng Xuetian +Date: Mon, 12 Dec 2011 09:12:35 +0000 +Subject: [PATCH 1/8] Fix xpm and xbm index. + +REVIEW: 103368 +--- + plugins/lineplugins/xpmlineanalyzer.cpp | 10 +++++----- + plugins/throughplugins/xbmthroughanalyzer.cpp | 2 +- + 2 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/plugins/lineplugins/xpmlineanalyzer.cpp b/plugins/lineplugins/xpmlineanalyzer.cpp +index 980b2f7..c87a071 100644 +--- a/plugins/lineplugins/xpmlineanalyzer.cpp ++++ b/plugins/lineplugins/xpmlineanalyzer.cpp +@@ -65,7 +65,7 @@ XpmLineAnalyzer::handleLine(const char* data, uint32_t length) { + uint32_t i = 0; + // we have found the line which should contain the information we want + ready = true; +- // read the height ++ // read the width + uint32_t propertyValue = 0; + i++; + while (i < length && isdigit(data[i])) { +@@ -76,9 +76,9 @@ XpmLineAnalyzer::handleLine(const char* data, uint32_t length) { + if (i >= length || data[i] != ' ') + return; + +- analysisResult->addValue(factory->heightField, propertyValue); ++ analysisResult->addValue(factory->widthField, propertyValue); + +- // read the width ++ // read the height + propertyValue = 0; + i++; + while (i < length && isdigit(data[i])) { +@@ -89,7 +89,7 @@ XpmLineAnalyzer::handleLine(const char* data, uint32_t length) { + if (i >= length || data[i] != ' ') + return; 
+ +- analysisResult->addValue(factory->widthField, propertyValue); ++ analysisResult->addValue(factory->heightField, propertyValue); + + // read the number of colors + propertyValue = 0; +@@ -103,7 +103,7 @@ XpmLineAnalyzer::handleLine(const char* data, uint32_t length) { + return; + + analysisResult->addValue(factory->numberOfColorsField, propertyValue); +- analysisResult->addValue(factory->typeField, "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Image"); ++ analysisResult->addValue(factory->typeField, "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#RasterImage"); + } + bool + XpmLineAnalyzer::isReadyWithStream() { +diff --git a/plugins/throughplugins/xbmthroughanalyzer.cpp b/plugins/throughplugins/xbmthroughanalyzer.cpp +index e926e84..e3f35cc 100644 +--- a/plugins/throughplugins/xbmthroughanalyzer.cpp ++++ b/plugins/throughplugins/xbmthroughanalyzer.cpp +@@ -145,7 +145,7 @@ XbmThroughAnalyzer::connectInputStream(InputStream* in) { + } + } + +- analysisResult->addValue(factory->typeField, "http://www.semanticdesktop.org/ontologies/nfo#Cursor"); ++ analysisResult->addValue(factory->typeField, "http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#Cursor"); + + return in; + } +-- +1.7.10.4 + diff --git a/SOURCES/0001-Minor.-Fix-grammar-typo-in-cmake-output.patch b/SOURCES/0001-Minor.-Fix-grammar-typo-in-cmake-output.patch new file mode 100644 index 0000000..26881c1 --- /dev/null +++ b/SOURCES/0001-Minor.-Fix-grammar-typo-in-cmake-output.patch @@ -0,0 +1,26 @@ +From 24484bfb91b89ab5e0ef799252e69212ce94bde8 Mon Sep 17 00:00:00 2001 +From: Shaun Reich +Date: Fri, 23 Dec 2011 00:03:20 -0500 +Subject: [PATCH 1/3] Minor. Fix (grammar) typo in cmake output. 
+ +SVN_SILENT +--- + cmake/FindCppUnit.cmake | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/cmake/FindCppUnit.cmake b/cmake/FindCppUnit.cmake +index adaaeb6..f983e10 100644 +--- a/cmake/FindCppUnit.cmake ++++ b/cmake/FindCppUnit.cmake +@@ -33,7 +33,7 @@ ELSE(CPPUNIT_INCLUDE_DIR AND CPPUNIT_LIBRARIES) + FIND_PATH(CPPUNIT_CFLAGS cppunit/TestRunner.h PATHS /usr/include /usr/local/include ) + FIND_LIBRARY(CPPUNIT_LIBRARIES NAMES cppunit PATHS /usr/lib /usr/local/lib ) + # how can we find cppunit version? +- MESSAGE (STATUS "Ensure you cppunit installed version is at least ${CPPUNIT_MIN_VERSION}") ++ MESSAGE (STATUS "Ensure your cppunit installed version is at least ${CPPUNIT_MIN_VERSION}") + SET (CPPUNIT_INSTALLED_VERSION ${CPPUNIT_MIN_VERSION}) + ENDIF(CPPUNIT_CONFIG_EXECUTABLE) + +-- +1.7.10.4 + diff --git a/SOURCES/0002-Extract-tracknumber-and-track-count-from-a-value-lik.patch b/SOURCES/0002-Extract-tracknumber-and-track-count-from-a-value-lik.patch new file mode 100644 index 0000000..3a17cba --- /dev/null +++ b/SOURCES/0002-Extract-tracknumber-and-track-count-from-a-value-lik.patch @@ -0,0 +1,131 @@ +From 92df13d9b5fab8259a85315eb2c277546d20d4a6 Mon Sep 17 00:00:00 2001 +From: Sebastian Trueg +Date: Fri, 10 Feb 2012 13:31:31 +0100 +Subject: [PATCH 2/8] Extract tracknumber and track count from a value like + "03/16". 
+ +REVIEW: 103911 +--- + lib/endanalyzers/flacendanalyzer.cpp | 16 +++++++++++-- + lib/throughanalyzers/oggthroughanalyzer.cpp | 33 ++++++++++++++++++++++----- + 2 files changed, 41 insertions(+), 8 deletions(-) + +diff --git a/lib/endanalyzers/flacendanalyzer.cpp b/lib/endanalyzers/flacendanalyzer.cpp +index c043872..8877d1d 100644 +--- a/lib/endanalyzers/flacendanalyzer.cpp ++++ b/lib/endanalyzers/flacendanalyzer.cpp +@@ -52,6 +52,8 @@ const string + NMM_DRAFT "musicBrainzAlbumID"), + discNumberPropertyName( + NMM_DRAFT "setNumber"), ++ albumTrackCountName( ++ NMM_DRAFT "albumTrackCount"), + + musicClassName( + NMM_DRAFT "MusicPiece"), +@@ -226,7 +228,17 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* + const string value(p2+eq+1, size-eq-1); + + if (iter != factory->fields.end()) { +- indexable.addValue(iter->second, value); ++ // Hack: the tracknumber sometimes contains the track count, too ++ int pos = 0; ++ if(name=="tracknumber" && (pos = value.find_first_of('/')) > 0 ) { ++ // the track number ++ indexable.addValue(iter->second, value.substr(0, pos)); ++ // the track count ++ addStatement(indexable, albumUri, albumTrackCountName, value.substr(pos+1)); ++ } ++ else { ++ indexable.addValue(iter->second, value); ++ } + } else if(name=="artist") { + artist = value; + } else if(name=="lyrics") { +@@ -261,7 +273,7 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* + indexable.addTriplet(publisherUri, fullnamePropertyName, value); + } else if(name=="performer") { + performer = value; +- } ++ } + } + } else { + m_error = "problem with tag size"; +diff --git a/lib/throughanalyzers/oggthroughanalyzer.cpp b/lib/throughanalyzers/oggthroughanalyzer.cpp +index 26faa15..741b28e 100644 +--- a/lib/throughanalyzers/oggthroughanalyzer.cpp ++++ b/lib/throughanalyzers/oggthroughanalyzer.cpp +@@ -37,6 +37,8 @@ const string + "http://www.semanticdesktop.org/ontologies/2007/03/22/nco#fullname"), + 
titlePropertyName( + "http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title"), ++ albumTrackCountName( ++ NMM_DRAFT "albumTrackCount"), + + musicClassName( + NMM_DRAFT "MusicPiece"), +@@ -68,6 +70,14 @@ OggThroughAnalyzerFactory::registerFields(FieldRegister& r) { + fields["type"] = r.typeField; + } + ++inline ++void ++addStatement(AnalysisResult* indexable, string& subject, const string& predicate, const string& object) { ++ if (subject.empty()) ++ subject = indexable->newAnonymousUri(); ++ indexable->addTriplet(subject, predicate, object); ++} ++ + void + OggThroughAnalyzer::setIndexable(AnalysisResult* i) { + indexable = i; +@@ -129,6 +139,7 @@ OggThroughAnalyzer::connectInputStream(InputStream* in) { + // but for the composer in calssical music. Thus, we cache both and make the decision + // at the end + string artist, performer; ++ string albumUri; + + // read all the comments + p2 += 4; +@@ -151,15 +162,21 @@ OggThroughAnalyzer::connectInputStream(InputStream* in) { + = factory->fields.find(name); + string value(p2+eq+1, size-eq-1); + if (iter != factory->fields.end()) { +- indexable->addValue(iter->second, value); ++ // Hack: the tracknumber sometimes contains the track count, too ++ int pos = 0; ++ if(name=="tracknumber" && (pos = value.find_first_of('/')) > 0 ) { ++ // the track number ++ indexable->addValue(iter->second, value.substr(0, pos)); ++ // the track count ++ addStatement(indexable, albumUri, albumTrackCountName, value.substr(pos+1)); ++ } ++ else { ++ indexable->addValue(iter->second, value); ++ } + } else if(name=="artist") { + artist = value; + } else if(name=="album") { +- string albumUri = indexable->newAnonymousUri(); +- +- indexable->addValue(factory->albumField, albumUri); +- indexable->addTriplet(albumUri, typePropertyName, albumClassName); +- indexable->addTriplet(albumUri, titlePropertyName, value); ++ addStatement(indexable, albumUri, titlePropertyName, value); + } else if(name=="composer") { + string composerUri = 
indexable->newAnonymousUri(); + +@@ -206,6 +223,10 @@ OggThroughAnalyzer::connectInputStream(InputStream* in) { + indexable->addTriplet(performerUri, typePropertyName, contactClassName); + indexable->addTriplet(performerUri, fullnamePropertyName, performer); + } ++ if(!albumUri.empty()) { ++ indexable->addValue(factory->albumField, albumUri); ++ indexable->addTriplet(albumUri, typePropertyName, albumClassName); ++ } + + // set the "codec" value + indexable->addValue(factory->fields.find("codec")->second, "Ogg/Vorbis"); +-- +1.7.10.4 + diff --git a/SOURCES/0002-gcc47-fix-unistd.h-header-required-unconditionally-f.patch b/SOURCES/0002-gcc47-fix-unistd.h-header-required-unconditionally-f.patch new file mode 100644 index 0000000..b6f9d1e --- /dev/null +++ b/SOURCES/0002-gcc47-fix-unistd.h-header-required-unconditionally-f.patch @@ -0,0 +1,27 @@ +From a8e97f672325557b3fbc84b987299350ec5ac10b Mon Sep 17 00:00:00 2001 +From: Rex Dieter +Date: Mon, 9 Jan 2012 11:13:25 -0600 +Subject: [PATCH 2/3] gcc47 fix, unistd.h header required unconditionally for + 'sleep' + +--- + bin/daemon/eventlistener/eventlistenerqueue.cpp | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/bin/daemon/eventlistener/eventlistenerqueue.cpp b/bin/daemon/eventlistener/eventlistenerqueue.cpp +index 9fcdcb1..1e97599 100644 +--- a/bin/daemon/eventlistener/eventlistenerqueue.cpp ++++ b/bin/daemon/eventlistener/eventlistenerqueue.cpp +@@ -27,9 +27,7 @@ + #include + #include + +-#if defined(__SUNPRO_CC) + #include +-#endif + + using namespace std; + +-- +1.7.10.4 + diff --git a/SOURCES/0003-Fix-return-value-wrong-type.patch b/SOURCES/0003-Fix-return-value-wrong-type.patch new file mode 100644 index 0000000..b6c129a --- /dev/null +++ b/SOURCES/0003-Fix-return-value-wrong-type.patch @@ -0,0 +1,54 @@ +From 1a291699d17fab42aa35505093e09efab8141eb2 Mon Sep 17 00:00:00 2001 +From: Christoph Feck +Date: Wed, 21 Mar 2012 23:36:56 +0100 +Subject: [PATCH 3/3] Fix return value (wrong type) + +--- + 
bin/daemon/dbus/dbustest.cpp | 2 +- + bin/daemon/socketserver.cpp | 6 +++--- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/bin/daemon/dbus/dbustest.cpp b/bin/daemon/dbus/dbustest.cpp +index 4f86376..c065f1a 100644 +--- a/bin/daemon/dbus/dbustest.cpp ++++ b/bin/daemon/dbus/dbustest.cpp +@@ -67,7 +67,7 @@ serverthread(void*) { + } + if (DBUS_REQUEST_NAME_REPLY_PRIMARY_OWNER != ret) { + fprintf(stderr, "Not Primary Owner (%d)\n", ret); +- return false; ++ return 0; + } + + for (int i=0; i<10; ++i) { +diff --git a/bin/daemon/socketserver.cpp b/bin/daemon/socketserver.cpp +index 7212b0d..777831e 100644 +--- a/bin/daemon/socketserver.cpp ++++ b/bin/daemon/socketserver.cpp +@@ -54,7 +54,7 @@ SocketServer::run(void*) { + sd = socket(AF_UNIX, SOCK_STREAM, 0); + if(sd < 0) { + perror("cannot open socket "); +- return false; ++ return 0; + } + + /* set the address */ +@@ -67,12 +67,12 @@ SocketServer::run(void*) { + sock.sun_family = AF_UNIX; + if (bind(sd, (struct sockaddr *)&sock, sizeof(sock))<0) { + perror("cannot bind port "); +- return false; ++ return 0; + } + + if (::listen(sd, 5) < 0) { + perror("cannot listen to port"); +- return false; ++ return 0; + } + + while (interface->isActive()) { +-- +1.7.10.4 + diff --git a/SOURCES/0003-Fixed-indexing-of-m3u-files.patch b/SOURCES/0003-Fixed-indexing-of-m3u-files.patch new file mode 100644 index 0000000..3d79631 --- /dev/null +++ b/SOURCES/0003-Fixed-indexing-of-m3u-files.patch @@ -0,0 +1,114 @@ +From ee49b5e1a8065ae7823c1ee091ace2e7741059e9 Mon Sep 17 00:00:00 2001 +From: Sebastian Trueg +Date: Mon, 13 Feb 2012 09:20:47 +0100 +Subject: [PATCH 3/8] Fixed indexing of m3u files. + +The analyzer now constructs absolute paths to the containing files and +creates nie:links relations to them. nie:hasLogicalPart does not make +much sense since the audio files are not actually contained in the +playlist, they are just referenced. NFO specifies the usage of rdf:list. 
+However, that is not supported in Nepomuk at the moment. Thus, we +cannot put any order on the linked audio files yet. + +REVIEW: 103961 +--- + lib/lineanalyzers/m3ustreamanalyzer.cpp | 33 ++++++++++++++++++++++++++----- + lib/lineanalyzers/m3ustreamanalyzer.h | 4 ++++ + 2 files changed, 32 insertions(+), 5 deletions(-) + +diff --git a/lib/lineanalyzers/m3ustreamanalyzer.cpp b/lib/lineanalyzers/m3ustreamanalyzer.cpp +index dacc15a..c20b28a 100644 +--- a/lib/lineanalyzers/m3ustreamanalyzer.cpp ++++ b/lib/lineanalyzers/m3ustreamanalyzer.cpp +@@ -26,13 +26,16 @@ + #include + #include + ++#include ++#include ++ + // AnalyzerFactory + void M3uLineAnalyzerFactory::registerFields(Strigi::FieldRegister& reg) + { + // track list length is easily obtained via API + // tracksField = reg.registerField(); + trackPathField = reg.registerField( +- "http://www.semanticdesktop.org/ontologies/2007/01/19/nie#hasLogicalPart"); ++ "http://www.semanticdesktop.org/ontologies/2007/01/19/nie#links"); + m3uTypeField = reg.registerField( + "http://freedesktop.org/standards/xesam/1.0/core#formatSubtype"); + typeField = reg.typeField; +@@ -43,7 +46,7 @@ void M3uLineAnalyzerFactory::registerFields(Strigi::FieldRegister& reg) + } + + // Analyzer +-void M3uLineAnalyzer::startAnalysis(Strigi::AnalysisResult* i) ++void M3uLineAnalyzer::startAnalysis(Strigi::AnalysisResult* i) + { + extensionOk = i->extension() == "m3u" || i->extension() == "M3U"; + +@@ -52,7 +55,24 @@ void M3uLineAnalyzer::startAnalysis(Strigi::AnalysisResult* i) + count = 0; + } + +-void M3uLineAnalyzer::handleLine(const char* data, uint32_t length) ++std::string M3uLineAnalyzer::constructAbsolutePath(const std::string &relative) const ++{ ++ if(char* buf = realpath(analysisResult->path().c_str(), 0)) { ++#ifdef _WIN32 ++ static const char s_pathSeparator = '\\'; ++#else ++ static const char s_pathSeparator = '/'; ++#endif ++ std::string path(buf); ++ free(buf); ++ return path.substr(0, path.rfind(s_pathSeparator)+1) + relative; 
++ } ++ else { ++ return std::string(); ++ } ++} ++ ++void M3uLineAnalyzer::handleLine(const char* data, uint32_t length) + { + if (!extensionOk) + return; +@@ -68,8 +88,11 @@ void M3uLineAnalyzer::handleLine(const char* data, uint32_t length) + //if (line == 1) + // analysisResult->addValue(factory->m3uTypeField, "simple"); + +- // TODO: Check for a valid url with QUrl +- analysisResult->addValue(factory->trackPathField, std::string(data, length)); ++ // we create absolute paths and drop links to non-existing files ++ const std::string path = constructAbsolutePath(std::string(data, length)); ++ if(!access(path.c_str(), F_OK)) { ++ analysisResult->addValue(factory->trackPathField, path); ++ } + + ++count; + } else if (line == 1 && strncmp(data, "#EXTM3U", 7) == 0) { +diff --git a/lib/lineanalyzers/m3ustreamanalyzer.h b/lib/lineanalyzers/m3ustreamanalyzer.h +index 461def3..9033f14 100644 +--- a/lib/lineanalyzers/m3ustreamanalyzer.h ++++ b/lib/lineanalyzers/m3ustreamanalyzer.h +@@ -26,6 +26,8 @@ + #include + #include + ++#include ++ + class M3uLineAnalyzerFactory; + + class M3uLineAnalyzer : public Strigi::StreamLineAnalyzer +@@ -37,6 +39,8 @@ private: + bool extensionOk; + int32_t count; + ++ std::string constructAbsolutePath(const std::string& relative) const; ++ + public: + M3uLineAnalyzer(const M3uLineAnalyzerFactory* f) : factory(f) {} + ~M3uLineAnalyzer() {} +-- +1.7.10.4 + diff --git a/SOURCES/0004-Fix-FLAC-Files-Remove-addtional-db-in-replaygain.patch b/SOURCES/0004-Fix-FLAC-Files-Remove-addtional-db-in-replaygain.patch new file mode 100644 index 0000000..ca18bf4 --- /dev/null +++ b/SOURCES/0004-Fix-FLAC-Files-Remove-addtional-db-in-replaygain.patch @@ -0,0 +1,85 @@ +From 0fb8aee587436663cbb60bf743882c70954f8683 Mon Sep 17 00:00:00 2001 +From: Vishesh Handa +Date: Tue, 14 Feb 2012 21:50:13 +0530 +Subject: [PATCH 4/8] Fix FLAC Files: Remove addtional 'db' in replaygain + +FLAC files have a parameter named "replaygain" which is in the format " dB", +and because 
of that Nepomuk cannot parse it as a float. + +REVIEW: 103977 +--- + lib/endanalyzers/flacendanalyzer.cpp | 41 ++++++++++++++++++++++------------ + 1 file changed, 27 insertions(+), 14 deletions(-) + +diff --git a/lib/endanalyzers/flacendanalyzer.cpp b/lib/endanalyzers/flacendanalyzer.cpp +index 8877d1d..7e34270 100644 +--- a/lib/endanalyzers/flacendanalyzer.cpp ++++ b/lib/endanalyzers/flacendanalyzer.cpp +@@ -124,6 +124,16 @@ addStatement(AnalysisResult& indexable, string& subject, const string& predicate + indexable.addTriplet(subject, predicate, object); + } + ++string ++removeAlphabets(const string& str) { ++ std::string newStr; ++ newStr.reserve(str.length()); ++ for( int i=0; i::const_iterator iter + = factory->fields.find(name); +- const string value(p2+eq+1, size-eq-1); ++ string value(p2+eq+1, size-eq-1); + + if (iter != factory->fields.end()) { +- // Hack: the tracknumber sometimes contains the track count, too +- int pos = 0; +- if(name=="tracknumber" && (pos = value.find_first_of('/')) > 0 ) { +- // the track number +- indexable.addValue(iter->second, value.substr(0, pos)); +- // the track count +- addStatement(indexable, albumUri, albumTrackCountName, value.substr(pos+1)); +- } +- else { +- indexable.addValue(iter->second, value); +- } ++ // Hack: the tracknumber sometimes contains the track count, too ++ int pos = 0; ++ if(name=="tracknumber" && (pos = value.find_first_of('/')) > 0 ) { ++ // the track number ++ indexable.addValue(iter->second, value.substr(0, pos)); ++ // the track count ++ addStatement(indexable, albumUri, albumTrackCountName, value.substr(pos+1)); ++ } ++ else { ++ if(name == "replaygain_track_gain") ++ value = removeAlphabets(value); ++ indexable.addValue(iter->second, value); ++ } + } else if(name=="artist") { + artist = value; + } else if(name=="lyrics") { +@@ -256,9 +268,10 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* + } else if(name=="trackcount") { + addStatement(indexable, albumUri, 
albumTrackCountPropertyName, value); + } else if(name=="replaygain_album_gain") { +- addStatement(indexable, albumUri, albumGainPropertyName, value); ++ // the gain is often in the form "number dB", the appending "dB" must be removed ++ addStatement(indexable, albumUri, albumGainPropertyName, removeAlphabets(value)); + } else if(name=="replaygain_album_peak") { +- addStatement(indexable, albumUri, albumPeakGainPropertyName, value); ++ addStatement(indexable, albumUri, albumPeakGainPropertyName, removeAlphabets(value)); + } else if(name=="composer") { + const string composerUri( indexable.newAnonymousUri() ); + +-- +1.7.10.4 + diff --git a/SOURCES/0005-Fix-flac-analizer-was-importing-only-one-artist-tag.patch b/SOURCES/0005-Fix-flac-analizer-was-importing-only-one-artist-tag.patch new file mode 100644 index 0000000..7f2849b --- /dev/null +++ b/SOURCES/0005-Fix-flac-analizer-was-importing-only-one-artist-tag.patch @@ -0,0 +1,103 @@ +From f1c837823b6dde8464f46ccb02a2c91eff69bee0 Mon Sep 17 00:00:00 2001 +From: Ignacio Serantes +Date: Thu, 14 Jun 2012 20:15:10 +0200 +Subject: [PATCH 5/8] Fix: flac analizer was importing only one artist tag. + REVIEW:105208 + +--- + lib/endanalyzers/flacendanalyzer.cpp | 37 +++++++++++++++++++++------------- + 1 file changed, 23 insertions(+), 14 deletions(-) + +diff --git a/lib/endanalyzers/flacendanalyzer.cpp b/lib/endanalyzers/flacendanalyzer.cpp +index 7e34270..0a7d680 100644 +--- a/lib/endanalyzers/flacendanalyzer.cpp ++++ b/lib/endanalyzers/flacendanalyzer.cpp +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + using namespace Strigi; + using namespace std; + +@@ -212,7 +213,7 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* + // in Vorbis comments the "artist" field is used for the performer in modern music + // but for the composer in calssical music. 
Thus, we cache both and make the decision + // at the end +- string artist, performer; ++ list artists, performers; + + // read all the comments + p2 += 4; +@@ -252,7 +253,7 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* + indexable.addValue(iter->second, value); + } + } else if(name=="artist") { +- artist = value; ++ artists.push_back(value); + } else if(name=="lyrics") { + indexable.addText(value.c_str(), + (int32_t)value.length()); +@@ -285,7 +286,7 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* + indexable.addTriplet(publisherUri, typePropertyName, contactClassName); + indexable.addTriplet(publisherUri, fullnamePropertyName, value); + } else if(name=="performer") { +- performer = value; ++ performers.push_back(value); + } + } + } else { +@@ -298,8 +299,8 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* + // we now decide how to store the artist and performer as suggested by the Vorbis comments spec + const Strigi::RegisteredField* artistField = 0; + const Strigi::RegisteredField* performerField = 0; +- if (!artist.empty()) { +- if (!performer.empty()) { ++ if (!artists.empty()) { ++ if (!performers.empty()) { + artistField = factory->composerField; + performerField = factory->performerField; + } +@@ -307,22 +308,30 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* + artistField = factory->performerField; + } + } +- else if (!performer.empty()) { ++ else if (!performers.empty()) { + performerField = factory->performerField; + } + if (artistField) { +- const string artistUri( indexable.newAnonymousUri() ); ++ list::iterator aIt; + +- indexable.addValue(artistField, artistUri); +- indexable.addTriplet(artistUri, typePropertyName, contactClassName); +- indexable.addTriplet(artistUri, fullnamePropertyName, artist); ++ for(aIt=artists.begin(); aIt != artists.end(); ++aIt) { ++ const string artistUri( 
indexable.newAnonymousUri() ); ++ ++ indexable.addValue(artistField, artistUri); ++ indexable.addTriplet(artistUri, typePropertyName, contactClassName); ++ indexable.addTriplet(artistUri, fullnamePropertyName, *aIt); ++ } + } + if (performerField) { +- const string performerUri( indexable.newAnonymousUri() ); ++ list::iterator pIt; + +- indexable.addValue(performerField, performerUri); +- indexable.addTriplet(performerUri, typePropertyName, contactClassName); +- indexable.addTriplet(performerUri, fullnamePropertyName, performer); ++ for(pIt=performers.begin(); pIt != performers.end(); ++pIt) { ++ const string performerUri( indexable.newAnonymousUri() ); ++ ++ indexable.addValue(performerField, performerUri); ++ indexable.addTriplet(performerUri, typePropertyName, contactClassName); ++ indexable.addTriplet(performerUri, fullnamePropertyName, *pIt); ++ } + } + + if(!albumUri.empty()) { +-- +1.7.10.4 + diff --git a/SOURCES/0006-Fix-non-numeric-genres-in-id3-v2-mp3-are-ignored.patch b/SOURCES/0006-Fix-non-numeric-genres-in-id3-v2-mp3-are-ignored.patch new file mode 100644 index 0000000..6fed939 --- /dev/null +++ b/SOURCES/0006-Fix-non-numeric-genres-in-id3-v2-mp3-are-ignored.patch @@ -0,0 +1,666 @@ +From 17e9cdbdee9b3b3cdccab416004a99547a7196a7 Mon Sep 17 00:00:00 2001 +From: Ignacio Serantes +Date: Sun, 17 Jun 2012 20:39:36 +0200 +Subject: [PATCH 6/8] Fix: non numeric genres in id3 v2 mp3 are ignored. + REVIEW:105242. 
+ +--- + id3endanalyzer.cpp | 646 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 646 insertions(+) + create mode 100644 id3endanalyzer.cpp + +diff --git a/id3endanalyzer.cpp b/id3endanalyzer.cpp +new file mode 100644 +index 0000000..677ece0 +--- /dev/null ++++ b/id3endanalyzer.cpp +@@ -0,0 +1,646 @@ ++/* This file is part of Strigi Desktop Search ++ * ++ * Copyright (C) 2006 Jos van den Oever ++ * 2009 Evgeny Egorochkin ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Library General Public ++ * License as published by the Free Software Foundation; either ++ * version 2 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Library General Public License for more details. ++ * ++ * You should have received a copy of the GNU Library General Public License ++ * along with this library; see the file COPYING.LIB. If not, write to ++ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, ++ * Boston, MA 02110-1301, USA. 
++ */ ++ ++#ifdef HAVE_CONFIG_H ++# include "config.h" ++#endif ++ ++#include "id3endanalyzer.h" ++#include "analysisresult.h" ++#include "../rdfnamespaces.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef ICONV_SECOND_ARGUMENT_IS_CONST ++ #define ICONV_CONST const ++#else ++ #define ICONV_CONST ++#endif ++ ++using namespace Strigi; ++using namespace std; ++ ++const string ++ typePropertyName( ++ RDF "type"), ++ fullnamePropertyName( ++ NCO "fullname"), ++ titlePropertyName( ++ NIE "title"), ++ albumTrackCountPropertyName( ++ NMM_DRAFT "albumTrackCount"), ++ discNumberPropertyName( ++ NMM_DRAFT "setNumber"), ++ discCountPropertyName( ++ NMM_DRAFT "setSize"), ++ ++ musicClassName( ++ NMM_DRAFT "MusicPiece"), ++ audioClassName( ++ NFO "Audio"), ++ albumClassName( ++ NMM_DRAFT "MusicAlbum"), ++ contactClassName( ++ NCO "Contact"); ++ ++/* ++ENCA autodetection of broken encodings. First, need to make sure it's going to be actually useful. 
++ID3v2.0 ++play counter:needs nepomuk resolution ++replaygain +++lyrics +++Improve: ++ creation date: ++ language: support multiple ++ Genre ++ album art type handling ++VBR detection ++*/ ++ ++static const string genres[148] = { ++ "Blues", ++ "Classic Rock", ++ "Country", ++ "Dance", ++ "Disco", ++ "Funk", ++ "Grunge", ++ "Hip-Hop", ++ "Jazz", ++ "Metal", ++ "New Age", ++ "Oldies", ++ "Other", ++ "Pop", ++ "R&B", ++ "Rap", ++ "Reggae", ++ "Rock", ++ "Techno", ++ "Industrial", ++ "Alternative", ++ "Ska", ++ "Death Metal", ++ "Pranks", ++ "Soundtrack", ++ "Euro-Techno", ++ "Ambient", ++ "Trip-Hop", ++ "Vocal", ++ "Jazz+Funk", ++ "Fusion", ++ "Trance", ++ "Classical", ++ "Instrumental", ++ "Acid", ++ "House", ++ "Game", ++ "Sound Clip", ++ "Gospel", ++ "Noise", ++ "Alternative Rock", ++ "Bass", ++ "Soul", ++ "Punk", ++ "Space", ++ "Meditative", ++ "Instrumental Pop", ++ "Instrumental Rock", ++ "Ethnic", ++ "Gothic", ++ "Darkwave", ++ "Techno-Industrial", ++ "Electronic", ++ "Pop-Folk", ++ "Eurodance", ++ "Dream", ++ "Southern Rock", ++ "Comedy", ++ "Cult", ++ "Gangsta", ++ "Top 40", ++ "Christian Rap", ++ "Pop/Funk", ++ "Jungle", ++ "Native American", ++ "Cabaret", ++ "New Wave", ++ "Psychedelic", ++ "Rave", ++ "Showtunes", ++ "Trailer", ++ "Lo-Fi", ++ "Tribal", ++ "Acid Punk", ++ "Acid Jazz", ++ "Polka", ++ "Retro", ++ "Musical", ++ "Rock & Roll", ++ "Hard Rock", ++ "Folk", ++ "Folk/Rock", ++ "National Folk", ++ "Swing", ++ "Fusion", ++ "Bebop", ++ "Latin", ++ "Revival", ++ "Celtic", ++ "Bluegrass", ++ "Avantgarde", ++ "Gothic Rock", ++ "Progressive Rock", ++ "Psychedelic Rock", ++ "Symphonic Rock", ++ "Slow Rock", ++ "Big Band", ++ "Chorus", ++ "Easy Listening", ++ "Acoustic", ++ "Humour", ++ "Speech", ++ "Chanson", ++ "Opera", ++ "Chamber Music", ++ "Sonata", ++ "Symphony", ++ "Booty Bass", ++ "Primus", ++ "Porn Groove", ++ "Satire", ++ "Slow Jam", ++ "Club", ++ "Tango", ++ "Samba", ++ "Folklore", ++ "Ballad", ++ "Power Ballad", ++ "Rhythmic Soul", ++ 
"Freestyle", ++ "Duet", ++ "Punk Rock", ++ "Drum Solo", ++ "A Cappella", ++ "Euro-House", ++ "Dance Hall", ++ "Goa", ++ "Drum & Bass", ++ "Club-House", ++ "Hardcore", ++ "Terror", ++ "Indie", ++ "BritPop", ++ "Negerpunk", ++ "Polsk Punk", ++ "Beat", ++ "Christian Gangsta Rap", ++ "Heavy Metal", ++ "Black Metal", ++ "Crossover", ++ "Contemporary Christian", ++ "Christian Rock", ++ "Merengue", ++ "Salsa", ++ "Thrash Metal", ++ "Anime", ++ "Jpop", ++ "Synthpop" ++}; ++ ++const uint32_t bitrate [15] = {0, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000, 160000, 192000, 224000, 256000, 320000 }; ++ ++const uint32_t samplerate[3] = {44100, 48000, 32000}; ++ ++const char * encodings[5] = {"ISO-8859-1", "UTF-16", "UTF-16BE", "UTF-8", "UTF-16LE"}; ++ ++#ifndef _GNU_SOURCE ++size_t ++strnlen(const char *s, size_t maxlen) { ++ for(size_t i=0; i10000 && capacity>len*8) ) { ++ capacity = len*3; ++ out = (char*)realloc(out, len*3); ++ } ++ ++ char *result = out; ++ size_t reslen = capacity; ++ ++ ICONV_CONST char *input = (char *)data; ++ iconv(conv, &input, &len, &result, &reslen); ++ ++ return string(out,capacity-reslen); ++} ++ ++void ++ID3EndAnalyzerFactory::registerFields(FieldRegister& r) { ++ createdField = r.registerField(NIE "contentCreated"); ++ subjectField = r.registerField(NIE "subject"); ++ titleField = r.registerField(titlePropertyName); ++ descriptionField = r.registerField(NIE "description"); ++ commentField = r.registerField(NIE "comment"); ++ albumField = r.registerField(NMM_DRAFT "musicAlbum"); ++ genreField = r.registerField(NMM_DRAFT "genre"); ++ composerField = r.registerField(NMM_DRAFT "composer"); ++ performerField = r.registerField(NMM_DRAFT "performer"); ++ lyricistField = r.registerField(NMM_DRAFT "lyricist"); ++ publisherField = r.registerField(NCO "publisher"); ++ languageField = r.registerField(NIE "language"); ++ copyrightField = r.registerField(NIE "copyright"); ++ trackNumberField = r.registerField(NMM_DRAFT "trackNumber"); ++ 
discNumberField = r.registerField(discNumberPropertyName); ++ durationField = r.registerField(NFO "duration"); ++ typeField = r.typeField; ++ ++ bitrateField = r.registerField(NFO "averageBitrate"); ++ samplerateField = r.registerField(NFO "sampleRate"); ++ codecField = r.registerField(NFO "codec"); ++ channelsField = r.registerField(NFO "channels"); ++} ++ ++inline ++void ++addStatement(AnalysisResult &indexable, string& subject, const string& predicate, const string& object) { ++ if (subject.empty()) ++ subject = indexable.newAnonymousUri(); ++ indexable.addTriplet(subject, predicate, object); ++} ++ ++inline ++int32_t readAsyncSize(const unsigned char* b) { ++ return (((int32_t)b[0])<<21) + (((int32_t)b[1])<<14) ++ + (((int32_t)b[2])<<7) + ((int32_t)b[3]); ++} ++ ++int32_t ++readSize(const unsigned char* b, bool async) { ++ const signed char* c = (const signed char*)b; ++ if (async) { ++ if (c[0] < 0 || c[1] < 0 || c[2] < 0 || c[3] < 0) ++ return -1; ++ return readAsyncSize(b); ++ } ++ return readBigEndianInt32(b); ++} ++bool ++ID3EndAnalyzer::checkHeader(const char* header, int32_t headersize) const { ++ const unsigned char* usbuf = (const unsigned char*)header; ++ int32_t i; ++ ++ for(i=0; (header[i] == '\0') && (i=6+i) ++ && ( ++ (strncmp("ID3", header+i, 3) == 0 // check that it's ID3 ++ && usbuf[3+i] <= 4 // only handle version <= 4 ++ && (usbuf[5+i]&~0x80) == 0) // we're too dumb too handle other flags ++ || ++ ((unsigned char)header[0+i] == 0xff && ((unsigned char)header[1+i]&0xfe) == 0xfa ++ && (unsigned char)header[2+i]>>4 != 0xf // MP3 frame header is ok too ++ && (((unsigned char)header[2+i]>>2)&3) != 3) ++ ); ++ ++} ++ ++static void trim(string& s,const string& drop = " ") ++{ ++ string r = s.erase(s.find_last_not_of(drop)+1); ++ r.erase(0, r.find_first_not_of(drop)); ++} ++ ++static bool extract_and_trim(const char* buf, int offset, int length, string& s) ++{ ++ // We're extracting here the ID3v1 tags and doing some sanity checks: ++ // 1) Strip of 
all leading and prefixed spaces ++ // 2) Test if string contains at least something ++ if (!buf[offset]) ++ return false; ++ ++ s = string(buf + offset, strnlen(buf + offset, length)); ++ trim(s); ++ // Return true if the extracted value is not empty (read: contains something) ++ return !s.empty(); ++} ++ ++signed char ++ID3EndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* in) { ++ const int max_padding = 1000; ++ if(!in) ++ return -1; ++ ++ bool found_title = false, found_artist = false, ++ found_album = false, found_comment = false, ++ found_year = false, found_track = false, ++ found_genre = false, found_tag = false; ++ string albumUri; ++ char albumArtNum = '\0'; ++ ++ // read 10 byte header ++ const char* buf; ++ int32_t nread = in->read(buf, 10+max_padding, 10+max_padding); ++ ++ // parse ID3v2* tag ++ ++ if (nread == 10+max_padding && strncmp("ID3", buf, 3) == 0) { // check for ID3 header ++ ++ bool async = buf[3] >= 4; ++ bool unsync = (buf[5] & 0x80)!=0; ++ ++ // calculate size from 4 syncsafe bytes ++ int32_t size = readAsyncSize((unsigned char*)buf+6); ++ if (size < 0 || size > 5000000) ++ return -1; ++ size += 10+4+max_padding; // add the size of the ID3 header, MP3 frame header and possible padding generated by LAME(workaround) ++ ++ // read the entire tag ++ in->reset(0); ++ nread = in->read(buf, size, size); ++ if (nread != size) ++ return -1; ++ ++ found_tag = true; ++ ++ const char* p = buf + 10; ++ buf += size-4-max_padding; ++ while (p < buf && *p) { ++ size = readSize((unsigned char*)p+4, async); ++ if (size <= 0 || size > (buf-p)-10) { ++ //cerr << "size < 0: " << size << endl; ++ break; ++ } ++ ++ string value; ++ uint8_t enc = p[10]; ++ const char *encoding = enc>4 ? 
encodings[0] : encodings[enc] ; ++ UTF8Convertor conv(encoding); ++ const char *decoded_value; ++ int32_t decoded_value_size; ++ string deunsyncbuf; ++ if (unsync) { ++ deunsyncbuf.reserve(size-1); ++ for(int32_t i = 0; iaddValue(factory->descriptionField, string(desc, desclen) ); ++ } else { ++ indexable.child()->addValue(factory->descriptionField, conv.convert(desc, desclen) ); ++ } ++ } ++ ++ indexable.finishIndexChild(); ++ } ++ } ++ } ++ ++ if (enc == 0 || enc == 3) { ++ value = string(decoded_value, strnlen(decoded_value, decoded_value_size)); ++ } else { ++ value = conv.convert(decoded_value, decoded_value_size); // FIXME: add similar workaround ++ } ++ ++ if (!value.empty()) { ++ if (strncmp("TIT1", p, 4) == 0) { ++ indexable.addValue(factory->subjectField, value); ++ } else if (strncmp("TIT2", p, 4) == 0) { ++ indexable.addValue(factory->titleField, value); ++ found_title = true; ++ } else if (strncmp("TIT3", p, 4) == 0) { ++ indexable.addValue(factory->descriptionField, value); ++ } else if (strncmp("TLAN", p, 4) == 0) { ++ indexable.addValue(factory->languageField, value); ++ } else if (strncmp("TCOP", p, 4) == 0) { ++ indexable.addValue(factory->copyrightField, value); ++ } else if ((strncmp("TDRL", p, 4) == 0) || ++ (strncmp("TDAT", p, 4) == 0) || ++ (strncmp("TYER", p, 4) == 0) || ++ (strncmp("TDRC", p, 4) == 0)) { ++ indexable.addValue(factory->createdField, value); ++ found_year = true; ++ } else if ((strncmp("TPE1", p, 4) == 0) || ++ (strncmp("TPE2", p, 4) == 0) || ++ (strncmp("TPE3", p, 4) == 0) || ++ (strncmp("TPE4", p, 4) == 0)) { ++ string performerUri = indexable.newAnonymousUri(); ++ ++ indexable.addValue(factory->performerField, performerUri); ++ indexable.addTriplet(performerUri, typePropertyName, contactClassName); ++ indexable.addTriplet(performerUri, fullnamePropertyName, value); ++ found_artist = true; ++ } else if ((strncmp("TPUB", p, 4) == 0) || ++ (strncmp("TENC", p, 4) == 0)) { ++ string publisherUri = indexable.newAnonymousUri(); 
++ ++ indexable.addValue(factory->publisherField, publisherUri); ++ indexable.addTriplet(publisherUri, typePropertyName, contactClassName); ++ indexable.addTriplet(publisherUri, fullnamePropertyName, value); ++ } else if ((strncmp("TALB", p, 4) == 0) || ++ (strncmp("TOAL", p, 4) == 0)) { ++ addStatement(indexable, albumUri, titlePropertyName, value); ++ found_album = true; ++ } else if (strncmp("TCON", p, 4) == 0) { ++ // The Genre is stored as (number) ++ if( value[0] == '(' && value[value.length()-1] == ')' ) { ++ //vHanda: Maybe one should check if all the characters in between are digits ++ int genreIndex = atoi( value.substr( 1, value.length()-1 ).c_str() ); ++ indexable.addValue(factory->genreField, genres[ genreIndex ]); ++ found_genre = true; ++ } else { ++ // We must not forget that genre could be a string. ++ if (!value.empty()) { ++ indexable.addValue(factory->genreField, value); ++ found_genre = true; ++ } ++ } ++ } else if (strncmp("TLEN", p, 4) == 0) { ++ indexable.addValue(factory->durationField, value); ++ } else if (strncmp("TEXT", p, 4) == 0) { ++ string lyricistUri = indexable.newAnonymousUri(); ++ ++ indexable.addValue(factory->lyricistField, lyricistUri); ++ indexable.addTriplet(lyricistUri, typePropertyName, contactClassName); ++ indexable.addTriplet(lyricistUri, fullnamePropertyName, value); ++ } else if (strncmp("TCOM", p, 4) == 0) { ++ string composerUri = indexable.newAnonymousUri(); ++ ++ indexable.addValue(factory->composerField, composerUri); ++ indexable.addTriplet(composerUri, typePropertyName, contactClassName); ++ indexable.addTriplet(composerUri, fullnamePropertyName, value); ++ } else if (strncmp("TRCK", p, 4) == 0) { ++ istringstream ins(value); ++ int tnum; ++ ins >> tnum; ++ if (!ins.fail()) { ++ indexable.addValue(factory->trackNumberField, tnum); ++ found_track = true; ++ ins.ignore(10,'/'); ++ int tcount; ++ ins >> tcount; ++ if (!ins.fail()) { ++ ostringstream outs; ++ outs << tcount; ++ addStatement(indexable, albumUri, 
albumTrackCountPropertyName, outs.str()); ++ } ++ } ++ } else if (strncmp("TPOS", p, 4) == 0) { ++ istringstream ins(value); ++ int dnum; ++ ins >> dnum; ++ if (!ins.fail()) { ++ indexable.addValue(factory->discNumberField, dnum); ++ ins.ignore(10,'/'); ++ int dcount; ++ ins >> dcount; ++ if (!ins.fail()) { ++ ostringstream outs; ++ outs << dcount; ++ addStatement(indexable, albumUri, discCountPropertyName, outs.str()); ++ } ++ } ++ } ++ } ++ p += size + 10; ++ } ++ } ++ // parse MP3 frame header ++ ++ int bitrateindex, samplerateindex; ++ int i; ++ for(i=0; (buf[i]=='\0') && (i>4)) != 0xf) ++ && ((samplerateindex = (((unsigned char)buf[2+i]>>2)&3)) != 3 )) { // is this MP3? ++ ++ indexable.addValue(factory->typeField, audioClassName); ++ // FIXME: no support for VBR :( ++ // ideas: compare bitrate from the frame with stream size/duration from ID3 tags ++ // check several consecutive frames to see if bitrate is different ++ // in neither case you can be sure to properly detected VBR :( ++ indexable.addValue(factory->bitrateField, bitrate[bitrateindex]); ++ indexable.addValue(factory->samplerateField, samplerate[samplerateindex]); ++ indexable.addValue(factory->codecField, "MP3"); ++ indexable.addValue(factory->channelsField, ((buf[3+i]>>6) == 3 ? 
1:2 ) ); ++ } ++ ++ // Parse ID3v1 tag ++ ++ int64_t insize; ++ if ( (insize = in->size()) > (128+nread)) { ++ ++ // read the tag and check signature ++ int64_t nskip = insize-128-nread; ++ if (nskip == in->skip(nskip)) ++ if (in->read(buf, 128, 128)==128) ++ if (!strncmp("TAG", buf, 3)) { ++ ++ found_tag = true; ++ ++ std::string s; ++ ++ if (!found_title && extract_and_trim(buf, 3, 30, s)) { ++ indexable.addValue(factory->titleField, s); ++ } ++ if (!found_artist && extract_and_trim(buf, 33, 30, s)) { ++ const string performerUri = indexable.newAnonymousUri(); ++ indexable.addValue(factory->performerField, performerUri); ++ indexable.addTriplet(performerUri, typePropertyName, contactClassName); ++ indexable.addTriplet(performerUri, fullnamePropertyName, s); ++ } ++ if (!found_album && extract_and_trim(buf, 63, 30, s)) ++ addStatement(indexable, albumUri, titlePropertyName, s); ++ if (!found_year && extract_and_trim(buf, 93, 4, s)) ++ indexable.addValue(factory->createdField, s); ++ if (!found_comment && extract_and_trim(buf, 97, 30, s)) { ++ indexable.addValue(factory->commentField, s); ++ } ++ if (!found_track && !buf[125] && buf[126]) { ++ indexable.addValue(factory->trackNumberField, (int)(buf[126])); ++ } ++ if (!found_genre && (unsigned char)(buf[127]) < 148) ++ indexable.addValue(factory->genreField, genres[(uint8_t)buf[127]]); ++ } ++ } ++ ++ if(!albumUri.empty()) { ++ indexable.addValue(factory->albumField, albumUri); ++ indexable.addTriplet(albumUri, typePropertyName, albumClassName); ++ } ++ ++ if (found_tag) ++ indexable.addValue(factory->typeField, musicClassName); ++ ++ return 0; ++} +-- +1.7.10.4 + diff --git a/SOURCES/0007-Opps-Rmoving-a-wrong-commited-file-id3endanalyzer.cp.patch b/SOURCES/0007-Opps-Rmoving-a-wrong-commited-file-id3endanalyzer.cp.patch new file mode 100644 index 0000000..f376016 --- /dev/null +++ b/SOURCES/0007-Opps-Rmoving-a-wrong-commited-file-id3endanalyzer.cp.patch @@ -0,0 +1,699 @@ +From 
907162391395412c058d7339c4f84533ef92023d Mon Sep 17 00:00:00 2001 +From: Ignacio Serantes +Date: Sun, 17 Jun 2012 21:38:31 +0200 +Subject: [PATCH 7/8] Opps! Rmoving a wrong commited file, + "id3endanalyzer.cpp", and updating the right one + "lib/endanalyzers/id3endanalyzer.cpp". + +--- + id3endanalyzer.cpp | 646 ----------------------------------- + lib/endanalyzers/id3endanalyzer.cpp | 20 +- + 2 files changed, 13 insertions(+), 653 deletions(-) + delete mode 100644 id3endanalyzer.cpp + +diff --git a/id3endanalyzer.cpp b/id3endanalyzer.cpp +deleted file mode 100644 +index 677ece0..0000000 +--- a/id3endanalyzer.cpp ++++ /dev/null +@@ -1,646 +0,0 @@ +-/* This file is part of Strigi Desktop Search +- * +- * Copyright (C) 2006 Jos van den Oever +- * 2009 Evgeny Egorochkin +- * +- * This library is free software; you can redistribute it and/or +- * modify it under the terms of the GNU Library General Public +- * License as published by the Free Software Foundation; either +- * version 2 of the License, or (at your option) any later version. +- * +- * This library is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- * Library General Public License for more details. +- * +- * You should have received a copy of the GNU Library General Public License +- * along with this library; see the file COPYING.LIB. If not, write to +- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +- * Boston, MA 02110-1301, USA. 
+- */ +- +-#ifdef HAVE_CONFIG_H +-# include "config.h" +-#endif +- +-#include "id3endanalyzer.h" +-#include "analysisresult.h" +-#include "../rdfnamespaces.h" +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#ifdef ICONV_SECOND_ARGUMENT_IS_CONST +- #define ICONV_CONST const +-#else +- #define ICONV_CONST +-#endif +- +-using namespace Strigi; +-using namespace std; +- +-const string +- typePropertyName( +- RDF "type"), +- fullnamePropertyName( +- NCO "fullname"), +- titlePropertyName( +- NIE "title"), +- albumTrackCountPropertyName( +- NMM_DRAFT "albumTrackCount"), +- discNumberPropertyName( +- NMM_DRAFT "setNumber"), +- discCountPropertyName( +- NMM_DRAFT "setSize"), +- +- musicClassName( +- NMM_DRAFT "MusicPiece"), +- audioClassName( +- NFO "Audio"), +- albumClassName( +- NMM_DRAFT "MusicAlbum"), +- contactClassName( +- NCO "Contact"); +- +-/* +-ENCA autodetection of broken encodings. First, need to make sure it's going to be actually useful. 
+-ID3v2.0 +-play counter:needs nepomuk resolution +-replaygain +-+lyrics +-+Improve: +- creation date: +- language: support multiple +- Genre +- album art type handling +-VBR detection +-*/ +- +-static const string genres[148] = { +- "Blues", +- "Classic Rock", +- "Country", +- "Dance", +- "Disco", +- "Funk", +- "Grunge", +- "Hip-Hop", +- "Jazz", +- "Metal", +- "New Age", +- "Oldies", +- "Other", +- "Pop", +- "R&B", +- "Rap", +- "Reggae", +- "Rock", +- "Techno", +- "Industrial", +- "Alternative", +- "Ska", +- "Death Metal", +- "Pranks", +- "Soundtrack", +- "Euro-Techno", +- "Ambient", +- "Trip-Hop", +- "Vocal", +- "Jazz+Funk", +- "Fusion", +- "Trance", +- "Classical", +- "Instrumental", +- "Acid", +- "House", +- "Game", +- "Sound Clip", +- "Gospel", +- "Noise", +- "Alternative Rock", +- "Bass", +- "Soul", +- "Punk", +- "Space", +- "Meditative", +- "Instrumental Pop", +- "Instrumental Rock", +- "Ethnic", +- "Gothic", +- "Darkwave", +- "Techno-Industrial", +- "Electronic", +- "Pop-Folk", +- "Eurodance", +- "Dream", +- "Southern Rock", +- "Comedy", +- "Cult", +- "Gangsta", +- "Top 40", +- "Christian Rap", +- "Pop/Funk", +- "Jungle", +- "Native American", +- "Cabaret", +- "New Wave", +- "Psychedelic", +- "Rave", +- "Showtunes", +- "Trailer", +- "Lo-Fi", +- "Tribal", +- "Acid Punk", +- "Acid Jazz", +- "Polka", +- "Retro", +- "Musical", +- "Rock & Roll", +- "Hard Rock", +- "Folk", +- "Folk/Rock", +- "National Folk", +- "Swing", +- "Fusion", +- "Bebop", +- "Latin", +- "Revival", +- "Celtic", +- "Bluegrass", +- "Avantgarde", +- "Gothic Rock", +- "Progressive Rock", +- "Psychedelic Rock", +- "Symphonic Rock", +- "Slow Rock", +- "Big Band", +- "Chorus", +- "Easy Listening", +- "Acoustic", +- "Humour", +- "Speech", +- "Chanson", +- "Opera", +- "Chamber Music", +- "Sonata", +- "Symphony", +- "Booty Bass", +- "Primus", +- "Porn Groove", +- "Satire", +- "Slow Jam", +- "Club", +- "Tango", +- "Samba", +- "Folklore", +- "Ballad", +- "Power Ballad", +- "Rhythmic Soul", +- 
"Freestyle", +- "Duet", +- "Punk Rock", +- "Drum Solo", +- "A Cappella", +- "Euro-House", +- "Dance Hall", +- "Goa", +- "Drum & Bass", +- "Club-House", +- "Hardcore", +- "Terror", +- "Indie", +- "BritPop", +- "Negerpunk", +- "Polsk Punk", +- "Beat", +- "Christian Gangsta Rap", +- "Heavy Metal", +- "Black Metal", +- "Crossover", +- "Contemporary Christian", +- "Christian Rock", +- "Merengue", +- "Salsa", +- "Thrash Metal", +- "Anime", +- "Jpop", +- "Synthpop" +-}; +- +-const uint32_t bitrate [15] = {0, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000, 160000, 192000, 224000, 256000, 320000 }; +- +-const uint32_t samplerate[3] = {44100, 48000, 32000}; +- +-const char * encodings[5] = {"ISO-8859-1", "UTF-16", "UTF-16BE", "UTF-8", "UTF-16LE"}; +- +-#ifndef _GNU_SOURCE +-size_t +-strnlen(const char *s, size_t maxlen) { +- for(size_t i=0; i10000 && capacity>len*8) ) { +- capacity = len*3; +- out = (char*)realloc(out, len*3); +- } +- +- char *result = out; +- size_t reslen = capacity; +- +- ICONV_CONST char *input = (char *)data; +- iconv(conv, &input, &len, &result, &reslen); +- +- return string(out,capacity-reslen); +-} +- +-void +-ID3EndAnalyzerFactory::registerFields(FieldRegister& r) { +- createdField = r.registerField(NIE "contentCreated"); +- subjectField = r.registerField(NIE "subject"); +- titleField = r.registerField(titlePropertyName); +- descriptionField = r.registerField(NIE "description"); +- commentField = r.registerField(NIE "comment"); +- albumField = r.registerField(NMM_DRAFT "musicAlbum"); +- genreField = r.registerField(NMM_DRAFT "genre"); +- composerField = r.registerField(NMM_DRAFT "composer"); +- performerField = r.registerField(NMM_DRAFT "performer"); +- lyricistField = r.registerField(NMM_DRAFT "lyricist"); +- publisherField = r.registerField(NCO "publisher"); +- languageField = r.registerField(NIE "language"); +- copyrightField = r.registerField(NIE "copyright"); +- trackNumberField = r.registerField(NMM_DRAFT "trackNumber"); +- 
discNumberField = r.registerField(discNumberPropertyName); +- durationField = r.registerField(NFO "duration"); +- typeField = r.typeField; +- +- bitrateField = r.registerField(NFO "averageBitrate"); +- samplerateField = r.registerField(NFO "sampleRate"); +- codecField = r.registerField(NFO "codec"); +- channelsField = r.registerField(NFO "channels"); +-} +- +-inline +-void +-addStatement(AnalysisResult &indexable, string& subject, const string& predicate, const string& object) { +- if (subject.empty()) +- subject = indexable.newAnonymousUri(); +- indexable.addTriplet(subject, predicate, object); +-} +- +-inline +-int32_t readAsyncSize(const unsigned char* b) { +- return (((int32_t)b[0])<<21) + (((int32_t)b[1])<<14) +- + (((int32_t)b[2])<<7) + ((int32_t)b[3]); +-} +- +-int32_t +-readSize(const unsigned char* b, bool async) { +- const signed char* c = (const signed char*)b; +- if (async) { +- if (c[0] < 0 || c[1] < 0 || c[2] < 0 || c[3] < 0) +- return -1; +- return readAsyncSize(b); +- } +- return readBigEndianInt32(b); +-} +-bool +-ID3EndAnalyzer::checkHeader(const char* header, int32_t headersize) const { +- const unsigned char* usbuf = (const unsigned char*)header; +- int32_t i; +- +- for(i=0; (header[i] == '\0') && (i=6+i) +- && ( +- (strncmp("ID3", header+i, 3) == 0 // check that it's ID3 +- && usbuf[3+i] <= 4 // only handle version <= 4 +- && (usbuf[5+i]&~0x80) == 0) // we're too dumb too handle other flags +- || +- ((unsigned char)header[0+i] == 0xff && ((unsigned char)header[1+i]&0xfe) == 0xfa +- && (unsigned char)header[2+i]>>4 != 0xf // MP3 frame header is ok too +- && (((unsigned char)header[2+i]>>2)&3) != 3) +- ); +- +-} +- +-static void trim(string& s,const string& drop = " ") +-{ +- string r = s.erase(s.find_last_not_of(drop)+1); +- r.erase(0, r.find_first_not_of(drop)); +-} +- +-static bool extract_and_trim(const char* buf, int offset, int length, string& s) +-{ +- // We're extracting here the ID3v1 tags and doing some sanity checks: +- // 1) Strip of 
all leading and prefixed spaces +- // 2) Test if string contains at least something +- if (!buf[offset]) +- return false; +- +- s = string(buf + offset, strnlen(buf + offset, length)); +- trim(s); +- // Return true if the extracted value is not empty (read: contains something) +- return !s.empty(); +-} +- +-signed char +-ID3EndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* in) { +- const int max_padding = 1000; +- if(!in) +- return -1; +- +- bool found_title = false, found_artist = false, +- found_album = false, found_comment = false, +- found_year = false, found_track = false, +- found_genre = false, found_tag = false; +- string albumUri; +- char albumArtNum = '\0'; +- +- // read 10 byte header +- const char* buf; +- int32_t nread = in->read(buf, 10+max_padding, 10+max_padding); +- +- // parse ID3v2* tag +- +- if (nread == 10+max_padding && strncmp("ID3", buf, 3) == 0) { // check for ID3 header +- +- bool async = buf[3] >= 4; +- bool unsync = (buf[5] & 0x80)!=0; +- +- // calculate size from 4 syncsafe bytes +- int32_t size = readAsyncSize((unsigned char*)buf+6); +- if (size < 0 || size > 5000000) +- return -1; +- size += 10+4+max_padding; // add the size of the ID3 header, MP3 frame header and possible padding generated by LAME(workaround) +- +- // read the entire tag +- in->reset(0); +- nread = in->read(buf, size, size); +- if (nread != size) +- return -1; +- +- found_tag = true; +- +- const char* p = buf + 10; +- buf += size-4-max_padding; +- while (p < buf && *p) { +- size = readSize((unsigned char*)p+4, async); +- if (size <= 0 || size > (buf-p)-10) { +- //cerr << "size < 0: " << size << endl; +- break; +- } +- +- string value; +- uint8_t enc = p[10]; +- const char *encoding = enc>4 ? 
encodings[0] : encodings[enc] ; +- UTF8Convertor conv(encoding); +- const char *decoded_value; +- int32_t decoded_value_size; +- string deunsyncbuf; +- if (unsync) { +- deunsyncbuf.reserve(size-1); +- for(int32_t i = 0; iaddValue(factory->descriptionField, string(desc, desclen) ); +- } else { +- indexable.child()->addValue(factory->descriptionField, conv.convert(desc, desclen) ); +- } +- } +- +- indexable.finishIndexChild(); +- } +- } +- } +- +- if (enc == 0 || enc == 3) { +- value = string(decoded_value, strnlen(decoded_value, decoded_value_size)); +- } else { +- value = conv.convert(decoded_value, decoded_value_size); // FIXME: add similar workaround +- } +- +- if (!value.empty()) { +- if (strncmp("TIT1", p, 4) == 0) { +- indexable.addValue(factory->subjectField, value); +- } else if (strncmp("TIT2", p, 4) == 0) { +- indexable.addValue(factory->titleField, value); +- found_title = true; +- } else if (strncmp("TIT3", p, 4) == 0) { +- indexable.addValue(factory->descriptionField, value); +- } else if (strncmp("TLAN", p, 4) == 0) { +- indexable.addValue(factory->languageField, value); +- } else if (strncmp("TCOP", p, 4) == 0) { +- indexable.addValue(factory->copyrightField, value); +- } else if ((strncmp("TDRL", p, 4) == 0) || +- (strncmp("TDAT", p, 4) == 0) || +- (strncmp("TYER", p, 4) == 0) || +- (strncmp("TDRC", p, 4) == 0)) { +- indexable.addValue(factory->createdField, value); +- found_year = true; +- } else if ((strncmp("TPE1", p, 4) == 0) || +- (strncmp("TPE2", p, 4) == 0) || +- (strncmp("TPE3", p, 4) == 0) || +- (strncmp("TPE4", p, 4) == 0)) { +- string performerUri = indexable.newAnonymousUri(); +- +- indexable.addValue(factory->performerField, performerUri); +- indexable.addTriplet(performerUri, typePropertyName, contactClassName); +- indexable.addTriplet(performerUri, fullnamePropertyName, value); +- found_artist = true; +- } else if ((strncmp("TPUB", p, 4) == 0) || +- (strncmp("TENC", p, 4) == 0)) { +- string publisherUri = indexable.newAnonymousUri(); 
+- +- indexable.addValue(factory->publisherField, publisherUri); +- indexable.addTriplet(publisherUri, typePropertyName, contactClassName); +- indexable.addTriplet(publisherUri, fullnamePropertyName, value); +- } else if ((strncmp("TALB", p, 4) == 0) || +- (strncmp("TOAL", p, 4) == 0)) { +- addStatement(indexable, albumUri, titlePropertyName, value); +- found_album = true; +- } else if (strncmp("TCON", p, 4) == 0) { +- // The Genre is stored as (number) +- if( value[0] == '(' && value[value.length()-1] == ')' ) { +- //vHanda: Maybe one should check if all the characters in between are digits +- int genreIndex = atoi( value.substr( 1, value.length()-1 ).c_str() ); +- indexable.addValue(factory->genreField, genres[ genreIndex ]); +- found_genre = true; +- } else { +- // We must not forget that genre could be a string. +- if (!value.empty()) { +- indexable.addValue(factory->genreField, value); +- found_genre = true; +- } +- } +- } else if (strncmp("TLEN", p, 4) == 0) { +- indexable.addValue(factory->durationField, value); +- } else if (strncmp("TEXT", p, 4) == 0) { +- string lyricistUri = indexable.newAnonymousUri(); +- +- indexable.addValue(factory->lyricistField, lyricistUri); +- indexable.addTriplet(lyricistUri, typePropertyName, contactClassName); +- indexable.addTriplet(lyricistUri, fullnamePropertyName, value); +- } else if (strncmp("TCOM", p, 4) == 0) { +- string composerUri = indexable.newAnonymousUri(); +- +- indexable.addValue(factory->composerField, composerUri); +- indexable.addTriplet(composerUri, typePropertyName, contactClassName); +- indexable.addTriplet(composerUri, fullnamePropertyName, value); +- } else if (strncmp("TRCK", p, 4) == 0) { +- istringstream ins(value); +- int tnum; +- ins >> tnum; +- if (!ins.fail()) { +- indexable.addValue(factory->trackNumberField, tnum); +- found_track = true; +- ins.ignore(10,'/'); +- int tcount; +- ins >> tcount; +- if (!ins.fail()) { +- ostringstream outs; +- outs << tcount; +- addStatement(indexable, albumUri, 
albumTrackCountPropertyName, outs.str()); +- } +- } +- } else if (strncmp("TPOS", p, 4) == 0) { +- istringstream ins(value); +- int dnum; +- ins >> dnum; +- if (!ins.fail()) { +- indexable.addValue(factory->discNumberField, dnum); +- ins.ignore(10,'/'); +- int dcount; +- ins >> dcount; +- if (!ins.fail()) { +- ostringstream outs; +- outs << dcount; +- addStatement(indexable, albumUri, discCountPropertyName, outs.str()); +- } +- } +- } +- } +- p += size + 10; +- } +- } +- // parse MP3 frame header +- +- int bitrateindex, samplerateindex; +- int i; +- for(i=0; (buf[i]=='\0') && (i>4)) != 0xf) +- && ((samplerateindex = (((unsigned char)buf[2+i]>>2)&3)) != 3 )) { // is this MP3? +- +- indexable.addValue(factory->typeField, audioClassName); +- // FIXME: no support for VBR :( +- // ideas: compare bitrate from the frame with stream size/duration from ID3 tags +- // check several consecutive frames to see if bitrate is different +- // in neither case you can be sure to properly detected VBR :( +- indexable.addValue(factory->bitrateField, bitrate[bitrateindex]); +- indexable.addValue(factory->samplerateField, samplerate[samplerateindex]); +- indexable.addValue(factory->codecField, "MP3"); +- indexable.addValue(factory->channelsField, ((buf[3+i]>>6) == 3 ? 
1:2 ) ); +- } +- +- // Parse ID3v1 tag +- +- int64_t insize; +- if ( (insize = in->size()) > (128+nread)) { +- +- // read the tag and check signature +- int64_t nskip = insize-128-nread; +- if (nskip == in->skip(nskip)) +- if (in->read(buf, 128, 128)==128) +- if (!strncmp("TAG", buf, 3)) { +- +- found_tag = true; +- +- std::string s; +- +- if (!found_title && extract_and_trim(buf, 3, 30, s)) { +- indexable.addValue(factory->titleField, s); +- } +- if (!found_artist && extract_and_trim(buf, 33, 30, s)) { +- const string performerUri = indexable.newAnonymousUri(); +- indexable.addValue(factory->performerField, performerUri); +- indexable.addTriplet(performerUri, typePropertyName, contactClassName); +- indexable.addTriplet(performerUri, fullnamePropertyName, s); +- } +- if (!found_album && extract_and_trim(buf, 63, 30, s)) +- addStatement(indexable, albumUri, titlePropertyName, s); +- if (!found_year && extract_and_trim(buf, 93, 4, s)) +- indexable.addValue(factory->createdField, s); +- if (!found_comment && extract_and_trim(buf, 97, 30, s)) { +- indexable.addValue(factory->commentField, s); +- } +- if (!found_track && !buf[125] && buf[126]) { +- indexable.addValue(factory->trackNumberField, (int)(buf[126])); +- } +- if (!found_genre && (unsigned char)(buf[127]) < 148) +- indexable.addValue(factory->genreField, genres[(uint8_t)buf[127]]); +- } +- } +- +- if(!albumUri.empty()) { +- indexable.addValue(factory->albumField, albumUri); +- indexable.addTriplet(albumUri, typePropertyName, albumClassName); +- } +- +- if (found_tag) +- indexable.addValue(factory->typeField, musicClassName); +- +- return 0; +-} +diff --git a/lib/endanalyzers/id3endanalyzer.cpp b/lib/endanalyzers/id3endanalyzer.cpp +index d8487b5..677ece0 100644 +--- a/lib/endanalyzers/id3endanalyzer.cpp ++++ b/lib/endanalyzers/id3endanalyzer.cpp +@@ -512,13 +512,19 @@ ID3EndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* + addStatement(indexable, albumUri, titlePropertyName, value); + 
found_album = true; + } else if (strncmp("TCON", p, 4) == 0) { +- // The Genre is stored as (number) +- if( value[0] == '(' && value[value.length()-1] == ')' ) { +- //vHanda: Maybe one should check if all the characters in between are digits +- int genreIndex = atoi( value.substr( 1, value.length()-1 ).c_str() ); +- indexable.addValue(factory->genreField, genres[ genreIndex ]); +- found_genre = true; +- } ++ // The Genre is stored as (number) ++ if( value[0] == '(' && value[value.length()-1] == ')' ) { ++ //vHanda: Maybe one should check if all the characters in between are digits ++ int genreIndex = atoi( value.substr( 1, value.length()-1 ).c_str() ); ++ indexable.addValue(factory->genreField, genres[ genreIndex ]); ++ found_genre = true; ++ } else { ++ // We must not forget that genre could be a string. ++ if (!value.empty()) { ++ indexable.addValue(factory->genreField, value); ++ found_genre = true; ++ } ++ } + } else if (strncmp("TLEN", p, 4) == 0) { + indexable.addValue(factory->durationField, value); + } else if (strncmp("TEXT", p, 4) == 0) { +-- +1.7.10.4 + diff --git a/SOURCES/0008-fix-parsing-of-genre-field-in-id3v2-tags-and-clean-c.patch b/SOURCES/0008-fix-parsing-of-genre-field-in-id3v2-tags-and-clean-c.patch new file mode 100644 index 0000000..5476527 --- /dev/null +++ b/SOURCES/0008-fix-parsing-of-genre-field-in-id3v2-tags-and-clean-c.patch @@ -0,0 +1,112 @@ +From 808a3fafc1d89a9b8ec76bbcc5b2514cefa9345d Mon Sep 17 00:00:00 2001 +From: Sune Vuorela +Date: Sun, 24 Jun 2012 16:18:31 +0200 +Subject: [PATCH 8/8] fix parsing of genre field in id3v2 tags and clean code + up a bit + +the genre field of a id3v2 tag might or might not be a number that matches +entries in a lookup table or alternatively a string. If it is a number, +then it might or might not be in parenthesis. 
+Handle all of the above and also handle the fact that some people might +enjoy adding numbers that are outside the range of the lookup table +--- + lib/endanalyzers/id3endanalyzer.cpp | 62 +++++++++++++++++++++++++++++++---- + 1 file changed, 55 insertions(+), 7 deletions(-) + +diff --git a/lib/endanalyzers/id3endanalyzer.cpp b/lib/endanalyzers/id3endanalyzer.cpp +index 677ece0..0db3728 100644 +--- a/lib/endanalyzers/id3endanalyzer.cpp ++++ b/lib/endanalyzers/id3endanalyzer.cpp +@@ -81,7 +81,9 @@ replaygain + VBR detection + */ + +-static const string genres[148] = { ++#define ID3_NUMBER_OF_GENRES 148 ++ ++static const string genres[ID3_NUMBER_OF_GENRES] = { + "Blues", + "Classic Rock", + "Country", +@@ -372,6 +374,54 @@ static bool extract_and_trim(const char* buf, int offset, int length, string& s) + return !s.empty(); + } + ++/** ++ * Functional helper class to get the right numbers out of a 'genre' string which ++ * might be a number in a index ++ */ ++class genre_number_parser { ++ private: ++ bool success; ++ long result; ++ void parse_string( string genre ) { ++ char* endptr; ++ int r = strtol(genre.c_str(),&endptr, 10); ++ if(*endptr == '\0') { //to check if the convertion went more or less ok ++ if(r >=0 && r < ID3_NUMBER_OF_GENRES ) { //to ensure it is within the range we have ++ success=true; ++ result=r; ++ } ++ } ++ } ++ public: ++ /** ++ * constructor taking the genre string you want parsed as a number ++ */ ++ genre_number_parser(string genre) : success(false), result(-1) { ++ if(genre.size()==0) { ++ //if the string is empty, there is no need to try to parse it ++ return; ++ } ++ //the string might start and end with parenthesis ++ if(genre[0]=='(' && genre[genre.size()-1]==')') { ++ parse_string(genre.substr(1,genre.length()-2)); ++ return; ++ } ++ parse_string(genre); ++ } ++ /** ++ * wether or not parsing was successful ++ */ ++ operator bool() { ++ return success; ++ } ++ /** ++ * the actual result of the parsing, or -1 if parsing wasn't 
successful ++ */ ++ operator long() { ++ return result; ++ } ++}; ++ + signed char + ID3EndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* in) { + const int max_padding = 1000; +@@ -512,11 +562,9 @@ ID3EndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* + addStatement(indexable, albumUri, titlePropertyName, value); + found_album = true; + } else if (strncmp("TCON", p, 4) == 0) { +- // The Genre is stored as (number) +- if( value[0] == '(' && value[value.length()-1] == ')' ) { +- //vHanda: Maybe one should check if all the characters in between are digits +- int genreIndex = atoi( value.substr( 1, value.length()-1 ).c_str() ); +- indexable.addValue(factory->genreField, genres[ genreIndex ]); ++ genre_number_parser p(value); ++ if(p) { ++ indexable.addValue(factory->genreField, genres[ p ]); + found_genre = true; + } else { + // We must not forget that genre could be a string. +@@ -629,7 +677,7 @@ ID3EndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* + if (!found_track && !buf[125] && buf[126]) { + indexable.addValue(factory->trackNumberField, (int)(buf[126])); + } +- if (!found_genre && (unsigned char)(buf[127]) < 148) ++ if (!found_genre && (unsigned char)(buf[127]) < ID3_NUMBER_OF_GENRES) + indexable.addValue(factory->genreField, genres[(uint8_t)buf[127]]); + } + } +-- +1.7.10.4 + diff --git a/SOURCES/strigi-daemon.desktop b/SOURCES/strigi-daemon.desktop new file mode 100644 index 0000000..435f905 --- /dev/null +++ b/SOURCES/strigi-daemon.desktop @@ -0,0 +1,13 @@ +[Desktop Entry] +Name=Strigi +Comment=Strigi search engine daemon +Icon= +Exec=strigidaemon +Terminal=false +Type=Application +Categories= +X-KDE-autostart-after=panel +X-KDE-StartupNotify=false +X-KDE-UniqueApplet=true +X-KDE-autostart-condition=nepomukserverrc:Service-nepomukstrigiservice:autostart:true +Hidden=true diff --git a/SOURCES/strigiclient.desktop b/SOURCES/strigiclient.desktop new file mode 100644 index 
0000000..4bce8bd --- /dev/null +++ b/SOURCES/strigiclient.desktop @@ -0,0 +1,7 @@ +[Desktop Entry] +Type=Application +Icon= +Exec=/usr/bin/strigiclient +Name=strigi client +Comment=Strigi daemon configuration tool +Categories=Qt;KDE;Settings; diff --git a/SOURCES/strigiconfig-multilib.h b/SOURCES/strigiconfig-multilib.h new file mode 100644 index 0000000..ea9dcc4 --- /dev/null +++ b/SOURCES/strigiconfig-multilib.h @@ -0,0 +1,23 @@ +/* + * strigiconfig.h + * + * This file is here to prevent a file conflict on multiarch systems. + * A conflict will occur because strigiconfig.h has arch-specific definitions. + * + * DO NOT INCLUDE THE NEW FILE DIRECTLY -- ALWAYS INCLUDE THIS ONE INSTEAD. +*/ + +#ifndef STRIGICONFIG_MULTILIB_H +#define STRIGICONFIG_MULTILIB_H +#include <bits/wordsize.h> + +#if __WORDSIZE == 32 +#include "strigi/strigiconfig-32.h" +#elif __WORDSIZE == 64 +#include "strigi/strigiconfig-64.h" +#else +#error "unexpected value for __WORDSIZE macro" +#endif + +#endif + diff --git a/SPECS/strigi.spec b/SPECS/strigi.spec new file mode 100644 index 0000000..396de36 --- /dev/null +++ b/SPECS/strigi.spec @@ -0,0 +1,415 @@ +%global multilib_archs x86_64 %{ix86} ppc64 ppc s390x s390 sparc64 sparcv9 +%if 0%{?fedora} && 0%{?fedora} < 16 +%define dt_vendor fedora +# include clucene support +%global clucene 1 +%endif + +%define snap 20120626 + +Name: strigi +Version: 0.7.7 +Release: 13.20120626%{?dist} +Summary: A desktop search program +Group: Applications/Productivity +License: LGPLv2+ +#URL: http://strigi.sf.net/ +URL: http://www.vandenoever.info/software/strigi/ +#Source0: http://www.vandenoever.info/software/strigi/strigi-%{version}%{?pre:-%{pre}}.tar.bz2 +Source0: http://rdieter.fedorapeople.org/strigi/strigi-%{version}%{?pre:-%{pre}}.tar.xz +Source1: strigiclient.desktop +Source2: strigi-daemon.desktop +Source3: strigiconfig-multilib.h +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) + +## upstream patches +# strigidaemon +Patch101:
0001-Minor.-Fix-grammar-typo-in-cmake-output.patch +Patch102: 0002-gcc47-fix-unistd.h-header-required-unconditionally-f.patch +Patch103: 0003-Fix-return-value-wrong-type.patch +# libstreamanalyzer +Patch201: 0001-Fix-xpm-and-xbm-index.patch +Patch202: 0002-Extract-tracknumber-and-track-count-from-a-value-lik.patch +Patch203: 0003-Fixed-indexing-of-m3u-files.patch +Patch204: 0004-Fix-FLAC-Files-Remove-addtional-db-in-replaygain.patch +Patch205: 0005-Fix-flac-analizer-was-importing-only-one-artist-tag.patch +Patch206: 0006-Fix-non-numeric-genres-in-id3-v2-mp3-are-ignored.patch +Patch207: 0007-Opps-Rmoving-a-wrong-commited-file-id3endanalyzer.cp.patch +Patch208: 0008-fix-parsing-of-genre-field-in-id3v2-tags-and-clean-c.patch + +BuildRequires: bison +BuildRequires: boost-devel +BuildRequires: bzip2-devel +BuildRequires: cmake >= 2.4.5 +%if 0%{?clucene:1} +BuildRequires: clucene-core-devel +%endif +BuildRequires: desktop-file-utils +BuildRequires: expat-devel +BuildRequires: pkgconfig(cppunit) +BuildRequires: pkgconfig(dbus-1) dbus-x11 +BuildRequires: pkgconfig(exiv2) +BuildRequires: pkgconfig(gamin) +BuildRequires: pkgconfig(libxml-2.0) +BuildRequires: pkgconfig(QtDBus) pkgconfig(QtGui) +BuildRequires: pkgconfig(zlib) + +Requires: %{name}-libs%{?_isa} = %{version}-%{release} + +%description +Strigi is a fast and light desktop search engine. It can handle a large range +of file formats such as emails, office documents, media files, and file +archives. It can index files that are embedded in other files. This means email +attachments and files in zip files are searchable as if they were normal files +on your harddisk. + +Strigi is normally run as a background daemon that can be accessed by many +other programs at once. In addition to the daemon, Strigi comes with powerful +replacements for the popular unix commands 'find' and 'grep'. These are called +'deepfind' and 'deepgrep' and can search inside files just like the strigi +daemon can.
+ +%package devel +Summary: Development files for the strigi desktop search engine +Group: Development/Libraries +Requires: %{name}-libs%{?_isa} = %{version}-%{release} +%description devel +Development files for the strigi desktop search engine + +%package libs +Summary: Strigi libraries +Group: Development/Libraries +%description libs +Strigi search engine libraries + + +%prep +%setup -q -n %{name}-%{version}%{?pre:-%{pre}} + +pushd strigidaemon +%patch101 -p1 +%patch102 -p1 +%patch103 -p1 +popd +pushd libstreamanalyzer +%patch201 -p1 +%patch202 -p1 +%patch203 -p1 +%patch204 -p1 +%patch205 -p1 +%patch206 -p1 +%patch207 -p1 +%patch208 -p1 +popd + + +%build +mkdir -p %{_target_platform} +pushd %{_target_platform} +%{cmake} \ +%if ! 0%{?clucene:1} + -DENABLE_CLUCENE:BOOL=OFF \ + -DENABLE_CLUCENE_NG:BOOL=OFF \ +%endif + -DENABLE_DBUS:BOOL=ON \ + -DENABLE_FAM:BOOL=ON \ + -DENABLE_FFMPEG:BOOL=OFF \ + %{?_cmake_skip_rpath} \ + .. +popd + +make %{?_smp_mflags} -C %{_target_platform} + + +%install +rm -rf %{buildroot} +make install/fast -C %{_target_platform} DESTDIR=%{buildroot} + +desktop-file-install \ + --vendor="%{?dt_vendor}" \ + --dir=%{buildroot}%{_datadir}/applications \ + %{SOURCE1} + +# Add an autostart desktop file for the strigi daemon +install -p -m644 -D %{SOURCE2} %{buildroot}%{_sysconfdir}/xdg/autostart/strigi-daemon.desktop + +%ifarch %{multilib_archs} +# multilib: strigiconfig.h + mv %{buildroot}%{_includedir}/strigi/strigiconfig.h %{buildroot}%{_includedir}/strigi/strigiconfig-%{__isa_bits}.h + install -p -m644 -D %{SOURCE3} %{buildroot}%{_includedir}/strigi/strigiconfig-multilib.h + ln -sf strigiconfig-multilib.h %{buildroot}%{_includedir}/strigi/strigiconfig.h +%endif + +%clean +rm -rf %{buildroot} + + +%post libs -p /sbin/ldconfig + +%postun libs -p /sbin/ldconfig + + +%files +%defattr(-,root,root,-) +%doc AUTHORS COPYING ChangeLog +%{_bindir}/* +%{_datadir}/applications/*strigiclient.desktop +%{_datadir}/dbus-1/services/*.service 
+%{_sysconfdir}/xdg/autostart/strigi-daemon.desktop +%if 0%{?clucene} +%{_libdir}/strigi/strigiindex_clucene.so +%endif + +%files devel +%defattr(-,root,root,-) +%{_libdir}/lib*.so +%{_libdir}/pkgconfig/libstreamanalyzer.pc +%{_libdir}/pkgconfig/libstreams.pc +%{_libdir}/strigi/StrigiConfig.cmake +%{_libdir}/libsearchclient/ +%{_libdir}/libstreamanalyzer/ +%{_libdir}/libstreams/ +%{_includedir}/strigi/ + +%files libs +%defattr(-,root,root,-) +%{_datadir}/strigi/ +%{_libdir}/libsearchclient.so.0* +%{_libdir}/libstreamanalyzer.so.0* +%{_libdir}/libstreams.so.0* +%{_libdir}/libstrigihtmlgui.so.0* +%{_libdir}/libstrigiqtdbusclient.so.0* +%dir %{_libdir}/strigi/ +%{_libdir}/strigi/strigiea_*.so +%{_libdir}/strigi/strigila_*.so +%{_libdir}/strigi/strigita_*.so + + +%changelog +* Wed Sep 06 2017 Jan Grulich - 0.7.7-13.20120626 +- Rebuild exiv2 + Resolves: bz#1488012 + +* Mon Apr 28 2014 Than Ngo - 0.7.7-12.20120626 +- fix typo bug which build failure in kde-workspace + +* Mon Mar 17 2014 Than Ngo - 0.7.7-11.20120626 +- fix multilib issue + +* Fri Jan 24 2014 Daniel Mach - 0.7.7-10.20120626 +- Mass rebuild 2014-01-24 + +* Fri Dec 27 2013 Daniel Mach - 0.7.7-9.20120626 +- Mass rebuild 2013-12-27 + +* Fri Feb 15 2013 Fedora Release Engineering - 0.7.7-8.20120626 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild + +* Mon Jul 30 2012 Rex Dieter 0.7.7-7.20120626 +- rebuild (boost) + +* Fri Jul 27 2012 Fedora Release Engineering - 0.7.7-6.20120626 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_18_Mass_Rebuild + +* Tue Jun 26 2012 Rex Dieter 0.7.7-5.20120626 +- backport upstream patches (as of 20120626) + +* Wed May 02 2012 Rex Dieter 0.7.7-4 +- rebuild (exiv2) + +* Tue Feb 28 2012 Fedora Release Engineering - 0.7.7-3 +- Rebuilt for c++ ABI breakage + +* Mon Jan 09 2012 Rex Dieter 0.7.7-2 +- gcc47 patch + +* Mon Jan 09 2012 Rex Dieter 0.7.7-1 +- 0.7.7 +- upstream xpm patch + +* Wed Oct 26 2011 Fedora Release Engineering - 0.7.6-4 +- Rebuilt for glibc 
bug#747377 + +* Mon Oct 17 2011 Rex Dieter 0.7.6-3 +- Revert commit that breaks parsing of some PDF files + +* Fri Oct 14 2011 Rex Dieter - 0.7.6-2 +- rebuild (exiv2) + +* Tue Oct 04 2011 Rex Dieter 0.7.6-1 +- 0.7.6 +- BR: boost-devel +- pkgconfig-style deps + +* Tue Aug 16 2011 Rex Dieter 0.7.5-5 +- libstreams rpm analyzer fixed upstream + +* Sun Aug 07 2011 Rex Dieter 0.7.5-4 +- enable dbus/fam support + +* Sun Aug 07 2011 Rex Dieter 0.7.5-3 +- explicitly mark convenience libs static + +* Fri Aug 05 2011 Rex Dieter 0.7.5-2 +- rebuild + +* Fri Jul 29 2011 Rex Dieter 0.7.5-1 +- strigi-0.7.5 (#726507) + +* Mon Jun 06 2011 Rex Dieter 0.7.2-10 +- drop clucene support, for now (f16+) + +* Thu Jun 02 2011 Rex Dieter 0.7.2-9 +- move strigiindex_clucene.so to main pkg +- drop .desktop --vendor (f16+) + +* Wed Feb 09 2011 Fedora Release Engineering - 0.7.2-8 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild + +* Sat Jan 01 2011 Rex Dieter - 0.7.2-7 +- rebuild (exiv2) + +* Fri Nov 05 2010 Thomas Janssen 0.7.2-6 +- rebuild for new libxml2 + +* Mon Jul 12 2010 Rex Dieter - 0.7.2-5 +- disable rpmanalyzer support, until crasher(s) fixed (#609541) +- tidy up spec + +* Wed Jun 30 2010 Rex Dieter - 0.7.2-4 +- strigi flac analyser crashes with floating point (arithmetic) exception (kdebug234398) + +* Mon May 31 2010 Rex Dieter - 0.7.2-3 +- rebuild (exiv2) + +* Fri Feb 12 2010 Rex Dieter - 0.7.2-2 +- %%build: +%%_cmake_skip_rpath + +* Fri Feb 12 2010 Rex Dieter - 0.7.2-1 +- strigi-0.7.2 + +* Mon Jan 11 2010 Rex Dieter - 0.7.1-1 +- strigi-0.7.1 + +* Sun Jan 03 2010 Rex Dieter - 0.7.0-2 +- rebuild (exiv2) + +* Tue Aug 04 2009 Rex Dieter - 0.7.0-1 +- strigi-0.7.0 (final) + +* Sun Jul 26 2009 Fedora Release Engineering - 0.7-0.2.RC1 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_12_Mass_Rebuild + +* Thu Jul 23 2009 Rex Dieter 0.7-0.1.RC1 +- strigi-0.7-RC1 +- use %%_isa where appropriate +- %%files: track lib sonames +- strigi-daemon.desktop: +Hidden=true (ie, 
disable autostart by default) + +* Mon Jun 29 2009 Lukáš Tinkl - 0.6.5-2 +- don't start strigi daemon unconditionally (#487322) + +* Fri May 29 2009 Lukáš Tinkl - 0.6.5-1 +- Strigi 0.6.5 + +* Tue Apr 21 2009 Jaroslav Reznik - 0.6.4-4 +- fix crash with / char in path (#496620, kde#185551) + +* Wed Feb 25 2009 Fedora Release Engineering - 0.6.4-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_11_Mass_Rebuild + +* Wed Feb 04 2009 Deji Akingunola - 0.6.4-2 +- Add patch to build with gcc-4.4 + +* Mon Feb 02 2009 Rex Dieter - 0.6.4-1 +- strigi-0.6.4 +- Summary: s/for KDE// +- *.desktop: validate, remove OnlyShowIn=KDE +- -devel: move *.cmake here + +* Mon Jan 12 2009 Rex Dieter - 0.6.3-1 +- strigi-0.6.3 + +* Tue Jan 06 2009 Rex Dieter - 0.6.2-1 +- strigi-0.6.2 +- use %%cmake macro + +* Thu Dec 18 2008 Rex Dieter - 0.5.11.1-2 +- respin (exiv2) + +* Thu Nov 27 2008 Lorenzo Villani - 0.5.11.1-1 +- drop _default_patch_fuzz +- rebase strigi-multilib patch +- No official 0.5.11.1 tarballs were released but we need 0.5.11.1, apply a + diff between 0.5.11 and 0.5.11.1 svn tags + +* Sun Jul 20 2008 Kevin Kofler - 0.5.11-1 +- Update to 0.5.11 +- Drop compile-fix and lucenetest_fix patches (fixed upstream) + +* Sat May 10 2008 Deji Akingunola - 0.5.9-2 +- Disable 'make test' for now, seems the buildroot cannot find java + +* Sat May 03 2008 Deji Akingunola - 0.5.9-1 +- Update to 0.5.9 (bugfix release) + +* Thu Mar 06 2008 Deji Akingunola - 0.5.8-2 +- Use upstream's default build options (disable inotify support, #436096) + +* Thu Feb 21 2008 Kevin Kofler - 0.5.8-1 +- Update to 0.5.8 +- Fix LIB_DESTINATION (#433627) +- Drop GCC 4.3 patch (fixed upstream) + +* Sat Feb 09 2008 Kevin Kofler - 0.5.7-4 +- Rebuild for GCC 4.3 + +* Fri Jan 11 2008 Deji Akingunola - 0.5.7-3 +- Fix build failure with gcc-4.3 + +* Tue Nov 13 2007 Kevin Kofler - 0.5.7-2 +- Rebuild for new exiv2 + +* Tue Oct 30 2007 Deji Akingunola - 0.5.7-1 +- Update to 0.5.7 release +- Fix multilibs conflict (Bug 
#343221, patch by Kevin Kofler) + +* Sun Sep 09 2007 Deji Akingunola - 0.5.5-2 +- Rebuild for BuildID changes + +* Sat Aug 11 2007 Deji Akingunola - 0.5.5-1 +- Update to 0.5.5 release + +* Mon Aug 06 2007 Deji Akingunola - 0.5.4-1 +- Update to 0.5.4 proper +- License tag update + +* Sun Jul 29 2007 Deji Akingunola - 0.5.4-0.1.svn20070729 +- New KDE SVN snapshot version for KDE 4.0 beta 1 (bz#20015) + +* Wed May 16 2007 Deji Akingunola - 0.5.1-5 +- Split out a strigi-libs subpackage as suggested in BZ#223586 +- Include a strigidaemon autostart desktop file + +* Sat May 05 2007 Deji Akingunola - 0.5.1-4 +- Add dbus-devel BR. + +* Sat May 05 2007 Deji Akingunola - 0.5.1-3 +- Misc. fixes from package review + +* Fri May 04 2007 Deji Akingunola - 0.5.1-2 +- Updates from reviews: +- Have the -devel subpackage require pkgconfig +- Add a versioned dependency on cmake and remove dbus-qt buildrequire + +* Fri May 04 2007 Deji Akingunola - 0.5.1-1 +- New release + +* Wed May 02 2007 Deji Akingunola - 0.3.11-3 +- Allow building on FC6 + +* Thu Feb 22 2007 Deji Akingunola - 0.3.11-2 +- Assorted fixes arising from reviews + +* Wed Jan 17 2007 Deji Akingunola - 0.3.11-1 +- Initial packaging for Fedora Extras