Blame SOURCES/0006-Fix-non-numeric-genres-in-id3-v2-mp3-are-ignored.patch

ee4fba
From 17e9cdbdee9b3b3cdccab416004a99547a7196a7 Mon Sep 17 00:00:00 2001
ee4fba
From: Ignacio Serantes <kde@aynoa.net>
ee4fba
Date: Sun, 17 Jun 2012 20:39:36 +0200
ee4fba
Subject: [PATCH 6/8] Fix: non numeric genres in id3 v2 mp3 are ignored.
ee4fba
 REVIEW:105242.
ee4fba
ee4fba
---
ee4fba
 id3endanalyzer.cpp |  646 ++++++++++++++++++++++++++++++++++++++++++++++++++++
ee4fba
 1 file changed, 646 insertions(+)
ee4fba
 create mode 100644 id3endanalyzer.cpp
ee4fba
ee4fba
diff --git a/id3endanalyzer.cpp b/id3endanalyzer.cpp
ee4fba
new file mode 100644
ee4fba
index 0000000..677ece0
ee4fba
--- /dev/null
ee4fba
+++ b/id3endanalyzer.cpp
ee4fba
@@ -0,0 +1,646 @@
ee4fba
+/* This file is part of Strigi Desktop Search
ee4fba
+ *
ee4fba
+ * Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info>
ee4fba
+ *               2009 Evgeny Egorochkin <phreedom.stdin@gmail.com>
ee4fba
+ *
ee4fba
+ * This library is free software; you can redistribute it and/or
ee4fba
+ * modify it under the terms of the GNU Library General Public
ee4fba
+ * License as published by the Free Software Foundation; either
ee4fba
+ * version 2 of the License, or (at your option) any later version.
ee4fba
+ *
ee4fba
+ * This library is distributed in the hope that it will be useful,
ee4fba
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
ee4fba
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
ee4fba
+ * Library General Public License for more details.
ee4fba
+ *
ee4fba
+ * You should have received a copy of the GNU Library General Public License
ee4fba
+ * along with this library; see the file COPYING.LIB.  If not, write to
ee4fba
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
ee4fba
+ * Boston, MA 02110-1301, USA.
ee4fba
+ */
ee4fba
+
ee4fba
+#ifdef HAVE_CONFIG_H
ee4fba
+# include "config.h"
ee4fba
+#endif
ee4fba
+
ee4fba
+#include "id3endanalyzer.h"
ee4fba
+#include "analysisresult.h"
ee4fba
+#include "../rdfnamespaces.h"
ee4fba
+#include <strigi/strigiconfig.h>
ee4fba
+#include <strigi/textutils.h>
ee4fba
+#include <strigi/stringstream.h>
ee4fba
+#include <iostream>
ee4fba
+#include <sstream>
ee4fba
+#include <cstring>
ee4fba
+#include <cstdlib>
ee4fba
+#include <iconv.h>
ee4fba
+
ee4fba
+#ifdef ICONV_SECOND_ARGUMENT_IS_CONST
ee4fba
+     #define ICONV_CONST const
ee4fba
+#else
ee4fba
+     #define ICONV_CONST
ee4fba
+#endif
ee4fba
+
ee4fba
+using namespace Strigi;
ee4fba
+using namespace std;
ee4fba
+
ee4fba
+const string
ee4fba
+    typePropertyName(
ee4fba
+	RDF "type"),
ee4fba
+    fullnamePropertyName(
ee4fba
+	NCO "fullname"),
ee4fba
+    titlePropertyName(
ee4fba
+	NIE "title"),
ee4fba
+    albumTrackCountPropertyName(
ee4fba
+	NMM_DRAFT "albumTrackCount"),
ee4fba
+    discNumberPropertyName(
ee4fba
+	NMM_DRAFT "setNumber"),
ee4fba
+    discCountPropertyName(
ee4fba
+	NMM_DRAFT "setSize"),
ee4fba
+
ee4fba
+    musicClassName(
ee4fba
+	NMM_DRAFT "MusicPiece"),
ee4fba
+    audioClassName(
ee4fba
+	NFO "Audio"),
ee4fba
+    albumClassName(
ee4fba
+	NMM_DRAFT "MusicAlbum"),
ee4fba
+    contactClassName(
ee4fba
+	NCO "Contact");
ee4fba
+
ee4fba
+/*
ee4fba
+ENCA autodetection of broken encodings. First, need to make sure it's going to be actually useful.
ee4fba
+ID3v2.0
ee4fba
+play counter:needs nepomuk resolution
ee4fba
+replaygain
ee4fba
++lyrics
ee4fba
++Improve:
ee4fba
+  creation date:
ee4fba
+  language: support multiple
ee4fba
+  Genre
ee4fba
+  album art type handling
ee4fba
+VBR detection
ee4fba
+*/
ee4fba
+
ee4fba
// Genre names, indexed by numeric genre code. The table is used both for the
// last byte of an ID3v1 tag and for ID3v2 TCON values written as "(NN)".
// The comment at the start of each row is the index of its first entry.
static const std::string genres[148] = {
    /*   0 */ "Blues", "Classic Rock", "Country", "Dance",
    /*   4 */ "Disco", "Funk", "Grunge", "Hip-Hop",
    /*   8 */ "Jazz", "Metal", "New Age", "Oldies",
    /*  12 */ "Other", "Pop", "R&B", "Rap",
    /*  16 */ "Reggae", "Rock", "Techno", "Industrial",
    /*  20 */ "Alternative", "Ska", "Death Metal", "Pranks",
    /*  24 */ "Soundtrack", "Euro-Techno", "Ambient", "Trip-Hop",
    /*  28 */ "Vocal", "Jazz+Funk", "Fusion", "Trance",
    /*  32 */ "Classical", "Instrumental", "Acid", "House",
    /*  36 */ "Game", "Sound Clip", "Gospel", "Noise",
    /*  40 */ "Alternative Rock", "Bass", "Soul", "Punk",
    /*  44 */ "Space", "Meditative", "Instrumental Pop", "Instrumental Rock",
    /*  48 */ "Ethnic", "Gothic", "Darkwave", "Techno-Industrial",
    /*  52 */ "Electronic", "Pop-Folk", "Eurodance", "Dream",
    /*  56 */ "Southern Rock", "Comedy", "Cult", "Gangsta",
    /*  60 */ "Top 40", "Christian Rap", "Pop/Funk", "Jungle",
    /*  64 */ "Native American", "Cabaret", "New Wave", "Psychedelic",
    /*  68 */ "Rave", "Showtunes", "Trailer", "Lo-Fi",
    /*  72 */ "Tribal", "Acid Punk", "Acid Jazz", "Polka",
    /*  76 */ "Retro", "Musical", "Rock & Roll", "Hard Rock",
    /*  80 */ "Folk", "Folk/Rock", "National Folk", "Swing",
    /*  84 */ "Fusion", "Bebop", "Latin", "Revival",
    /*  88 */ "Celtic", "Bluegrass", "Avantgarde", "Gothic Rock",
    /*  92 */ "Progressive Rock", "Psychedelic Rock", "Symphonic Rock", "Slow Rock",
    /*  96 */ "Big Band", "Chorus", "Easy Listening", "Acoustic",
    /* 100 */ "Humour", "Speech", "Chanson", "Opera",
    /* 104 */ "Chamber Music", "Sonata", "Symphony", "Booty Bass",
    /* 108 */ "Primus", "Porn Groove", "Satire", "Slow Jam",
    /* 112 */ "Club", "Tango", "Samba", "Folklore",
    /* 116 */ "Ballad", "Power Ballad", "Rhythmic Soul", "Freestyle",
    /* 120 */ "Duet", "Punk Rock", "Drum Solo", "A Cappella",
    /* 124 */ "Euro-House", "Dance Hall", "Goa", "Drum & Bass",
    /* 128 */ "Club-House", "Hardcore", "Terror", "Indie",
    /* 132 */ "BritPop", "Negerpunk", "Polsk Punk", "Beat",
    /* 136 */ "Christian Gangsta Rap", "Heavy Metal", "Black Metal", "Crossover",
    /* 140 */ "Contemporary Christian", "Christian Rock", "Merengue", "Salsa",
    /* 144 */ "Thrash Metal", "Anime", "Jpop", "Synthpop"
};
ee4fba
+
ee4fba
// Bit rates in bits per second, indexed by the upper four bits of the third
// MP3 frame-header byte (index 15 is rejected before the lookup).
const uint32_t bitrate[15] = {
         0,
     32000,  40000,  48000,  56000,  64000,  80000,  96000,
    112000, 128000, 160000, 192000, 224000, 256000, 320000
};

// Sample rates in Hz, indexed by bits 2-3 of the third MP3 frame-header byte
// (index 3 is rejected before the lookup).
const uint32_t samplerate[3] = {
    44100,
    48000,
    32000
};

// iconv encoding names, indexed by the first byte of an ID3v2 frame body.
const char * encodings[5] = {
    "ISO-8859-1",
    "UTF-16",
    "UTF-16BE",
    "UTF-8",
    "UTF-16LE"
};
ee4fba
+
ee4fba
#ifndef _GNU_SOURCE
/**
 * Fallback strnlen(), compiled only when _GNU_SOURCE is not defined.
 *
 * @param s      buffer to scan; at most maxlen bytes of it are read
 * @param maxlen upper bound on the number of bytes examined
 * @return the offset of the first zero byte, or maxlen if none is found
 *         within the first maxlen bytes
 */
size_t
strnlen(const char *s, size_t maxlen) {
    for (size_t i = 0; i < maxlen; i++) {
        if (s[i] == 0)
            return i;
    }
    return maxlen;
}
#endif
ee4fba
+
ee4fba
+class UTF8Convertor {
ee4fba
+  private:
ee4fba
+    iconv_t const conv;
ee4fba
+    char *out;
ee4fba
+    size_t capacity;
ee4fba
+  public:
ee4fba
+     UTF8Convertor(const char *encoding);
ee4fba
+     const string convert(const char *data, size_t len);
ee4fba
+     ~UTF8Convertor();
ee4fba
+};
ee4fba
+UTF8Convertor::UTF8Convertor(const char *encoding) :conv(iconv_open("UTF-8", encoding)), out(0), capacity(0) {
ee4fba
+}
ee4fba
+UTF8Convertor::~UTF8Convertor() {
ee4fba
+    iconv_close(conv);
ee4fba
+    if (out) free(out);
ee4fba
+}
ee4fba
+const string
ee4fba
+UTF8Convertor::convert(const char *data, size_t len) {
ee4fba
+  if (!len)
ee4fba
+      return string();
ee4fba
+  if ( capacity
ee4fba
+      (capacity>10000 && capacity>len*8) ) {
ee4fba
+      capacity = len*3;
ee4fba
+      out = (char*)realloc(out, len*3);
ee4fba
+  }
ee4fba
+
ee4fba
+  char *result = out;
ee4fba
+  size_t reslen = capacity;
ee4fba
+
ee4fba
+  ICONV_CONST char *input = (char *)data;
ee4fba
+  iconv(conv, &input, &len, &result, &reslen);
ee4fba
+
ee4fba
+  return string(out,capacity-reslen);
ee4fba
+}
ee4fba
+
ee4fba
+void
ee4fba
+ID3EndAnalyzerFactory::registerFields(FieldRegister& r) {
ee4fba
+    createdField	= r.registerField(NIE "contentCreated");
ee4fba
+    subjectField	= r.registerField(NIE "subject");
ee4fba
+    titleField		= r.registerField(titlePropertyName);
ee4fba
+    descriptionField	= r.registerField(NIE "description");
ee4fba
+    commentField	= r.registerField(NIE "comment");
ee4fba
+    albumField		= r.registerField(NMM_DRAFT "musicAlbum");
ee4fba
+    genreField		= r.registerField(NMM_DRAFT "genre");
ee4fba
+    composerField	= r.registerField(NMM_DRAFT "composer");
ee4fba
+    performerField	= r.registerField(NMM_DRAFT "performer");
ee4fba
+    lyricistField	= r.registerField(NMM_DRAFT "lyricist");
ee4fba
+    publisherField	= r.registerField(NCO "publisher");
ee4fba
+    languageField	= r.registerField(NIE "language");
ee4fba
+    copyrightField	= r.registerField(NIE "copyright");
ee4fba
+    trackNumberField	= r.registerField(NMM_DRAFT "trackNumber");
ee4fba
+    discNumberField	= r.registerField(discNumberPropertyName);
ee4fba
+    durationField	= r.registerField(NFO "duration");
ee4fba
+    typeField		= r.typeField;
ee4fba
+
ee4fba
+    bitrateField	= r.registerField(NFO "averageBitrate");
ee4fba
+    samplerateField	= r.registerField(NFO "sampleRate");
ee4fba
+    codecField		= r.registerField(NFO "codec");
ee4fba
+    channelsField	= r.registerField(NFO "channels");
ee4fba
+}
ee4fba
+
ee4fba
+inline
ee4fba
+void
ee4fba
+addStatement(AnalysisResult &indexable, string& subject, const string& predicate, const string& object) {
ee4fba
+    if (subject.empty())
ee4fba
+	subject = indexable.newAnonymousUri();
ee4fba
+    indexable.addTriplet(subject, predicate, object);
ee4fba
+}
ee4fba
+
ee4fba
/**
 * Combines four bytes into one integer, most significant byte first, with
 * each byte weighted as a 7-bit group: b[0]*2^21 + b[1]*2^14 + b[2]*2^7 + b[3].
 * The bytes are added, not masked, so a byte above 0x7f carries into the
 * next group.
 *
 * @param b pointer to at least four readable bytes
 * @return the combined value
 */
inline
int32_t readAsyncSize(const unsigned char* b) {
    int32_t total = 0;
    for (int k = 0; k < 4; ++k) {
        total = total * 128 + (int32_t)b[k];
    }
    return total;
}
ee4fba
+
ee4fba
+int32_t
ee4fba
+readSize(const unsigned char* b, bool async) {
ee4fba
+    const signed char* c = (const signed char*)b;
ee4fba
+    if (async) {
ee4fba
+        if (c[0] < 0 || c[1] < 0 || c[2] < 0 || c[3] < 0)
ee4fba
+            return -1;
ee4fba
+        return readAsyncSize(b);
ee4fba
+    }
ee4fba
+    return readBigEndianInt32(b);
ee4fba
+}
ee4fba
+bool
ee4fba
+ID3EndAnalyzer::checkHeader(const char* header, int32_t headersize) const {
ee4fba
+  const unsigned char* usbuf = (const unsigned char*)header;
ee4fba
+  int32_t i;
ee4fba
+
ee4fba
+  for(i=0; (header[i] == '\0') && (i
ee4fba
+  return (headersize>=6+i)
ee4fba
+	  && (
ee4fba
+	    (strncmp("ID3", header+i, 3) == 0	// check that it's ID3
ee4fba
+	      && usbuf[3+i] <= 4 				// only handle version <= 4
ee4fba
+	      && (usbuf[5+i]&~0x80) == 0)  // we're too dumb too handle other flags
ee4fba
+	    ||
ee4fba
+	    ((unsigned char)header[0+i] == 0xff && ((unsigned char)header[1+i]&0xfe) == 0xfa
ee4fba
+	      && (unsigned char)header[2+i]>>4 != 0xf	// MP3 frame header is ok too
ee4fba
+	      && (((unsigned char)header[2+i]>>2)&3) != 3)
ee4fba
+	  );
ee4fba
+
ee4fba
+}
ee4fba
+
ee4fba
/**
 * Removes leading and trailing characters from s, in place.
 *
 * @param s    string to trim
 * @param drop set of characters to strip from both ends (default: space)
 */
static void trim(std::string& s, const std::string& drop = " ")
{
    // Trailing run first. When s consists only of `drop` characters,
    // find_last_not_of() returns npos, npos+1 wraps to 0 and s is cleared.
    s.erase(s.find_last_not_of(drop)+1);
    // Then the leading run, erased from s itself rather than from a copy.
    s.erase(0, s.find_first_not_of(drop));
}
ee4fba
+
ee4fba
+static bool extract_and_trim(const char* buf, int offset, int length, string& s)
ee4fba
+{
ee4fba
+    // We're extracting here the ID3v1 tags and doing some sanity checks:
ee4fba
+    // 1) Strip of all leading and prefixed spaces
ee4fba
+    // 2) Test if string contains at least something
ee4fba
+    if (!buf[offset])
ee4fba
+	return false;
ee4fba
+    
ee4fba
+    s = string(buf + offset, strnlen(buf + offset, length));
ee4fba
+    trim(s);
ee4fba
+    // Return true if the extracted value is not empty (read: contains something)
ee4fba
+    return !s.empty();
ee4fba
+}
ee4fba
+
ee4fba
+signed char
ee4fba
+ID3EndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* in) {
ee4fba
+  const int max_padding = 1000;
ee4fba
+    if(!in)
ee4fba
+        return -1;
ee4fba
+
ee4fba
+    bool found_title = false, found_artist = false,
ee4fba
+	  found_album = false, found_comment = false,
ee4fba
+	  found_year = false, found_track = false,
ee4fba
+	  found_genre = false, found_tag = false;
ee4fba
+    string albumUri;
ee4fba
+    char albumArtNum = '\0';
ee4fba
+
ee4fba
+    // read 10 byte header
ee4fba
+    const char* buf;
ee4fba
+    int32_t nread = in->read(buf, 10+max_padding, 10+max_padding);
ee4fba
+
ee4fba
+    // parse ID3v2* tag
ee4fba
+
ee4fba
+    if (nread == 10+max_padding && strncmp("ID3", buf, 3) == 0) { // check for ID3 header
ee4fba
+
ee4fba
+	bool async = buf[3] >= 4;
ee4fba
+	bool unsync = (buf[5] & 0x80)!=0;
ee4fba
+
ee4fba
+	// calculate size from 4 syncsafe bytes
ee4fba
+	int32_t size = readAsyncSize((unsigned char*)buf+6);
ee4fba
+	if (size < 0 || size > 5000000)
ee4fba
+	    return -1;
ee4fba
+	size += 10+4+max_padding; // add the size of the ID3 header, MP3 frame header and possible padding generated by LAME(workaround)
ee4fba
+
ee4fba
+	// read the entire tag
ee4fba
+	in->reset(0);
ee4fba
+	nread = in->read(buf, size, size);
ee4fba
+	if (nread != size)
ee4fba
+	    return -1;
ee4fba
+
ee4fba
+	found_tag = true;
ee4fba
+
ee4fba
+	const char* p = buf + 10;
ee4fba
+	buf += size-4-max_padding;
ee4fba
+	while (p < buf && *p) {
ee4fba
+	    size = readSize((unsigned char*)p+4, async);
ee4fba
+	    if (size <= 0 || size > (buf-p)-10) {
ee4fba
+		//cerr << "size < 0: " << size << endl;
ee4fba
+		break;
ee4fba
+	    }
ee4fba
+
ee4fba
+	    string value;
ee4fba
+	    uint8_t enc = p[10];
ee4fba
+	    const char *encoding = enc>4 ? encodings[0] : encodings[enc] ;
ee4fba
+	    UTF8Convertor conv(encoding);
ee4fba
+	    const char *decoded_value;
ee4fba
+	    int32_t decoded_value_size;
ee4fba
+	    string deunsyncbuf;
ee4fba
+	    if (unsync) {
ee4fba
+	      deunsyncbuf.reserve(size-1);
ee4fba
+	      for(int32_t i = 0; i
ee4fba
+		if ( (i==0) || (p[11+i]!=0) || (p[10+i]!=0xff) )
ee4fba
+		  deunsyncbuf.push_back(p[11+i]);
ee4fba
+	      decoded_value = deunsyncbuf.c_str();
ee4fba
+	      decoded_value_size = deunsyncbuf.length();
ee4fba
+	    } else {
ee4fba
+	      decoded_value = p+11;
ee4fba
+	      decoded_value_size = size-1;
ee4fba
+	    };
ee4fba
+
ee4fba
+	    if (strncmp("APIC", p, 4) == 0) {
ee4fba
+		size_t mimelen = strnlen(decoded_value, decoded_value_size);
ee4fba
+                if ((int32_t)mimelen < decoded_value_size-3) {
ee4fba
+		    const char *desc = decoded_value+mimelen+1+1;
ee4fba
+//		    uint8_t pictype = p[11+mimelen+1];
ee4fba
+		    size_t desclen = strnlen(desc,decoded_value_size-mimelen-2-1);
ee4fba
+		    const char *content = desc + desclen + 1 + (enc == 0 || enc == 3 ? 0:1) ;
ee4fba
+
ee4fba
+		    if(content
ee4fba
+                        StringInputStream picstream(content,
ee4fba
+                                          (uint32_t)(decoded_value+decoded_value_size-content), false);
ee4fba
+			string picname;
ee4fba
+			picname = (char)('0'+albumArtNum++);
ee4fba
+			indexable.indexChild(picname, indexable.mTime(), &picstream);
ee4fba
+
ee4fba
+			if (desclen && indexable.child()) {
ee4fba
+			    if (enc == 0 || enc == 3) {
ee4fba
+				indexable.child()->addValue(factory->descriptionField, string(desc, desclen) );
ee4fba
+			    } else {
ee4fba
+				indexable.child()->addValue(factory->descriptionField, conv.convert(desc, desclen) );
ee4fba
+			    }
ee4fba
+			}
ee4fba
+
ee4fba
+                        indexable.finishIndexChild();
ee4fba
+		    }
ee4fba
+		}
ee4fba
+	    }
ee4fba
+
ee4fba
+	    if (enc == 0 || enc == 3) {
ee4fba
+		value = string(decoded_value, strnlen(decoded_value, decoded_value_size));
ee4fba
+	    } else {
ee4fba
+		value = conv.convert(decoded_value, decoded_value_size); // FIXME: add similar workaround
ee4fba
+	    }
ee4fba
+
ee4fba
+	    if (!value.empty()) {
ee4fba
+		if (strncmp("TIT1", p, 4) == 0) {
ee4fba
+		    indexable.addValue(factory->subjectField, value);
ee4fba
+		} else if (strncmp("TIT2", p, 4) == 0) {
ee4fba
+		    indexable.addValue(factory->titleField, value);
ee4fba
+		    found_title = true;
ee4fba
+		} else if (strncmp("TIT3", p, 4) == 0) {
ee4fba
+		    indexable.addValue(factory->descriptionField, value);
ee4fba
+		} else if (strncmp("TLAN", p, 4) == 0) {
ee4fba
+		    indexable.addValue(factory->languageField, value);
ee4fba
+		} else if (strncmp("TCOP", p, 4) == 0) {
ee4fba
+		    indexable.addValue(factory->copyrightField, value);
ee4fba
+		} else if ((strncmp("TDRL", p, 4) == 0) ||
ee4fba
+			    (strncmp("TDAT", p, 4) == 0) ||
ee4fba
+			    (strncmp("TYER", p, 4) == 0) ||
ee4fba
+			    (strncmp("TDRC", p, 4) == 0)) {
ee4fba
+		    indexable.addValue(factory->createdField, value);
ee4fba
+		    found_year = true;
ee4fba
+		} else if ((strncmp("TPE1", p, 4) == 0) ||
ee4fba
+			    (strncmp("TPE2", p, 4) == 0) ||
ee4fba
+			    (strncmp("TPE3", p, 4) == 0) ||
ee4fba
+			    (strncmp("TPE4", p, 4) == 0)) {
ee4fba
+		    string performerUri = indexable.newAnonymousUri();
ee4fba
+
ee4fba
+		    indexable.addValue(factory->performerField, performerUri);
ee4fba
+		    indexable.addTriplet(performerUri, typePropertyName, contactClassName);
ee4fba
+		    indexable.addTriplet(performerUri, fullnamePropertyName, value);
ee4fba
+		    found_artist = true;
ee4fba
+		} else if ((strncmp("TPUB", p, 4) == 0) ||
ee4fba
+			    (strncmp("TENC", p, 4) == 0)) {
ee4fba
+		    string publisherUri = indexable.newAnonymousUri();
ee4fba
+
ee4fba
+		    indexable.addValue(factory->publisherField, publisherUri);
ee4fba
+		    indexable.addTriplet(publisherUri, typePropertyName, contactClassName);
ee4fba
+		    indexable.addTriplet(publisherUri, fullnamePropertyName, value);
ee4fba
+		} else if ((strncmp("TALB", p, 4) == 0) ||
ee4fba
+			    (strncmp("TOAL", p, 4) == 0)) {
ee4fba
+		    addStatement(indexable, albumUri, titlePropertyName, value);
ee4fba
+		    found_album = true;
ee4fba
+		} else if (strncmp("TCON", p, 4) == 0) {
ee4fba
+		    // The Genre is stored as (number)
ee4fba
+		    if( value[0] == '(' && value[value.length()-1] == ')' ) {
ee4fba
+			//vHanda: Maybe one should check if all the characters in between are digits
ee4fba
+			int genreIndex = atoi( value.substr( 1, value.length()-1 ).c_str() );
ee4fba
+			indexable.addValue(factory->genreField, genres[ genreIndex ]);
ee4fba
+			found_genre = true;
ee4fba
+		    } else {
ee4fba
+			// We must not forget that genre could be a string.
ee4fba
+			if (!value.empty()) {
ee4fba
+			    indexable.addValue(factory->genreField, value);
ee4fba
+			    found_genre = true;
ee4fba
+			}
ee4fba
+		    }
ee4fba
+		} else if (strncmp("TLEN", p, 4) == 0) {
ee4fba
+		    indexable.addValue(factory->durationField, value);
ee4fba
+		} else if (strncmp("TEXT", p, 4) == 0) {
ee4fba
+		    string lyricistUri = indexable.newAnonymousUri();
ee4fba
+
ee4fba
+		    indexable.addValue(factory->lyricistField, lyricistUri);
ee4fba
+		    indexable.addTriplet(lyricistUri, typePropertyName, contactClassName);
ee4fba
+		    indexable.addTriplet(lyricistUri, fullnamePropertyName, value);
ee4fba
+		} else if (strncmp("TCOM", p, 4) == 0) {
ee4fba
+		    string composerUri = indexable.newAnonymousUri();
ee4fba
+
ee4fba
+		    indexable.addValue(factory->composerField, composerUri);
ee4fba
+		    indexable.addTriplet(composerUri, typePropertyName, contactClassName);
ee4fba
+		    indexable.addTriplet(composerUri, fullnamePropertyName, value);
ee4fba
+		} else if (strncmp("TRCK", p, 4) == 0) {
ee4fba
+		    istringstream ins(value);
ee4fba
+		    int tnum;
ee4fba
+		    ins >> tnum;
ee4fba
+		    if (!ins.fail()) {
ee4fba
+			indexable.addValue(factory->trackNumberField, tnum);
ee4fba
+			found_track = true;
ee4fba
+			ins.ignore(10,'/');
ee4fba
+			int tcount;
ee4fba
+			ins >> tcount;
ee4fba
+			if (!ins.fail()) {
ee4fba
+			    ostringstream outs;
ee4fba
+			    outs << tcount;
ee4fba
+			    addStatement(indexable, albumUri, albumTrackCountPropertyName, outs.str());
ee4fba
+			}
ee4fba
+		    }
ee4fba
+		} else if (strncmp("TPOS", p, 4) == 0) {
ee4fba
+		    istringstream ins(value);
ee4fba
+		    int dnum;
ee4fba
+		    ins >> dnum;
ee4fba
+		    if (!ins.fail()) {
ee4fba
+			indexable.addValue(factory->discNumberField, dnum);
ee4fba
+			ins.ignore(10,'/');
ee4fba
+			int dcount;
ee4fba
+			ins >> dcount;
ee4fba
+			if (!ins.fail()) {
ee4fba
+			    ostringstream outs;
ee4fba
+			    outs << dcount;
ee4fba
+			    addStatement(indexable, albumUri, discCountPropertyName, outs.str());
ee4fba
+			}
ee4fba
+		    }
ee4fba
+		}
ee4fba
+	    }
ee4fba
+	    p += size + 10;
ee4fba
+	}
ee4fba
+    }
ee4fba
+    // parse MP3 frame header
ee4fba
+
ee4fba
+    int bitrateindex, samplerateindex;
ee4fba
+    int i;
ee4fba
+    for(i=0; (buf[i]=='\0') && (i
ee4fba
+    if (((unsigned char)buf[0+i] == 0xff) && (((unsigned char)buf[1+i]&0xfe) == 0xfa)
ee4fba
+      && ((bitrateindex = ((unsigned char)buf[2+i]>>4)) != 0xf)
ee4fba
+      && ((samplerateindex = (((unsigned char)buf[2+i]>>2)&3)) != 3 )) { // is this MP3?
ee4fba
+
ee4fba
+	indexable.addValue(factory->typeField, audioClassName);
ee4fba
+	// FIXME: no support for VBR :(
ee4fba
+	// ideas: compare bitrate from the frame with stream size/duration from ID3 tags
ee4fba
+	// check several consecutive frames to see if bitrate is different
ee4fba
+	// in neither case you can be sure to properly detected VBR :(
ee4fba
+	indexable.addValue(factory->bitrateField, bitrate[bitrateindex]);
ee4fba
+	indexable.addValue(factory->samplerateField, samplerate[samplerateindex]);
ee4fba
+	indexable.addValue(factory->codecField, "MP3");
ee4fba
+	indexable.addValue(factory->channelsField, ((buf[3+i]>>6) == 3 ? 1:2 ) );
ee4fba
+    }
ee4fba
+
ee4fba
+    // Parse ID3v1 tag
ee4fba
+
ee4fba
+    int64_t insize;
ee4fba
+    if ( (insize = in->size()) > (128+nread)) {
ee4fba
+
ee4fba
+      // read the tag and check signature
ee4fba
+	int64_t nskip = insize-128-nread;
ee4fba
+	if (nskip == in->skip(nskip))
ee4fba
+	if (in->read(buf, 128, 128)==128)
ee4fba
+	if (!strncmp("TAG", buf, 3)) {
ee4fba
+
ee4fba
+	    found_tag = true;
ee4fba
+	    
ee4fba
+	    std::string s;
ee4fba
+
ee4fba
+	    if (!found_title && extract_and_trim(buf, 3, 30, s)) {
ee4fba
+		indexable.addValue(factory->titleField, s);
ee4fba
+	    }
ee4fba
+	    if (!found_artist && extract_and_trim(buf, 33, 30, s)) {
ee4fba
+                const string performerUri = indexable.newAnonymousUri();
ee4fba
+                indexable.addValue(factory->performerField, performerUri);
ee4fba
+                indexable.addTriplet(performerUri, typePropertyName, contactClassName);
ee4fba
+                indexable.addTriplet(performerUri, fullnamePropertyName, s);
ee4fba
+            }
ee4fba
+	    if (!found_album && extract_and_trim(buf, 63, 30, s))
ee4fba
+		addStatement(indexable, albumUri, titlePropertyName, s);
ee4fba
+	    if (!found_year && extract_and_trim(buf, 93, 4, s))
ee4fba
+		indexable.addValue(factory->createdField, s);
ee4fba
+	    if (!found_comment && extract_and_trim(buf, 97, 30, s)) {
ee4fba
+		indexable.addValue(factory->commentField, s);
ee4fba
+	    }
ee4fba
+	    if (!found_track && !buf[125] && buf[126]) {
ee4fba
+		indexable.addValue(factory->trackNumberField, (int)(buf[126]));
ee4fba
+	    }
ee4fba
+	    if (!found_genre && (unsigned char)(buf[127]) < 148)
ee4fba
+		indexable.addValue(factory->genreField, genres[(uint8_t)buf[127]]);
ee4fba
+	}
ee4fba
+    }
ee4fba
+
ee4fba
+    if(!albumUri.empty()) {
ee4fba
+	indexable.addValue(factory->albumField, albumUri);
ee4fba
+	indexable.addTriplet(albumUri, typePropertyName, albumClassName);
ee4fba
+    }
ee4fba
+
ee4fba
+    if (found_tag)
ee4fba
+	indexable.addValue(factory->typeField, musicClassName);
ee4fba
+
ee4fba
+    return 0;
ee4fba
+}
ee4fba
-- 
ee4fba
1.7.10.4
ee4fba