|
|
ee4fba |
From 17e9cdbdee9b3b3cdccab416004a99547a7196a7 Mon Sep 17 00:00:00 2001
|
|
|
ee4fba |
From: Ignacio Serantes <kde@aynoa.net>
|
|
|
ee4fba |
Date: Sun, 17 Jun 2012 20:39:36 +0200
|
|
|
ee4fba |
Subject: [PATCH 6/8] Fix: non numeric genres in id3 v2 mp3 are ignored.
|
|
|
ee4fba |
REVIEW:105242.
|
|
|
ee4fba |
|
|
|
ee4fba |
---
|
|
|
ee4fba |
id3endanalyzer.cpp | 646 ++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
|
ee4fba |
1 file changed, 646 insertions(+)
|
|
|
ee4fba |
create mode 100644 id3endanalyzer.cpp
|
|
|
ee4fba |
|
|
|
ee4fba |
diff --git a/id3endanalyzer.cpp b/id3endanalyzer.cpp
|
|
|
ee4fba |
new file mode 100644
|
|
|
ee4fba |
index 0000000..677ece0
|
|
|
ee4fba |
--- /dev/null
|
|
|
ee4fba |
+++ b/id3endanalyzer.cpp
|
|
|
ee4fba |
@@ -0,0 +1,646 @@
|
|
|
ee4fba |
+/* This file is part of Strigi Desktop Search
|
|
|
ee4fba |
+ *
|
|
|
ee4fba |
+ * Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info>
|
|
|
ee4fba |
+ * 2009 Evgeny Egorochkin <phreedom.stdin@gmail.com>
|
|
|
ee4fba |
+ *
|
|
|
ee4fba |
+ * This library is free software; you can redistribute it and/or
|
|
|
ee4fba |
+ * modify it under the terms of the GNU Library General Public
|
|
|
ee4fba |
+ * License as published by the Free Software Foundation; either
|
|
|
ee4fba |
+ * version 2 of the License, or (at your option) any later version.
|
|
|
ee4fba |
+ *
|
|
|
ee4fba |
+ * This library is distributed in the hope that it will be useful,
|
|
|
ee4fba |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
ee4fba |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
ee4fba |
+ * Library General Public License for more details.
|
|
|
ee4fba |
+ *
|
|
|
ee4fba |
+ * You should have received a copy of the GNU Library General Public License
|
|
|
ee4fba |
+ * along with this library; see the file COPYING.LIB. If not, write to
|
|
|
ee4fba |
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
|
ee4fba |
+ * Boston, MA 02110-1301, USA.
|
|
|
ee4fba |
+ */
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+#ifdef HAVE_CONFIG_H
|
|
|
ee4fba |
+# include "config.h"
|
|
|
ee4fba |
+#endif
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+#include "id3endanalyzer.h"
|
|
|
ee4fba |
+#include "analysisresult.h"
|
|
|
ee4fba |
+#include "../rdfnamespaces.h"
|
|
|
ee4fba |
+#include <strigi/strigiconfig.h>
|
|
|
ee4fba |
+#include <strigi/textutils.h>
|
|
|
ee4fba |
+#include <strigi/stringstream.h>
|
|
|
ee4fba |
+#include <iostream>
|
|
|
ee4fba |
+#include <sstream>
|
|
|
ee4fba |
+#include <cstring>
|
|
|
ee4fba |
+#include <cstdlib>
|
|
|
ee4fba |
+#include <iconv.h>
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+#ifdef ICONV_SECOND_ARGUMENT_IS_CONST
|
|
|
ee4fba |
+ #define ICONV_CONST const
|
|
|
ee4fba |
+#else
|
|
|
ee4fba |
+ #define ICONV_CONST
|
|
|
ee4fba |
+#endif
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+using namespace Strigi;
|
|
|
ee4fba |
+using namespace std;
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+const string
|
|
|
ee4fba |
+ typePropertyName(
|
|
|
ee4fba |
+ RDF "type"),
|
|
|
ee4fba |
+ fullnamePropertyName(
|
|
|
ee4fba |
+ NCO "fullname"),
|
|
|
ee4fba |
+ titlePropertyName(
|
|
|
ee4fba |
+ NIE "title"),
|
|
|
ee4fba |
+ albumTrackCountPropertyName(
|
|
|
ee4fba |
+ NMM_DRAFT "albumTrackCount"),
|
|
|
ee4fba |
+ discNumberPropertyName(
|
|
|
ee4fba |
+ NMM_DRAFT "setNumber"),
|
|
|
ee4fba |
+ discCountPropertyName(
|
|
|
ee4fba |
+ NMM_DRAFT "setSize"),
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ musicClassName(
|
|
|
ee4fba |
+ NMM_DRAFT "MusicPiece"),
|
|
|
ee4fba |
+ audioClassName(
|
|
|
ee4fba |
+ NFO "Audio"),
|
|
|
ee4fba |
+ albumClassName(
|
|
|
ee4fba |
+ NMM_DRAFT "MusicAlbum"),
|
|
|
ee4fba |
+ contactClassName(
|
|
|
ee4fba |
+ NCO "Contact");
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+/*
|
|
|
ee4fba |
+ENCA autodetection of broken encodings. First, need to make sure it's going to be actually useful.
|
|
|
ee4fba |
+ID3v2.0
|
|
|
ee4fba |
+play counter:needs nepomuk resolution
|
|
|
ee4fba |
+replaygain
|
|
|
ee4fba |
++lyrics
|
|
|
ee4fba |
++Improve:
|
|
|
ee4fba |
+ creation date:
|
|
|
ee4fba |
+ language: support multiple
|
|
|
ee4fba |
+ Genre
|
|
|
ee4fba |
+ album art type handling
|
|
|
ee4fba |
+VBR detection
|
|
|
ee4fba |
+*/
|
|
|
ee4fba |
+
|
|
|
ee4fba |
// Genre names indexed by the numeric genre code: the ID3v1 genre byte
// (buf[127] in analyze()) and the "(NN)" form of the ID3v2 TCON frame both
// index into this table. The trailing comment on each row gives the index of
// the row's first entry.
static const std::string genres[148] = {
    "Blues", "Classic Rock", "Country", "Dance",                                  //   0
    "Disco", "Funk", "Grunge", "Hip-Hop",                                         //   4
    "Jazz", "Metal", "New Age", "Oldies",                                         //   8
    "Other", "Pop", "R&B", "Rap",                                                 //  12
    "Reggae", "Rock", "Techno", "Industrial",                                     //  16
    "Alternative", "Ska", "Death Metal", "Pranks",                                //  20
    "Soundtrack", "Euro-Techno", "Ambient", "Trip-Hop",                           //  24
    "Vocal", "Jazz+Funk", "Fusion", "Trance",                                     //  28
    "Classical", "Instrumental", "Acid", "House",                                 //  32
    "Game", "Sound Clip", "Gospel", "Noise",                                      //  36
    "Alternative Rock", "Bass", "Soul", "Punk",                                   //  40
    "Space", "Meditative", "Instrumental Pop", "Instrumental Rock",               //  44
    "Ethnic", "Gothic", "Darkwave", "Techno-Industrial",                          //  48
    "Electronic", "Pop-Folk", "Eurodance", "Dream",                               //  52
    "Southern Rock", "Comedy", "Cult", "Gangsta",                                 //  56
    "Top 40", "Christian Rap", "Pop/Funk", "Jungle",                              //  60
    "Native American", "Cabaret", "New Wave", "Psychedelic",                      //  64
    "Rave", "Showtunes", "Trailer", "Lo-Fi",                                      //  68
    "Tribal", "Acid Punk", "Acid Jazz", "Polka",                                  //  72
    "Retro", "Musical", "Rock & Roll", "Hard Rock",                               //  76
    "Folk", "Folk/Rock", "National Folk", "Swing",                                //  80
    "Fusion", "Bebop", "Latin", "Revival",                                        //  84
    "Celtic", "Bluegrass", "Avantgarde", "Gothic Rock",                           //  88
    "Progressive Rock", "Psychedelic Rock", "Symphonic Rock", "Slow Rock",        //  92
    "Big Band", "Chorus", "Easy Listening", "Acoustic",                           //  96
    "Humour", "Speech", "Chanson", "Opera",                                       // 100
    "Chamber Music", "Sonata", "Symphony", "Booty Bass",                          // 104
    "Primus", "Porn Groove", "Satire", "Slow Jam",                                // 108
    "Club", "Tango", "Samba", "Folklore",                                         // 112
    "Ballad", "Power Ballad", "Rhythmic Soul", "Freestyle",                       // 116
    "Duet", "Punk Rock", "Drum Solo", "A Cappella",                               // 120
    "Euro-House", "Dance Hall", "Goa", "Drum & Bass",                             // 124
    "Club-House", "Hardcore", "Terror", "Indie",                                  // 128
    "BritPop", "Negerpunk", "Polsk Punk", "Beat",                                 // 132
    "Christian Gangsta Rap", "Heavy Metal", "Black Metal", "Crossover",           // 136
    "Contemporary Christian", "Christian Rock", "Merengue", "Salsa",              // 140
    "Thrash Metal", "Anime", "Jpop", "Synthpop"                                   // 144
};
|
|
|
ee4fba |
+
|
|
|
ee4fba |
// Bit rate in bits per second, indexed by the 4-bit bitrate index that
// analyze() extracts from the frame header (index 0xf is rejected there).
const uint32_t bitrate [15] = {
         0,  32000,  40000,  48000,  56000,
     64000,  80000,  96000, 112000, 128000,
    160000, 192000, 224000, 256000, 320000
};

// Sampling frequency in Hz, indexed by the 2-bit sample-rate index from the
// frame header (index 3 is rejected by analyze()).
const uint32_t samplerate[3] = {44100, 48000, 32000};

// iconv encoding names, indexed by the text-encoding byte that starts an
// ID3v2 frame body; analyze() falls back to entry 0 for values above 4.
const char * encodings[5] = {
    "ISO-8859-1",
    "UTF-16",
    "UTF-16BE",
    "UTF-8",
    "UTF-16LE"
};
|
|
|
ee4fba |
+
|
|
|
ee4fba |
#ifndef _GNU_SOURCE
/**
 * Fallback strnlen(), compiled only when _GNU_SOURCE is not defined.
 *
 * Returns the number of characters in s that precede the first NUL, looking
 * at no more than maxlen characters; returns maxlen when no NUL is found in
 * that range. s must point to at least maxlen readable bytes unless a NUL
 * occurs earlier.
 */
size_t
strnlen(const char *s, size_t maxlen) {
    for(size_t i=0; i<maxlen; i++)
        if (s[i]==0)
            return i;
    return maxlen;
}
#endif
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+class UTF8Convertor {
|
|
|
ee4fba |
+ private:
|
|
|
ee4fba |
+ iconv_t const conv;
|
|
|
ee4fba |
+ char *out;
|
|
|
ee4fba |
+ size_t capacity;
|
|
|
ee4fba |
+ public:
|
|
|
ee4fba |
+ UTF8Convertor(const char *encoding);
|
|
|
ee4fba |
+ const string convert(const char *data, size_t len);
|
|
|
ee4fba |
+ ~UTF8Convertor();
|
|
|
ee4fba |
+};
|
|
|
ee4fba |
+UTF8Convertor::UTF8Convertor(const char *encoding) :conv(iconv_open("UTF-8", encoding)), out(0), capacity(0) {
|
|
|
ee4fba |
+}
|
|
|
ee4fba |
+UTF8Convertor::~UTF8Convertor() {
|
|
|
ee4fba |
+ iconv_close(conv);
|
|
|
ee4fba |
+ if (out) free(out);
|
|
|
ee4fba |
+}
|
|
|
ee4fba |
+const string
|
|
|
ee4fba |
+UTF8Convertor::convert(const char *data, size_t len) {
|
|
|
ee4fba |
+ if (!len)
|
|
|
ee4fba |
+ return string();
|
|
|
ee4fba |
+ if ( capacity
|
|
|
ee4fba |
+ (capacity>10000 && capacity>len*8) ) {
|
|
|
ee4fba |
+ capacity = len*3;
|
|
|
ee4fba |
+ out = (char*)realloc(out, len*3);
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ char *result = out;
|
|
|
ee4fba |
+ size_t reslen = capacity;
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ ICONV_CONST char *input = (char *)data;
|
|
|
ee4fba |
+ iconv(conv, &input, &len, &result, &reslen);
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ return string(out,capacity-reslen);
|
|
|
ee4fba |
+}
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+void
|
|
|
ee4fba |
+ID3EndAnalyzerFactory::registerFields(FieldRegister& r) {
|
|
|
ee4fba |
+ createdField = r.registerField(NIE "contentCreated");
|
|
|
ee4fba |
+ subjectField = r.registerField(NIE "subject");
|
|
|
ee4fba |
+ titleField = r.registerField(titlePropertyName);
|
|
|
ee4fba |
+ descriptionField = r.registerField(NIE "description");
|
|
|
ee4fba |
+ commentField = r.registerField(NIE "comment");
|
|
|
ee4fba |
+ albumField = r.registerField(NMM_DRAFT "musicAlbum");
|
|
|
ee4fba |
+ genreField = r.registerField(NMM_DRAFT "genre");
|
|
|
ee4fba |
+ composerField = r.registerField(NMM_DRAFT "composer");
|
|
|
ee4fba |
+ performerField = r.registerField(NMM_DRAFT "performer");
|
|
|
ee4fba |
+ lyricistField = r.registerField(NMM_DRAFT "lyricist");
|
|
|
ee4fba |
+ publisherField = r.registerField(NCO "publisher");
|
|
|
ee4fba |
+ languageField = r.registerField(NIE "language");
|
|
|
ee4fba |
+ copyrightField = r.registerField(NIE "copyright");
|
|
|
ee4fba |
+ trackNumberField = r.registerField(NMM_DRAFT "trackNumber");
|
|
|
ee4fba |
+ discNumberField = r.registerField(discNumberPropertyName);
|
|
|
ee4fba |
+ durationField = r.registerField(NFO "duration");
|
|
|
ee4fba |
+ typeField = r.typeField;
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ bitrateField = r.registerField(NFO "averageBitrate");
|
|
|
ee4fba |
+ samplerateField = r.registerField(NFO "sampleRate");
|
|
|
ee4fba |
+ codecField = r.registerField(NFO "codec");
|
|
|
ee4fba |
+ channelsField = r.registerField(NFO "channels");
|
|
|
ee4fba |
+}
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+inline
|
|
|
ee4fba |
+void
|
|
|
ee4fba |
+addStatement(AnalysisResult &indexable, string& subject, const string& predicate, const string& object) {
|
|
|
ee4fba |
+ if (subject.empty())
|
|
|
ee4fba |
+ subject = indexable.newAnonymousUri();
|
|
|
ee4fba |
+ indexable.addTriplet(subject, predicate, object);
|
|
|
ee4fba |
+}
|
|
|
ee4fba |
+
|
|
|
ee4fba |
/**
 * Decodes a four-byte size field that carries seven payload bits per byte,
 * most significant byte first (the "syncsafe" form read from the tag header
 * in analyze()).
 *
 * Bytes are combined as b[0]<<21 + b[1]<<14 + b[2]<<7 + b[3]. Nothing is
 * validated here; readSize() is the variant that rejects bytes with the top
 * bit set.
 */
inline
int32_t readAsyncSize(const unsigned char* b) {
    int32_t total = 0;
    for (int k = 0; k < 4; ++k) {
        // Shift the accumulated value up by seven bits, then add the next byte.
        total = (total << 7) + (int32_t)b[k];
    }
    return total;
}
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+int32_t
|
|
|
ee4fba |
+readSize(const unsigned char* b, bool async) {
|
|
|
ee4fba |
+ const signed char* c = (const signed char*)b;
|
|
|
ee4fba |
+ if (async) {
|
|
|
ee4fba |
+ if (c[0] < 0 || c[1] < 0 || c[2] < 0 || c[3] < 0)
|
|
|
ee4fba |
+ return -1;
|
|
|
ee4fba |
+ return readAsyncSize(b);
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ return readBigEndianInt32(b);
|
|
|
ee4fba |
+}
|
|
|
ee4fba |
+bool
|
|
|
ee4fba |
+ID3EndAnalyzer::checkHeader(const char* header, int32_t headersize) const {
|
|
|
ee4fba |
+ const unsigned char* usbuf = (const unsigned char*)header;
|
|
|
ee4fba |
+ int32_t i;
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ for(i=0; (header[i] == '\0') && (i
|
|
|
ee4fba |
+ return (headersize>=6+i)
|
|
|
ee4fba |
+ && (
|
|
|
ee4fba |
+ (strncmp("ID3", header+i, 3) == 0 // check that it's ID3
|
|
|
ee4fba |
+ && usbuf[3+i] <= 4 // only handle version <= 4
|
|
|
ee4fba |
+ && (usbuf[5+i]&~0x80) == 0) // we're too dumb too handle other flags
|
|
|
ee4fba |
+ ||
|
|
|
ee4fba |
+ ((unsigned char)header[0+i] == 0xff && ((unsigned char)header[1+i]&0xfe) == 0xfa
|
|
|
ee4fba |
+ && (unsigned char)header[2+i]>>4 != 0xf // MP3 frame header is ok too
|
|
|
ee4fba |
+ && (((unsigned char)header[2+i]>>2)&3) != 3)
|
|
|
ee4fba |
+ );
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+}
|
|
|
ee4fba |
+
|
|
|
ee4fba |
/**
 * Removes, in place, every leading and trailing character of 's' that occurs
 * in 'drop' (a single space by default). A string consisting only of such
 * characters becomes empty.
 *
 * Both erasures are applied to 's' itself; the previous code erased the
 * leading characters from a temporary copy, so they were never removed.
 */
static void trim(std::string& s,const std::string& drop = " ")
{
    // find_last_not_of() returns npos when nothing is kept; npos+1 wraps to 0
    // and the whole string is erased.
    s.erase(s.find_last_not_of(drop)+1);
    // On an empty string find_first_not_of() returns npos and erase(0, npos)
    // is a no-op.
    s.erase(0, s.find_first_not_of(drop));
}
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+static bool extract_and_trim(const char* buf, int offset, int length, string& s)
|
|
|
ee4fba |
+{
|
|
|
ee4fba |
+ // We're extracting here the ID3v1 tags and doing some sanity checks:
|
|
|
ee4fba |
+ // 1) Strip of all leading and prefixed spaces
|
|
|
ee4fba |
+ // 2) Test if string contains at least something
|
|
|
ee4fba |
+ if (!buf[offset])
|
|
|
ee4fba |
+ return false;
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ s = string(buf + offset, strnlen(buf + offset, length));
|
|
|
ee4fba |
+ trim(s);
|
|
|
ee4fba |
+ // Return true if the extracted value is not empty (read: contains something)
|
|
|
ee4fba |
+ return !s.empty();
|
|
|
ee4fba |
+}
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+signed char
|
|
|
ee4fba |
+ID3EndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* in) {
|
|
|
ee4fba |
+ const int max_padding = 1000;
|
|
|
ee4fba |
+ if(!in)
|
|
|
ee4fba |
+ return -1;
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ bool found_title = false, found_artist = false,
|
|
|
ee4fba |
+ found_album = false, found_comment = false,
|
|
|
ee4fba |
+ found_year = false, found_track = false,
|
|
|
ee4fba |
+ found_genre = false, found_tag = false;
|
|
|
ee4fba |
+ string albumUri;
|
|
|
ee4fba |
+ char albumArtNum = '\0';
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ // read 10 byte header
|
|
|
ee4fba |
+ const char* buf;
|
|
|
ee4fba |
+ int32_t nread = in->read(buf, 10+max_padding, 10+max_padding);
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ // parse ID3v2* tag
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ if (nread == 10+max_padding && strncmp("ID3", buf, 3) == 0) { // check for ID3 header
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ bool async = buf[3] >= 4;
|
|
|
ee4fba |
+ bool unsync = (buf[5] & 0x80)!=0;
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ // calculate size from 4 syncsafe bytes
|
|
|
ee4fba |
+ int32_t size = readAsyncSize((unsigned char*)buf+6);
|
|
|
ee4fba |
+ if (size < 0 || size > 5000000)
|
|
|
ee4fba |
+ return -1;
|
|
|
ee4fba |
+ size += 10+4+max_padding; // add the size of the ID3 header, MP3 frame header and possible padding generated by LAME(workaround)
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ // read the entire tag
|
|
|
ee4fba |
+ in->reset(0);
|
|
|
ee4fba |
+ nread = in->read(buf, size, size);
|
|
|
ee4fba |
+ if (nread != size)
|
|
|
ee4fba |
+ return -1;
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ found_tag = true;
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ const char* p = buf + 10;
|
|
|
ee4fba |
+ buf += size-4-max_padding;
|
|
|
ee4fba |
+ while (p < buf && *p) {
|
|
|
ee4fba |
+ size = readSize((unsigned char*)p+4, async);
|
|
|
ee4fba |
+ if (size <= 0 || size > (buf-p)-10) {
|
|
|
ee4fba |
+ //cerr << "size < 0: " << size << endl;
|
|
|
ee4fba |
+ break;
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ string value;
|
|
|
ee4fba |
+ uint8_t enc = p[10];
|
|
|
ee4fba |
+ const char *encoding = enc>4 ? encodings[0] : encodings[enc] ;
|
|
|
ee4fba |
+ UTF8Convertor conv(encoding);
|
|
|
ee4fba |
+ const char *decoded_value;
|
|
|
ee4fba |
+ int32_t decoded_value_size;
|
|
|
ee4fba |
+ string deunsyncbuf;
|
|
|
ee4fba |
+ if (unsync) {
|
|
|
ee4fba |
+ deunsyncbuf.reserve(size-1);
|
|
|
ee4fba |
+ for(int32_t i = 0; i
|
|
|
ee4fba |
+ if ( (i==0) || (p[11+i]!=0) || (p[10+i]!=0xff) )
|
|
|
ee4fba |
+ deunsyncbuf.push_back(p[11+i]);
|
|
|
ee4fba |
+ decoded_value = deunsyncbuf.c_str();
|
|
|
ee4fba |
+ decoded_value_size = deunsyncbuf.length();
|
|
|
ee4fba |
+ } else {
|
|
|
ee4fba |
+ decoded_value = p+11;
|
|
|
ee4fba |
+ decoded_value_size = size-1;
|
|
|
ee4fba |
+ };
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ if (strncmp("APIC", p, 4) == 0) {
|
|
|
ee4fba |
+ size_t mimelen = strnlen(decoded_value, decoded_value_size);
|
|
|
ee4fba |
+ if ((int32_t)mimelen < decoded_value_size-3) {
|
|
|
ee4fba |
+ const char *desc = decoded_value+mimelen+1+1;
|
|
|
ee4fba |
+// uint8_t pictype = p[11+mimelen+1];
|
|
|
ee4fba |
+ size_t desclen = strnlen(desc,decoded_value_size-mimelen-2-1);
|
|
|
ee4fba |
+ const char *content = desc + desclen + 1 + (enc == 0 || enc == 3 ? 0:1) ;
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ if(content
|
|
|
ee4fba |
+ StringInputStream picstream(content,
|
|
|
ee4fba |
+ (uint32_t)(decoded_value+decoded_value_size-content), false);
|
|
|
ee4fba |
+ string picname;
|
|
|
ee4fba |
+ picname = (char)('0'+albumArtNum++);
|
|
|
ee4fba |
+ indexable.indexChild(picname, indexable.mTime(), &picstream);
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ if (desclen && indexable.child()) {
|
|
|
ee4fba |
+ if (enc == 0 || enc == 3) {
|
|
|
ee4fba |
+ indexable.child()->addValue(factory->descriptionField, string(desc, desclen) );
|
|
|
ee4fba |
+ } else {
|
|
|
ee4fba |
+ indexable.child()->addValue(factory->descriptionField, conv.convert(desc, desclen) );
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ indexable.finishIndexChild();
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ if (enc == 0 || enc == 3) {
|
|
|
ee4fba |
+ value = string(decoded_value, strnlen(decoded_value, decoded_value_size));
|
|
|
ee4fba |
+ } else {
|
|
|
ee4fba |
+ value = conv.convert(decoded_value, decoded_value_size); // FIXME: add similar workaround
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ if (!value.empty()) {
|
|
|
ee4fba |
+ if (strncmp("TIT1", p, 4) == 0) {
|
|
|
ee4fba |
+ indexable.addValue(factory->subjectField, value);
|
|
|
ee4fba |
+ } else if (strncmp("TIT2", p, 4) == 0) {
|
|
|
ee4fba |
+ indexable.addValue(factory->titleField, value);
|
|
|
ee4fba |
+ found_title = true;
|
|
|
ee4fba |
+ } else if (strncmp("TIT3", p, 4) == 0) {
|
|
|
ee4fba |
+ indexable.addValue(factory->descriptionField, value);
|
|
|
ee4fba |
+ } else if (strncmp("TLAN", p, 4) == 0) {
|
|
|
ee4fba |
+ indexable.addValue(factory->languageField, value);
|
|
|
ee4fba |
+ } else if (strncmp("TCOP", p, 4) == 0) {
|
|
|
ee4fba |
+ indexable.addValue(factory->copyrightField, value);
|
|
|
ee4fba |
+ } else if ((strncmp("TDRL", p, 4) == 0) ||
|
|
|
ee4fba |
+ (strncmp("TDAT", p, 4) == 0) ||
|
|
|
ee4fba |
+ (strncmp("TYER", p, 4) == 0) ||
|
|
|
ee4fba |
+ (strncmp("TDRC", p, 4) == 0)) {
|
|
|
ee4fba |
+ indexable.addValue(factory->createdField, value);
|
|
|
ee4fba |
+ found_year = true;
|
|
|
ee4fba |
+ } else if ((strncmp("TPE1", p, 4) == 0) ||
|
|
|
ee4fba |
+ (strncmp("TPE2", p, 4) == 0) ||
|
|
|
ee4fba |
+ (strncmp("TPE3", p, 4) == 0) ||
|
|
|
ee4fba |
+ (strncmp("TPE4", p, 4) == 0)) {
|
|
|
ee4fba |
+ string performerUri = indexable.newAnonymousUri();
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ indexable.addValue(factory->performerField, performerUri);
|
|
|
ee4fba |
+ indexable.addTriplet(performerUri, typePropertyName, contactClassName);
|
|
|
ee4fba |
+ indexable.addTriplet(performerUri, fullnamePropertyName, value);
|
|
|
ee4fba |
+ found_artist = true;
|
|
|
ee4fba |
+ } else if ((strncmp("TPUB", p, 4) == 0) ||
|
|
|
ee4fba |
+ (strncmp("TENC", p, 4) == 0)) {
|
|
|
ee4fba |
+ string publisherUri = indexable.newAnonymousUri();
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ indexable.addValue(factory->publisherField, publisherUri);
|
|
|
ee4fba |
+ indexable.addTriplet(publisherUri, typePropertyName, contactClassName);
|
|
|
ee4fba |
+ indexable.addTriplet(publisherUri, fullnamePropertyName, value);
|
|
|
ee4fba |
+ } else if ((strncmp("TALB", p, 4) == 0) ||
|
|
|
ee4fba |
+ (strncmp("TOAL", p, 4) == 0)) {
|
|
|
ee4fba |
+ addStatement(indexable, albumUri, titlePropertyName, value);
|
|
|
ee4fba |
+ found_album = true;
|
|
|
ee4fba |
+ } else if (strncmp("TCON", p, 4) == 0) {
|
|
|
ee4fba |
+ // The Genre is stored as (number)
|
|
|
ee4fba |
+ if( value[0] == '(' && value[value.length()-1] == ')' ) {
|
|
|
ee4fba |
+ //vHanda: Maybe one should check if all the characters in between are digits
|
|
|
ee4fba |
+ int genreIndex = atoi( value.substr( 1, value.length()-1 ).c_str() );
|
|
|
ee4fba |
+ indexable.addValue(factory->genreField, genres[ genreIndex ]);
|
|
|
ee4fba |
+ found_genre = true;
|
|
|
ee4fba |
+ } else {
|
|
|
ee4fba |
+ // We must not forget that genre could be a string.
|
|
|
ee4fba |
+ if (!value.empty()) {
|
|
|
ee4fba |
+ indexable.addValue(factory->genreField, value);
|
|
|
ee4fba |
+ found_genre = true;
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ } else if (strncmp("TLEN", p, 4) == 0) {
|
|
|
ee4fba |
+ indexable.addValue(factory->durationField, value);
|
|
|
ee4fba |
+ } else if (strncmp("TEXT", p, 4) == 0) {
|
|
|
ee4fba |
+ string lyricistUri = indexable.newAnonymousUri();
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ indexable.addValue(factory->lyricistField, lyricistUri);
|
|
|
ee4fba |
+ indexable.addTriplet(lyricistUri, typePropertyName, contactClassName);
|
|
|
ee4fba |
+ indexable.addTriplet(lyricistUri, fullnamePropertyName, value);
|
|
|
ee4fba |
+ } else if (strncmp("TCOM", p, 4) == 0) {
|
|
|
ee4fba |
+ string composerUri = indexable.newAnonymousUri();
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ indexable.addValue(factory->composerField, composerUri);
|
|
|
ee4fba |
+ indexable.addTriplet(composerUri, typePropertyName, contactClassName);
|
|
|
ee4fba |
+ indexable.addTriplet(composerUri, fullnamePropertyName, value);
|
|
|
ee4fba |
+ } else if (strncmp("TRCK", p, 4) == 0) {
|
|
|
ee4fba |
+ istringstream ins(value);
|
|
|
ee4fba |
+ int tnum;
|
|
|
ee4fba |
+ ins >> tnum;
|
|
|
ee4fba |
+ if (!ins.fail()) {
|
|
|
ee4fba |
+ indexable.addValue(factory->trackNumberField, tnum);
|
|
|
ee4fba |
+ found_track = true;
|
|
|
ee4fba |
+ ins.ignore(10,'/');
|
|
|
ee4fba |
+ int tcount;
|
|
|
ee4fba |
+ ins >> tcount;
|
|
|
ee4fba |
+ if (!ins.fail()) {
|
|
|
ee4fba |
+ ostringstream outs;
|
|
|
ee4fba |
+ outs << tcount;
|
|
|
ee4fba |
+ addStatement(indexable, albumUri, albumTrackCountPropertyName, outs.str());
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ } else if (strncmp("TPOS", p, 4) == 0) {
|
|
|
ee4fba |
+ istringstream ins(value);
|
|
|
ee4fba |
+ int dnum;
|
|
|
ee4fba |
+ ins >> dnum;
|
|
|
ee4fba |
+ if (!ins.fail()) {
|
|
|
ee4fba |
+ indexable.addValue(factory->discNumberField, dnum);
|
|
|
ee4fba |
+ ins.ignore(10,'/');
|
|
|
ee4fba |
+ int dcount;
|
|
|
ee4fba |
+ ins >> dcount;
|
|
|
ee4fba |
+ if (!ins.fail()) {
|
|
|
ee4fba |
+ ostringstream outs;
|
|
|
ee4fba |
+ outs << dcount;
|
|
|
ee4fba |
+ addStatement(indexable, albumUri, discCountPropertyName, outs.str());
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ p += size + 10;
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ // parse MP3 frame header
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ int bitrateindex, samplerateindex;
|
|
|
ee4fba |
+ int i;
|
|
|
ee4fba |
+ for(i=0; (buf[i]=='\0') && (i
|
|
|
ee4fba |
+ if (((unsigned char)buf[0+i] == 0xff) && (((unsigned char)buf[1+i]&0xfe) == 0xfa)
|
|
|
ee4fba |
+ && ((bitrateindex = ((unsigned char)buf[2+i]>>4)) != 0xf)
|
|
|
ee4fba |
+ && ((samplerateindex = (((unsigned char)buf[2+i]>>2)&3)) != 3 )) { // is this MP3?
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ indexable.addValue(factory->typeField, audioClassName);
|
|
|
ee4fba |
+ // FIXME: no support for VBR :(
|
|
|
ee4fba |
+ // ideas: compare bitrate from the frame with stream size/duration from ID3 tags
|
|
|
ee4fba |
+ // check several consecutive frames to see if bitrate is different
|
|
|
ee4fba |
+ // in neither case you can be sure to properly detected VBR :(
|
|
|
ee4fba |
+ indexable.addValue(factory->bitrateField, bitrate[bitrateindex]);
|
|
|
ee4fba |
+ indexable.addValue(factory->samplerateField, samplerate[samplerateindex]);
|
|
|
ee4fba |
+ indexable.addValue(factory->codecField, "MP3");
|
|
|
ee4fba |
+ indexable.addValue(factory->channelsField, ((buf[3+i]>>6) == 3 ? 1:2 ) );
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ // Parse ID3v1 tag
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ int64_t insize;
|
|
|
ee4fba |
+ if ( (insize = in->size()) > (128+nread)) {
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ // read the tag and check signature
|
|
|
ee4fba |
+ int64_t nskip = insize-128-nread;
|
|
|
ee4fba |
+ if (nskip == in->skip(nskip))
|
|
|
ee4fba |
+ if (in->read(buf, 128, 128)==128)
|
|
|
ee4fba |
+ if (!strncmp("TAG", buf, 3)) {
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ found_tag = true;
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ std::string s;
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ if (!found_title && extract_and_trim(buf, 3, 30, s)) {
|
|
|
ee4fba |
+ indexable.addValue(factory->titleField, s);
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ if (!found_artist && extract_and_trim(buf, 33, 30, s)) {
|
|
|
ee4fba |
+ const string performerUri = indexable.newAnonymousUri();
|
|
|
ee4fba |
+ indexable.addValue(factory->performerField, performerUri);
|
|
|
ee4fba |
+ indexable.addTriplet(performerUri, typePropertyName, contactClassName);
|
|
|
ee4fba |
+ indexable.addTriplet(performerUri, fullnamePropertyName, s);
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ if (!found_album && extract_and_trim(buf, 63, 30, s))
|
|
|
ee4fba |
+ addStatement(indexable, albumUri, titlePropertyName, s);
|
|
|
ee4fba |
+ if (!found_year && extract_and_trim(buf, 93, 4, s))
|
|
|
ee4fba |
+ indexable.addValue(factory->createdField, s);
|
|
|
ee4fba |
+ if (!found_comment && extract_and_trim(buf, 97, 30, s)) {
|
|
|
ee4fba |
+ indexable.addValue(factory->commentField, s);
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ if (!found_track && !buf[125] && buf[126]) {
|
|
|
ee4fba |
+ indexable.addValue(factory->trackNumberField, (int)(buf[126]));
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ if (!found_genre && (unsigned char)(buf[127]) < 148)
|
|
|
ee4fba |
+ indexable.addValue(factory->genreField, genres[(uint8_t)buf[127]]);
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ if(!albumUri.empty()) {
|
|
|
ee4fba |
+ indexable.addValue(factory->albumField, albumUri);
|
|
|
ee4fba |
+ indexable.addTriplet(albumUri, typePropertyName, albumClassName);
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ if (found_tag)
|
|
|
ee4fba |
+ indexable.addValue(factory->typeField, musicClassName);
|
|
|
ee4fba |
+
|
|
|
ee4fba |
+ return 0;
|
|
|
ee4fba |
+}
|
|
|
ee4fba |
--
|
|
|
ee4fba |
1.7.10.4
|
|
|
ee4fba |
|