|
|
ee4fba |
From 907162391395412c058d7339c4f84533ef92023d Mon Sep 17 00:00:00 2001
|
|
|
ee4fba |
From: Ignacio Serantes <kde@aynoa.net>
|
|
|
ee4fba |
Date: Sun, 17 Jun 2012 21:38:31 +0200
|
|
|
ee4fba |
Subject: [PATCH 7/8] Opps! Rmoving a wrong commited file,
|
|
|
ee4fba |
"id3endanalyzer.cpp", and updating the right one
|
|
|
ee4fba |
"lib/endanalyzers/id3endanalyzer.cpp".
|
|
|
ee4fba |
|
|
|
ee4fba |
---
|
|
|
ee4fba |
id3endanalyzer.cpp | 646 -----------------------------------
|
|
|
ee4fba |
lib/endanalyzers/id3endanalyzer.cpp | 20 +-
|
|
|
ee4fba |
2 files changed, 13 insertions(+), 653 deletions(-)
|
|
|
ee4fba |
delete mode 100644 id3endanalyzer.cpp
|
|
|
ee4fba |
|
|
|
ee4fba |
diff --git a/id3endanalyzer.cpp b/id3endanalyzer.cpp
|
|
|
ee4fba |
deleted file mode 100644
|
|
|
ee4fba |
index 677ece0..0000000
|
|
|
ee4fba |
--- a/id3endanalyzer.cpp
|
|
|
ee4fba |
+++ /dev/null
|
|
|
ee4fba |
@@ -1,646 +0,0 @@
|
|
|
ee4fba |
-/* This file is part of Strigi Desktop Search
|
|
|
ee4fba |
- *
|
|
|
ee4fba |
- * Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info>
|
|
|
ee4fba |
- * 2009 Evgeny Egorochkin <phreedom.stdin@gmail.com>
|
|
|
ee4fba |
- *
|
|
|
ee4fba |
- * This library is free software; you can redistribute it and/or
|
|
|
ee4fba |
- * modify it under the terms of the GNU Library General Public
|
|
|
ee4fba |
- * License as published by the Free Software Foundation; either
|
|
|
ee4fba |
- * version 2 of the License, or (at your option) any later version.
|
|
|
ee4fba |
- *
|
|
|
ee4fba |
- * This library is distributed in the hope that it will be useful,
|
|
|
ee4fba |
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
ee4fba |
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
ee4fba |
- * Library General Public License for more details.
|
|
|
ee4fba |
- *
|
|
|
ee4fba |
- * You should have received a copy of the GNU Library General Public License
|
|
|
ee4fba |
- * along with this library; see the file COPYING.LIB. If not, write to
|
|
|
ee4fba |
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
|
ee4fba |
- * Boston, MA 02110-1301, USA.
|
|
|
ee4fba |
- */
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-#ifdef HAVE_CONFIG_H
|
|
|
ee4fba |
-# include "config.h"
|
|
|
ee4fba |
-#endif
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-#include "id3endanalyzer.h"
|
|
|
ee4fba |
-#include "analysisresult.h"
|
|
|
ee4fba |
-#include "../rdfnamespaces.h"
|
|
|
ee4fba |
-#include <strigi/strigiconfig.h>
|
|
|
ee4fba |
-#include <strigi/textutils.h>
|
|
|
ee4fba |
-#include <strigi/stringstream.h>
|
|
|
ee4fba |
-#include <iostream>
|
|
|
ee4fba |
-#include <sstream>
|
|
|
ee4fba |
-#include <cstring>
|
|
|
ee4fba |
-#include <cstdlib>
|
|
|
ee4fba |
-#include <iconv.h>
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-#ifdef ICONV_SECOND_ARGUMENT_IS_CONST
|
|
|
ee4fba |
- #define ICONV_CONST const
|
|
|
ee4fba |
-#else
|
|
|
ee4fba |
- #define ICONV_CONST
|
|
|
ee4fba |
-#endif
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-using namespace Strigi;
|
|
|
ee4fba |
-using namespace std;
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-const string
|
|
|
ee4fba |
- typePropertyName(
|
|
|
ee4fba |
- RDF "type"),
|
|
|
ee4fba |
- fullnamePropertyName(
|
|
|
ee4fba |
- NCO "fullname"),
|
|
|
ee4fba |
- titlePropertyName(
|
|
|
ee4fba |
- NIE "title"),
|
|
|
ee4fba |
- albumTrackCountPropertyName(
|
|
|
ee4fba |
- NMM_DRAFT "albumTrackCount"),
|
|
|
ee4fba |
- discNumberPropertyName(
|
|
|
ee4fba |
- NMM_DRAFT "setNumber"),
|
|
|
ee4fba |
- discCountPropertyName(
|
|
|
ee4fba |
- NMM_DRAFT "setSize"),
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- musicClassName(
|
|
|
ee4fba |
- NMM_DRAFT "MusicPiece"),
|
|
|
ee4fba |
- audioClassName(
|
|
|
ee4fba |
- NFO "Audio"),
|
|
|
ee4fba |
- albumClassName(
|
|
|
ee4fba |
- NMM_DRAFT "MusicAlbum"),
|
|
|
ee4fba |
- contactClassName(
|
|
|
ee4fba |
- NCO "Contact");
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-/*
|
|
|
ee4fba |
-ENCA autodetection of broken encodings. First, need to make sure it's going to be actually useful.
|
|
|
ee4fba |
-ID3v2.0
|
|
|
ee4fba |
-play counter:needs nepomuk resolution
|
|
|
ee4fba |
-replaygain
|
|
|
ee4fba |
-+lyrics
|
|
|
ee4fba |
-+Improve:
|
|
|
ee4fba |
- creation date:
|
|
|
ee4fba |
- language: support multiple
|
|
|
ee4fba |
- Genre
|
|
|
ee4fba |
- album art type handling
|
|
|
ee4fba |
-VBR detection
|
|
|
ee4fba |
-*/
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-static const string genres[148] = {
|
|
|
ee4fba |
- "Blues",
|
|
|
ee4fba |
- "Classic Rock",
|
|
|
ee4fba |
- "Country",
|
|
|
ee4fba |
- "Dance",
|
|
|
ee4fba |
- "Disco",
|
|
|
ee4fba |
- "Funk",
|
|
|
ee4fba |
- "Grunge",
|
|
|
ee4fba |
- "Hip-Hop",
|
|
|
ee4fba |
- "Jazz",
|
|
|
ee4fba |
- "Metal",
|
|
|
ee4fba |
- "New Age",
|
|
|
ee4fba |
- "Oldies",
|
|
|
ee4fba |
- "Other",
|
|
|
ee4fba |
- "Pop",
|
|
|
ee4fba |
- "R&B",
|
|
|
ee4fba |
- "Rap",
|
|
|
ee4fba |
- "Reggae",
|
|
|
ee4fba |
- "Rock",
|
|
|
ee4fba |
- "Techno",
|
|
|
ee4fba |
- "Industrial",
|
|
|
ee4fba |
- "Alternative",
|
|
|
ee4fba |
- "Ska",
|
|
|
ee4fba |
- "Death Metal",
|
|
|
ee4fba |
- "Pranks",
|
|
|
ee4fba |
- "Soundtrack",
|
|
|
ee4fba |
- "Euro-Techno",
|
|
|
ee4fba |
- "Ambient",
|
|
|
ee4fba |
- "Trip-Hop",
|
|
|
ee4fba |
- "Vocal",
|
|
|
ee4fba |
- "Jazz+Funk",
|
|
|
ee4fba |
- "Fusion",
|
|
|
ee4fba |
- "Trance",
|
|
|
ee4fba |
- "Classical",
|
|
|
ee4fba |
- "Instrumental",
|
|
|
ee4fba |
- "Acid",
|
|
|
ee4fba |
- "House",
|
|
|
ee4fba |
- "Game",
|
|
|
ee4fba |
- "Sound Clip",
|
|
|
ee4fba |
- "Gospel",
|
|
|
ee4fba |
- "Noise",
|
|
|
ee4fba |
- "Alternative Rock",
|
|
|
ee4fba |
- "Bass",
|
|
|
ee4fba |
- "Soul",
|
|
|
ee4fba |
- "Punk",
|
|
|
ee4fba |
- "Space",
|
|
|
ee4fba |
- "Meditative",
|
|
|
ee4fba |
- "Instrumental Pop",
|
|
|
ee4fba |
- "Instrumental Rock",
|
|
|
ee4fba |
- "Ethnic",
|
|
|
ee4fba |
- "Gothic",
|
|
|
ee4fba |
- "Darkwave",
|
|
|
ee4fba |
- "Techno-Industrial",
|
|
|
ee4fba |
- "Electronic",
|
|
|
ee4fba |
- "Pop-Folk",
|
|
|
ee4fba |
- "Eurodance",
|
|
|
ee4fba |
- "Dream",
|
|
|
ee4fba |
- "Southern Rock",
|
|
|
ee4fba |
- "Comedy",
|
|
|
ee4fba |
- "Cult",
|
|
|
ee4fba |
- "Gangsta",
|
|
|
ee4fba |
- "Top 40",
|
|
|
ee4fba |
- "Christian Rap",
|
|
|
ee4fba |
- "Pop/Funk",
|
|
|
ee4fba |
- "Jungle",
|
|
|
ee4fba |
- "Native American",
|
|
|
ee4fba |
- "Cabaret",
|
|
|
ee4fba |
- "New Wave",
|
|
|
ee4fba |
- "Psychedelic",
|
|
|
ee4fba |
- "Rave",
|
|
|
ee4fba |
- "Showtunes",
|
|
|
ee4fba |
- "Trailer",
|
|
|
ee4fba |
- "Lo-Fi",
|
|
|
ee4fba |
- "Tribal",
|
|
|
ee4fba |
- "Acid Punk",
|
|
|
ee4fba |
- "Acid Jazz",
|
|
|
ee4fba |
- "Polka",
|
|
|
ee4fba |
- "Retro",
|
|
|
ee4fba |
- "Musical",
|
|
|
ee4fba |
- "Rock & Roll",
|
|
|
ee4fba |
- "Hard Rock",
|
|
|
ee4fba |
- "Folk",
|
|
|
ee4fba |
- "Folk/Rock",
|
|
|
ee4fba |
- "National Folk",
|
|
|
ee4fba |
- "Swing",
|
|
|
ee4fba |
- "Fusion",
|
|
|
ee4fba |
- "Bebop",
|
|
|
ee4fba |
- "Latin",
|
|
|
ee4fba |
- "Revival",
|
|
|
ee4fba |
- "Celtic",
|
|
|
ee4fba |
- "Bluegrass",
|
|
|
ee4fba |
- "Avantgarde",
|
|
|
ee4fba |
- "Gothic Rock",
|
|
|
ee4fba |
- "Progressive Rock",
|
|
|
ee4fba |
- "Psychedelic Rock",
|
|
|
ee4fba |
- "Symphonic Rock",
|
|
|
ee4fba |
- "Slow Rock",
|
|
|
ee4fba |
- "Big Band",
|
|
|
ee4fba |
- "Chorus",
|
|
|
ee4fba |
- "Easy Listening",
|
|
|
ee4fba |
- "Acoustic",
|
|
|
ee4fba |
- "Humour",
|
|
|
ee4fba |
- "Speech",
|
|
|
ee4fba |
- "Chanson",
|
|
|
ee4fba |
- "Opera",
|
|
|
ee4fba |
- "Chamber Music",
|
|
|
ee4fba |
- "Sonata",
|
|
|
ee4fba |
- "Symphony",
|
|
|
ee4fba |
- "Booty Bass",
|
|
|
ee4fba |
- "Primus",
|
|
|
ee4fba |
- "Porn Groove",
|
|
|
ee4fba |
- "Satire",
|
|
|
ee4fba |
- "Slow Jam",
|
|
|
ee4fba |
- "Club",
|
|
|
ee4fba |
- "Tango",
|
|
|
ee4fba |
- "Samba",
|
|
|
ee4fba |
- "Folklore",
|
|
|
ee4fba |
- "Ballad",
|
|
|
ee4fba |
- "Power Ballad",
|
|
|
ee4fba |
- "Rhythmic Soul",
|
|
|
ee4fba |
- "Freestyle",
|
|
|
ee4fba |
- "Duet",
|
|
|
ee4fba |
- "Punk Rock",
|
|
|
ee4fba |
- "Drum Solo",
|
|
|
ee4fba |
- "A Cappella",
|
|
|
ee4fba |
- "Euro-House",
|
|
|
ee4fba |
- "Dance Hall",
|
|
|
ee4fba |
- "Goa",
|
|
|
ee4fba |
- "Drum & Bass",
|
|
|
ee4fba |
- "Club-House",
|
|
|
ee4fba |
- "Hardcore",
|
|
|
ee4fba |
- "Terror",
|
|
|
ee4fba |
- "Indie",
|
|
|
ee4fba |
- "BritPop",
|
|
|
ee4fba |
- "Negerpunk",
|
|
|
ee4fba |
- "Polsk Punk",
|
|
|
ee4fba |
- "Beat",
|
|
|
ee4fba |
- "Christian Gangsta Rap",
|
|
|
ee4fba |
- "Heavy Metal",
|
|
|
ee4fba |
- "Black Metal",
|
|
|
ee4fba |
- "Crossover",
|
|
|
ee4fba |
- "Contemporary Christian",
|
|
|
ee4fba |
- "Christian Rock",
|
|
|
ee4fba |
- "Merengue",
|
|
|
ee4fba |
- "Salsa",
|
|
|
ee4fba |
- "Thrash Metal",
|
|
|
ee4fba |
- "Anime",
|
|
|
ee4fba |
- "Jpop",
|
|
|
ee4fba |
- "Synthpop"
|
|
|
ee4fba |
-};
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-const uint32_t bitrate [15] = {0, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000, 160000, 192000, 224000, 256000, 320000 };
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-const uint32_t samplerate[3] = {44100, 48000, 32000};
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-const char * encodings[5] = {"ISO-8859-1", "UTF-16", "UTF-16BE", "UTF-8", "UTF-16LE"};
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-#ifndef _GNU_SOURCE
|
|
|
ee4fba |
-size_t
|
|
|
ee4fba |
-strnlen(const char *s, size_t maxlen) {
|
|
|
ee4fba |
- for(size_t i=0; i
|
|
|
ee4fba |
- if (s[i]==0)
|
|
|
ee4fba |
- return i;
|
|
|
ee4fba |
- return maxlen;
|
|
|
ee4fba |
-}
|
|
|
ee4fba |
-#endif
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-class UTF8Convertor {
|
|
|
ee4fba |
- private:
|
|
|
ee4fba |
- iconv_t const conv;
|
|
|
ee4fba |
- char *out;
|
|
|
ee4fba |
- size_t capacity;
|
|
|
ee4fba |
- public:
|
|
|
ee4fba |
- UTF8Convertor(const char *encoding);
|
|
|
ee4fba |
- const string convert(const char *data, size_t len);
|
|
|
ee4fba |
- ~UTF8Convertor();
|
|
|
ee4fba |
-};
|
|
|
ee4fba |
-UTF8Convertor::UTF8Convertor(const char *encoding) :conv(iconv_open("UTF-8", encoding)), out(0), capacity(0) {
|
|
|
ee4fba |
-}
|
|
|
ee4fba |
-UTF8Convertor::~UTF8Convertor() {
|
|
|
ee4fba |
- iconv_close(conv);
|
|
|
ee4fba |
- if (out) free(out);
|
|
|
ee4fba |
-}
|
|
|
ee4fba |
-const string
|
|
|
ee4fba |
-UTF8Convertor::convert(const char *data, size_t len) {
|
|
|
ee4fba |
- if (!len)
|
|
|
ee4fba |
- return string();
|
|
|
ee4fba |
- if ( capacity
|
|
|
ee4fba |
- (capacity>10000 && capacity>len*8) ) {
|
|
|
ee4fba |
- capacity = len*3;
|
|
|
ee4fba |
- out = (char*)realloc(out, len*3);
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- char *result = out;
|
|
|
ee4fba |
- size_t reslen = capacity;
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- ICONV_CONST char *input = (char *)data;
|
|
|
ee4fba |
- iconv(conv, &input, &len, &result, &reslen);
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- return string(out,capacity-reslen);
|
|
|
ee4fba |
-}
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-void
|
|
|
ee4fba |
-ID3EndAnalyzerFactory::registerFields(FieldRegister& r) {
|
|
|
ee4fba |
- createdField = r.registerField(NIE "contentCreated");
|
|
|
ee4fba |
- subjectField = r.registerField(NIE "subject");
|
|
|
ee4fba |
- titleField = r.registerField(titlePropertyName);
|
|
|
ee4fba |
- descriptionField = r.registerField(NIE "description");
|
|
|
ee4fba |
- commentField = r.registerField(NIE "comment");
|
|
|
ee4fba |
- albumField = r.registerField(NMM_DRAFT "musicAlbum");
|
|
|
ee4fba |
- genreField = r.registerField(NMM_DRAFT "genre");
|
|
|
ee4fba |
- composerField = r.registerField(NMM_DRAFT "composer");
|
|
|
ee4fba |
- performerField = r.registerField(NMM_DRAFT "performer");
|
|
|
ee4fba |
- lyricistField = r.registerField(NMM_DRAFT "lyricist");
|
|
|
ee4fba |
- publisherField = r.registerField(NCO "publisher");
|
|
|
ee4fba |
- languageField = r.registerField(NIE "language");
|
|
|
ee4fba |
- copyrightField = r.registerField(NIE "copyright");
|
|
|
ee4fba |
- trackNumberField = r.registerField(NMM_DRAFT "trackNumber");
|
|
|
ee4fba |
- discNumberField = r.registerField(discNumberPropertyName);
|
|
|
ee4fba |
- durationField = r.registerField(NFO "duration");
|
|
|
ee4fba |
- typeField = r.typeField;
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- bitrateField = r.registerField(NFO "averageBitrate");
|
|
|
ee4fba |
- samplerateField = r.registerField(NFO "sampleRate");
|
|
|
ee4fba |
- codecField = r.registerField(NFO "codec");
|
|
|
ee4fba |
- channelsField = r.registerField(NFO "channels");
|
|
|
ee4fba |
-}
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-inline
|
|
|
ee4fba |
-void
|
|
|
ee4fba |
-addStatement(AnalysisResult &indexable, string& subject, const string& predicate, const string& object) {
|
|
|
ee4fba |
- if (subject.empty())
|
|
|
ee4fba |
- subject = indexable.newAnonymousUri();
|
|
|
ee4fba |
- indexable.addTriplet(subject, predicate, object);
|
|
|
ee4fba |
-}
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-inline
|
|
|
ee4fba |
-int32_t readAsyncSize(const unsigned char* b) {
|
|
|
ee4fba |
- return (((int32_t)b[0])<<21) + (((int32_t)b[1])<<14)
|
|
|
ee4fba |
- + (((int32_t)b[2])<<7) + ((int32_t)b[3]);
|
|
|
ee4fba |
-}
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-int32_t
|
|
|
ee4fba |
-readSize(const unsigned char* b, bool async) {
|
|
|
ee4fba |
- const signed char* c = (const signed char*)b;
|
|
|
ee4fba |
- if (async) {
|
|
|
ee4fba |
- if (c[0] < 0 || c[1] < 0 || c[2] < 0 || c[3] < 0)
|
|
|
ee4fba |
- return -1;
|
|
|
ee4fba |
- return readAsyncSize(b);
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- return readBigEndianInt32(b);
|
|
|
ee4fba |
-}
|
|
|
ee4fba |
-bool
|
|
|
ee4fba |
-ID3EndAnalyzer::checkHeader(const char* header, int32_t headersize) const {
|
|
|
ee4fba |
- const unsigned char* usbuf = (const unsigned char*)header;
|
|
|
ee4fba |
- int32_t i;
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- for(i=0; (header[i] == '\0') && (i
|
|
|
ee4fba |
- return (headersize>=6+i)
|
|
|
ee4fba |
- && (
|
|
|
ee4fba |
- (strncmp("ID3", header+i, 3) == 0 // check that it's ID3
|
|
|
ee4fba |
- && usbuf[3+i] <= 4 // only handle version <= 4
|
|
|
ee4fba |
- && (usbuf[5+i]&~0x80) == 0) // we're too dumb too handle other flags
|
|
|
ee4fba |
- ||
|
|
|
ee4fba |
- ((unsigned char)header[0+i] == 0xff && ((unsigned char)header[1+i]&0xfe) == 0xfa
|
|
|
ee4fba |
- && (unsigned char)header[2+i]>>4 != 0xf // MP3 frame header is ok too
|
|
|
ee4fba |
- && (((unsigned char)header[2+i]>>2)&3) != 3)
|
|
|
ee4fba |
- );
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-}
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-static void trim(string& s,const string& drop = " ")
|
|
|
ee4fba |
-{
|
|
|
ee4fba |
- string r = s.erase(s.find_last_not_of(drop)+1);
|
|
|
ee4fba |
- r.erase(0, r.find_first_not_of(drop));
|
|
|
ee4fba |
-}
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-static bool extract_and_trim(const char* buf, int offset, int length, string& s)
|
|
|
ee4fba |
-{
|
|
|
ee4fba |
- // We're extracting here the ID3v1 tags and doing some sanity checks:
|
|
|
ee4fba |
- // 1) Strip of all leading and prefixed spaces
|
|
|
ee4fba |
- // 2) Test if string contains at least something
|
|
|
ee4fba |
- if (!buf[offset])
|
|
|
ee4fba |
- return false;
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- s = string(buf + offset, strnlen(buf + offset, length));
|
|
|
ee4fba |
- trim(s);
|
|
|
ee4fba |
- // Return true if the extracted value is not empty (read: contains something)
|
|
|
ee4fba |
- return !s.empty();
|
|
|
ee4fba |
-}
|
|
|
ee4fba |
-
|
|
|
ee4fba |
-signed char
|
|
|
ee4fba |
-ID3EndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* in) {
|
|
|
ee4fba |
- const int max_padding = 1000;
|
|
|
ee4fba |
- if(!in)
|
|
|
ee4fba |
- return -1;
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- bool found_title = false, found_artist = false,
|
|
|
ee4fba |
- found_album = false, found_comment = false,
|
|
|
ee4fba |
- found_year = false, found_track = false,
|
|
|
ee4fba |
- found_genre = false, found_tag = false;
|
|
|
ee4fba |
- string albumUri;
|
|
|
ee4fba |
- char albumArtNum = '\0';
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- // read 10 byte header
|
|
|
ee4fba |
- const char* buf;
|
|
|
ee4fba |
- int32_t nread = in->read(buf, 10+max_padding, 10+max_padding);
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- // parse ID3v2* tag
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- if (nread == 10+max_padding && strncmp("ID3", buf, 3) == 0) { // check for ID3 header
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- bool async = buf[3] >= 4;
|
|
|
ee4fba |
- bool unsync = (buf[5] & 0x80)!=0;
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- // calculate size from 4 syncsafe bytes
|
|
|
ee4fba |
- int32_t size = readAsyncSize((unsigned char*)buf+6);
|
|
|
ee4fba |
- if (size < 0 || size > 5000000)
|
|
|
ee4fba |
- return -1;
|
|
|
ee4fba |
- size += 10+4+max_padding; // add the size of the ID3 header, MP3 frame header and possible padding generated by LAME(workaround)
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- // read the entire tag
|
|
|
ee4fba |
- in->reset(0);
|
|
|
ee4fba |
- nread = in->read(buf, size, size);
|
|
|
ee4fba |
- if (nread != size)
|
|
|
ee4fba |
- return -1;
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- found_tag = true;
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- const char* p = buf + 10;
|
|
|
ee4fba |
- buf += size-4-max_padding;
|
|
|
ee4fba |
- while (p < buf && *p) {
|
|
|
ee4fba |
- size = readSize((unsigned char*)p+4, async);
|
|
|
ee4fba |
- if (size <= 0 || size > (buf-p)-10) {
|
|
|
ee4fba |
- //cerr << "size < 0: " << size << endl;
|
|
|
ee4fba |
- break;
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- string value;
|
|
|
ee4fba |
- uint8_t enc = p[10];
|
|
|
ee4fba |
- const char *encoding = enc>4 ? encodings[0] : encodings[enc] ;
|
|
|
ee4fba |
- UTF8Convertor conv(encoding);
|
|
|
ee4fba |
- const char *decoded_value;
|
|
|
ee4fba |
- int32_t decoded_value_size;
|
|
|
ee4fba |
- string deunsyncbuf;
|
|
|
ee4fba |
- if (unsync) {
|
|
|
ee4fba |
- deunsyncbuf.reserve(size-1);
|
|
|
ee4fba |
- for(int32_t i = 0; i
|
|
|
ee4fba |
- if ( (i==0) || (p[11+i]!=0) || (p[10+i]!=0xff) )
|
|
|
ee4fba |
- deunsyncbuf.push_back(p[11+i]);
|
|
|
ee4fba |
- decoded_value = deunsyncbuf.c_str();
|
|
|
ee4fba |
- decoded_value_size = deunsyncbuf.length();
|
|
|
ee4fba |
- } else {
|
|
|
ee4fba |
- decoded_value = p+11;
|
|
|
ee4fba |
- decoded_value_size = size-1;
|
|
|
ee4fba |
- };
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- if (strncmp("APIC", p, 4) == 0) {
|
|
|
ee4fba |
- size_t mimelen = strnlen(decoded_value, decoded_value_size);
|
|
|
ee4fba |
- if ((int32_t)mimelen < decoded_value_size-3) {
|
|
|
ee4fba |
- const char *desc = decoded_value+mimelen+1+1;
|
|
|
ee4fba |
-// uint8_t pictype = p[11+mimelen+1];
|
|
|
ee4fba |
- size_t desclen = strnlen(desc,decoded_value_size-mimelen-2-1);
|
|
|
ee4fba |
- const char *content = desc + desclen + 1 + (enc == 0 || enc == 3 ? 0:1) ;
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- if(content
|
|
|
ee4fba |
- StringInputStream picstream(content,
|
|
|
ee4fba |
- (uint32_t)(decoded_value+decoded_value_size-content), false);
|
|
|
ee4fba |
- string picname;
|
|
|
ee4fba |
- picname = (char)('0'+albumArtNum++);
|
|
|
ee4fba |
- indexable.indexChild(picname, indexable.mTime(), &picstream);
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- if (desclen && indexable.child()) {
|
|
|
ee4fba |
- if (enc == 0 || enc == 3) {
|
|
|
ee4fba |
- indexable.child()->addValue(factory->descriptionField, string(desc, desclen) );
|
|
|
ee4fba |
- } else {
|
|
|
ee4fba |
- indexable.child()->addValue(factory->descriptionField, conv.convert(desc, desclen) );
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- indexable.finishIndexChild();
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- if (enc == 0 || enc == 3) {
|
|
|
ee4fba |
- value = string(decoded_value, strnlen(decoded_value, decoded_value_size));
|
|
|
ee4fba |
- } else {
|
|
|
ee4fba |
- value = conv.convert(decoded_value, decoded_value_size); // FIXME: add similar workaround
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- if (!value.empty()) {
|
|
|
ee4fba |
- if (strncmp("TIT1", p, 4) == 0) {
|
|
|
ee4fba |
- indexable.addValue(factory->subjectField, value);
|
|
|
ee4fba |
- } else if (strncmp("TIT2", p, 4) == 0) {
|
|
|
ee4fba |
- indexable.addValue(factory->titleField, value);
|
|
|
ee4fba |
- found_title = true;
|
|
|
ee4fba |
- } else if (strncmp("TIT3", p, 4) == 0) {
|
|
|
ee4fba |
- indexable.addValue(factory->descriptionField, value);
|
|
|
ee4fba |
- } else if (strncmp("TLAN", p, 4) == 0) {
|
|
|
ee4fba |
- indexable.addValue(factory->languageField, value);
|
|
|
ee4fba |
- } else if (strncmp("TCOP", p, 4) == 0) {
|
|
|
ee4fba |
- indexable.addValue(factory->copyrightField, value);
|
|
|
ee4fba |
- } else if ((strncmp("TDRL", p, 4) == 0) ||
|
|
|
ee4fba |
- (strncmp("TDAT", p, 4) == 0) ||
|
|
|
ee4fba |
- (strncmp("TYER", p, 4) == 0) ||
|
|
|
ee4fba |
- (strncmp("TDRC", p, 4) == 0)) {
|
|
|
ee4fba |
- indexable.addValue(factory->createdField, value);
|
|
|
ee4fba |
- found_year = true;
|
|
|
ee4fba |
- } else if ((strncmp("TPE1", p, 4) == 0) ||
|
|
|
ee4fba |
- (strncmp("TPE2", p, 4) == 0) ||
|
|
|
ee4fba |
- (strncmp("TPE3", p, 4) == 0) ||
|
|
|
ee4fba |
- (strncmp("TPE4", p, 4) == 0)) {
|
|
|
ee4fba |
- string performerUri = indexable.newAnonymousUri();
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- indexable.addValue(factory->performerField, performerUri);
|
|
|
ee4fba |
- indexable.addTriplet(performerUri, typePropertyName, contactClassName);
|
|
|
ee4fba |
- indexable.addTriplet(performerUri, fullnamePropertyName, value);
|
|
|
ee4fba |
- found_artist = true;
|
|
|
ee4fba |
- } else if ((strncmp("TPUB", p, 4) == 0) ||
|
|
|
ee4fba |
- (strncmp("TENC", p, 4) == 0)) {
|
|
|
ee4fba |
- string publisherUri = indexable.newAnonymousUri();
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- indexable.addValue(factory->publisherField, publisherUri);
|
|
|
ee4fba |
- indexable.addTriplet(publisherUri, typePropertyName, contactClassName);
|
|
|
ee4fba |
- indexable.addTriplet(publisherUri, fullnamePropertyName, value);
|
|
|
ee4fba |
- } else if ((strncmp("TALB", p, 4) == 0) ||
|
|
|
ee4fba |
- (strncmp("TOAL", p, 4) == 0)) {
|
|
|
ee4fba |
- addStatement(indexable, albumUri, titlePropertyName, value);
|
|
|
ee4fba |
- found_album = true;
|
|
|
ee4fba |
- } else if (strncmp("TCON", p, 4) == 0) {
|
|
|
ee4fba |
- // The Genre is stored as (number)
|
|
|
ee4fba |
- if( value[0] == '(' && value[value.length()-1] == ')' ) {
|
|
|
ee4fba |
- //vHanda: Maybe one should check if all the characters in between are digits
|
|
|
ee4fba |
- int genreIndex = atoi( value.substr( 1, value.length()-1 ).c_str() );
|
|
|
ee4fba |
- indexable.addValue(factory->genreField, genres[ genreIndex ]);
|
|
|
ee4fba |
- found_genre = true;
|
|
|
ee4fba |
- } else {
|
|
|
ee4fba |
- // We must not forget that genre could be a string.
|
|
|
ee4fba |
- if (!value.empty()) {
|
|
|
ee4fba |
- indexable.addValue(factory->genreField, value);
|
|
|
ee4fba |
- found_genre = true;
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- } else if (strncmp("TLEN", p, 4) == 0) {
|
|
|
ee4fba |
- indexable.addValue(factory->durationField, value);
|
|
|
ee4fba |
- } else if (strncmp("TEXT", p, 4) == 0) {
|
|
|
ee4fba |
- string lyricistUri = indexable.newAnonymousUri();
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- indexable.addValue(factory->lyricistField, lyricistUri);
|
|
|
ee4fba |
- indexable.addTriplet(lyricistUri, typePropertyName, contactClassName);
|
|
|
ee4fba |
- indexable.addTriplet(lyricistUri, fullnamePropertyName, value);
|
|
|
ee4fba |
- } else if (strncmp("TCOM", p, 4) == 0) {
|
|
|
ee4fba |
- string composerUri = indexable.newAnonymousUri();
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- indexable.addValue(factory->composerField, composerUri);
|
|
|
ee4fba |
- indexable.addTriplet(composerUri, typePropertyName, contactClassName);
|
|
|
ee4fba |
- indexable.addTriplet(composerUri, fullnamePropertyName, value);
|
|
|
ee4fba |
- } else if (strncmp("TRCK", p, 4) == 0) {
|
|
|
ee4fba |
- istringstream ins(value);
|
|
|
ee4fba |
- int tnum;
|
|
|
ee4fba |
- ins >> tnum;
|
|
|
ee4fba |
- if (!ins.fail()) {
|
|
|
ee4fba |
- indexable.addValue(factory->trackNumberField, tnum);
|
|
|
ee4fba |
- found_track = true;
|
|
|
ee4fba |
- ins.ignore(10,'/');
|
|
|
ee4fba |
- int tcount;
|
|
|
ee4fba |
- ins >> tcount;
|
|
|
ee4fba |
- if (!ins.fail()) {
|
|
|
ee4fba |
- ostringstream outs;
|
|
|
ee4fba |
- outs << tcount;
|
|
|
ee4fba |
- addStatement(indexable, albumUri, albumTrackCountPropertyName, outs.str());
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- } else if (strncmp("TPOS", p, 4) == 0) {
|
|
|
ee4fba |
- istringstream ins(value);
|
|
|
ee4fba |
- int dnum;
|
|
|
ee4fba |
- ins >> dnum;
|
|
|
ee4fba |
- if (!ins.fail()) {
|
|
|
ee4fba |
- indexable.addValue(factory->discNumberField, dnum);
|
|
|
ee4fba |
- ins.ignore(10,'/');
|
|
|
ee4fba |
- int dcount;
|
|
|
ee4fba |
- ins >> dcount;
|
|
|
ee4fba |
- if (!ins.fail()) {
|
|
|
ee4fba |
- ostringstream outs;
|
|
|
ee4fba |
- outs << dcount;
|
|
|
ee4fba |
- addStatement(indexable, albumUri, discCountPropertyName, outs.str());
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- p += size + 10;
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- // parse MP3 frame header
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- int bitrateindex, samplerateindex;
|
|
|
ee4fba |
- int i;
|
|
|
ee4fba |
- for(i=0; (buf[i]=='\0') && (i
|
|
|
ee4fba |
- if (((unsigned char)buf[0+i] == 0xff) && (((unsigned char)buf[1+i]&0xfe) == 0xfa)
|
|
|
ee4fba |
- && ((bitrateindex = ((unsigned char)buf[2+i]>>4)) != 0xf)
|
|
|
ee4fba |
- && ((samplerateindex = (((unsigned char)buf[2+i]>>2)&3)) != 3 )) { // is this MP3?
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- indexable.addValue(factory->typeField, audioClassName);
|
|
|
ee4fba |
- // FIXME: no support for VBR :(
|
|
|
ee4fba |
- // ideas: compare bitrate from the frame with stream size/duration from ID3 tags
|
|
|
ee4fba |
- // check several consecutive frames to see if bitrate is different
|
|
|
ee4fba |
- // in neither case you can be sure to properly detected VBR :(
|
|
|
ee4fba |
- indexable.addValue(factory->bitrateField, bitrate[bitrateindex]);
|
|
|
ee4fba |
- indexable.addValue(factory->samplerateField, samplerate[samplerateindex]);
|
|
|
ee4fba |
- indexable.addValue(factory->codecField, "MP3");
|
|
|
ee4fba |
- indexable.addValue(factory->channelsField, ((buf[3+i]>>6) == 3 ? 1:2 ) );
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- // Parse ID3v1 tag
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- int64_t insize;
|
|
|
ee4fba |
- if ( (insize = in->size()) > (128+nread)) {
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- // read the tag and check signature
|
|
|
ee4fba |
- int64_t nskip = insize-128-nread;
|
|
|
ee4fba |
- if (nskip == in->skip(nskip))
|
|
|
ee4fba |
- if (in->read(buf, 128, 128)==128)
|
|
|
ee4fba |
- if (!strncmp("TAG", buf, 3)) {
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- found_tag = true;
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- std::string s;
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- if (!found_title && extract_and_trim(buf, 3, 30, s)) {
|
|
|
ee4fba |
- indexable.addValue(factory->titleField, s);
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- if (!found_artist && extract_and_trim(buf, 33, 30, s)) {
|
|
|
ee4fba |
- const string performerUri = indexable.newAnonymousUri();
|
|
|
ee4fba |
- indexable.addValue(factory->performerField, performerUri);
|
|
|
ee4fba |
- indexable.addTriplet(performerUri, typePropertyName, contactClassName);
|
|
|
ee4fba |
- indexable.addTriplet(performerUri, fullnamePropertyName, s);
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- if (!found_album && extract_and_trim(buf, 63, 30, s))
|
|
|
ee4fba |
- addStatement(indexable, albumUri, titlePropertyName, s);
|
|
|
ee4fba |
- if (!found_year && extract_and_trim(buf, 93, 4, s))
|
|
|
ee4fba |
- indexable.addValue(factory->createdField, s);
|
|
|
ee4fba |
- if (!found_comment && extract_and_trim(buf, 97, 30, s)) {
|
|
|
ee4fba |
- indexable.addValue(factory->commentField, s);
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- if (!found_track && !buf[125] && buf[126]) {
|
|
|
ee4fba |
- indexable.addValue(factory->trackNumberField, (int)(buf[126]));
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- if (!found_genre && (unsigned char)(buf[127]) < 148)
|
|
|
ee4fba |
- indexable.addValue(factory->genreField, genres[(uint8_t)buf[127]]);
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- if(!albumUri.empty()) {
|
|
|
ee4fba |
- indexable.addValue(factory->albumField, albumUri);
|
|
|
ee4fba |
- indexable.addTriplet(albumUri, typePropertyName, albumClassName);
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- if (found_tag)
|
|
|
ee4fba |
- indexable.addValue(factory->typeField, musicClassName);
|
|
|
ee4fba |
-
|
|
|
ee4fba |
- return 0;
|
|
|
ee4fba |
-}
|
|
|
ee4fba |
diff --git a/lib/endanalyzers/id3endanalyzer.cpp b/lib/endanalyzers/id3endanalyzer.cpp
|
|
|
ee4fba |
index d8487b5..677ece0 100644
|
|
|
ee4fba |
--- a/lib/endanalyzers/id3endanalyzer.cpp
|
|
|
ee4fba |
+++ b/lib/endanalyzers/id3endanalyzer.cpp
|
|
|
ee4fba |
@@ -512,13 +512,19 @@ ID3EndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream*
|
|
|
ee4fba |
addStatement(indexable, albumUri, titlePropertyName, value);
|
|
|
ee4fba |
found_album = true;
|
|
|
ee4fba |
} else if (strncmp("TCON", p, 4) == 0) {
|
|
|
ee4fba |
- // The Genre is stored as (number)
|
|
|
ee4fba |
- if( value[0] == '(' && value[value.length()-1] == ')' ) {
|
|
|
ee4fba |
- //vHanda: Maybe one should check if all the characters in between are digits
|
|
|
ee4fba |
- int genreIndex = atoi( value.substr( 1, value.length()-1 ).c_str() );
|
|
|
ee4fba |
- indexable.addValue(factory->genreField, genres[ genreIndex ]);
|
|
|
ee4fba |
- found_genre = true;
|
|
|
ee4fba |
- }
|
|
|
ee4fba |
+ // The Genre is stored as (number)
|
|
|
ee4fba |
+ if( value[0] == '(' && value[value.length()-1] == ')' ) {
|
|
|
ee4fba |
+ //vHanda: Maybe one should check if all the characters in between are digits
|
|
|
ee4fba |
+ int genreIndex = atoi( value.substr( 1, value.length()-1 ).c_str() );
|
|
|
ee4fba |
+ indexable.addValue(factory->genreField, genres[ genreIndex ]);
|
|
|
ee4fba |
+ found_genre = true;
|
|
|
ee4fba |
+ } else {
|
|
|
ee4fba |
+ // We must not forget that genre could be a string.
|
|
|
ee4fba |
+ if (!value.empty()) {
|
|
|
ee4fba |
+ indexable.addValue(factory->genreField, value);
|
|
|
ee4fba |
+ found_genre = true;
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
+ }
|
|
|
ee4fba |
} else if (strncmp("TLEN", p, 4) == 0) {
|
|
|
ee4fba |
indexable.addValue(factory->durationField, value);
|
|
|
ee4fba |
} else if (strncmp("TEXT", p, 4) == 0) {
|
|
|
ee4fba |
--
|
|
|
ee4fba |
1.7.10.4
|
|
|
ee4fba |
|