New upstream version 1.4.7.

parent 6e6a6bae
2011-12-20 RELEASE OF VERSION 1.4.7
2011-12-17 Added keyword excerpt view
2011-12-13 Optimized keyword extraction citer cleanup
2011-12-05 Propagated --conf flags to child processes
2011-11-18 Improved citer navigation
2011-09-18 Added document similarity view
2011-09-18 Added POS tagger and keyword patterns
2011-08-14 Added automatic keyword extraction from documents
2011-08-03 RELEASE OF VERSION 1.4.6
2011-08-02 Set minor improvements for volume, number, page recognition
2011-08-01 Made shortcuts customizable
......
......@@ -281,7 +281,7 @@ CI|Ceram. Int.|Ceramics International
CI|Chem. Ind. (London)|Chemistry and Industry
CI|Compos. Interfaces|Composite Interfaces
CI|Concr. Int.|Concrete International
CILS|Chemom. Intell. Lab. Syst.|Chemometrics and Intelligent Laborary Systems
CILS|Chemom. Intell. Lab. Syst.|Chemometrics and Intelligent Laboratory Systems
CIMB|CIM Bull.|CIM Bulletin
CIT|Chem. Ing. Tech.|Chemie Ingenieur Technik
CITRM|Cah. Inf. Tech./Rev Metall|Cahiers d'Informations Techniques / Revue de Metallurgie
......
This diff is collapsed.
......@@ -13,7 +13,7 @@
# - The non BibTeX tag <<selection>> is the placeholder for the current text selection.
# - Some special characters, such as |, might need percent encoding to properly work.
# Bookmark Examples:
bookmark=PubMed Citation Finder|http://www.ncbi.nlm.nih.gov/entrez/query/static/citmatch.html
bookmark=PubMed Citation Finder|http://www.ncbi.nlm.nih.gov/pubmed/citmatch
bookmark=HighWire Press -- Search|http://highwire.stanford.edu/cgi/search/
bookmark=DBLP Bibliography|http://www.informatik.uni-trier.de/~ley/db/indices/query.html
bookmark=arXiv.org e-Print archive|http://arxiv.org/
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#include "bookmarkPlugin.h"
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#ifndef BOOKMARKPLUGIN_H
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#include "c2b.h"
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#ifndef C2B_H
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
*
* Class implementation of the approximate search algorithm
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
*
* Class implementation of the approximate search algorithm
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#include "authorString.h"
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#ifndef AUTHORSTRING_H
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#include "bibExtractor.h"
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#ifndef BIBEXTRACTOR_H
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#include "bibParser.h"
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#ifndef BIBPARSER_H
......@@ -51,6 +51,10 @@ public:
{
return _journal_dbP->retrieve(name);
}
// Forwards to _journal_dbP->abbreviatedSimplifiedList() and hands back its
// result by const reference; no copy of the list is made here.
inline const QStringList& abbreviatedSimplifiedJournalList() const
{
return _journal_dbP->abbreviatedSimplifiedList();
}
inline QString fullJournal(const QString& name) const
{
return _journal_dbP->retrieveFull(name);
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#include "bibPreparser.h"
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#ifndef BIBPREPARSER_H
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#ifndef BIBREFERENCE_H
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#include "bibSearcher.h"
#include "bibParser.h"
#include "cb2bib_utilities.h"
#include "settings.h"
#include <QCoreApplication>
......@@ -117,6 +116,11 @@ bibSearcher::bibSearcher(bibParser* bp, const QString& bib_dir, QObject* parento
_do_search_similar_citeid = '@';
}
// Default constructor: leaves the parser pointer _bpP null, disables the
// similarity search flag, and then calls clear().
// NOTE(review): members not listed here are presumably reset by clear() -- confirm.
bibSearcher::bibSearcher() : _bpP(0), _do_search_similar(false)
{
clear();
}
void bibSearcher::addPattern(bool Not, bool caseSensitive, const QString& patternType, const QString& scope,
const QChar& yearScope, const QString& pattern)
......@@ -150,18 +154,12 @@ void bibSearcher::exec()
}
// Search In Files
QFileInfoList flist;
if (_all_bibtex_files)
{
QDir bibdir(_bibtex_dir);
flist = bibdir.entryInfoList(QStringList() << "*.bib");
}
else
flist.append(QFileInfo(_bibtex_file));
const QStringList flist(_all_bibtex_files ?
c2bUtils::filesInDir(_bibtex_dir, QStringList() << "*.bib") :
c2bUtils::filesInDir(_bibtex_file, QStringList() << "*.bib"));
for (int i = 0; i < flist.count(); ++i)
{
search(flist.at(i).absoluteFilePath());
search(flist.at(i));
if (_aborted)
{
clear();
......@@ -181,6 +179,31 @@ void bibSearcher::exec()
_log_string += tr("% Total Unique Hits: %1\n").arg(_hits_map.count());
}
/**
    Builds the excerpts page for one keyword inside one document.

    The keyword is turned into a regular expression: every non-word
    character becomes ".{0,5}", every lowercase 's' becomes ".?", and the
    result is wrapped as "\b...\w*\b". The pattern is added as a
    non-negated, case-insensitive RegularExpression pattern with scope "all".

    \param bibtexfn   file name passed to the document loader.
    \param documentfn document to be made current in the loader.
    \param keyword    keyword to look for; also used as the excerpts title.
    \return the filled excerpts.html template, or an empty string when the
            document cannot be made current or the pattern does not match.
*/
QString bibSearcher::searchDocumentKeyword(const QString& bibtexfn, const QString& documentfn, const QString& keyword)
{
    bibSearcher searcher;
    QString page;
    searcher._documents.load(bibtexfn, documentContents::Raw);
    if (searcher._documents.setCurrent(documentfn))
    {
        // Relax the keyword so that punctuation and 's' variations still match
        QString relaxed(keyword);
        relaxed.replace(QRegExp("\\W"), ".{0,5}");
        relaxed.replace("s", ".?");
        const QString expression("\\b" + relaxed + "\\w*\\b");
        searcher.addPattern(false, false, searchPattern::type(searchPattern::RegularExpression), "all", QChar(),
                            expression);

        QString text(searcher._documents.current().text());
        c2bUtils::stripDiacritics(text);
        if (searcher._patterns.at(0).matches(text))
        {
            page = c2bUtils::fileToString(":/htm/htm/excerpts.html");
            page.replace("GET_EXCERPTS_TITLE", keyword);
            // The first 20 characters of the excerpts string are skipped
            // NOTE(review): presumably a fixed leading markup -- confirm against excerpts()
            page.replace("GET_EXCERPTS", searcher.excerpts(text).mid(20));
        }
    }
    // Documents are unloaded on every path, whether or not a match was found
    searcher._documents.unload();
    return page;
}
void bibSearcher::abort()
{
_aborted = true;
......@@ -356,8 +379,8 @@ const QString bibSearcher::excerpts(const QString& contents) const
for (int i = 0; i < _patterns.count(); ++i)
{
const searchPattern& pattern = _patterns.at(i);
const searchPattern::modifiers& modifier = pattern.modifier();
const searchPattern& pattern(_patterns.at(i));
const searchPattern::modifiers& modifier(pattern.modifier());
if (modifier.NOT)
continue;
if (modifier.scope != "all" && modifier.scope != "file")
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#ifndef BIBSEARCHER_H
......@@ -29,6 +29,8 @@ public:
bibSearcher(bibParser* bp, const QString& bib_dir, QObject* parento = 0);
inline ~bibSearcher() {}
static QString searchDocumentKeyword(const QString& bibtexfn, const QString& documentfn, const QString& keyword);
const QString highlight(const QString& abstract) const;
void addPattern(bool Not, bool caseSensitive, const QString& patternType, const QString& scope,
const QChar& yearScope, const QString& pattern);
......@@ -94,6 +96,8 @@ public slots:
private:
bibSearcher();
QList<searchPattern> _patterns;
QMap<QString, QString> _hits_map;
QString _bibtex_dir;
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#ifndef CB2BIB_CONF_PARAMETERS_H
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#ifndef CB2BIB_PARAMETERS_H
......@@ -33,7 +33,7 @@ const QString C2B_ICON_DISCONNECT_B("connect_no.png");
const QString C2B_ICON_VIEWBIB_B("viewbib.png");
const QString C2B_ICON_VIEWC2B_B("viewcb.png");
const QString C2B_ORGANIZATION("MOLspaces");
const QString C2B_VERSION("1.4.6");
const QString C2B_VERSION("1.4.7");
// File Manager Client
#ifdef Q_WS_X11
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#include "cb2bib_utilities.h"
......@@ -56,6 +56,57 @@ QString& simplifyString(QString& str)
return str;
}
/**
    Overwrites, in place, every non-overlapping occurrence of the matcher's
    pattern in str with the character ch. The length of str is unchanged:
    each matched character is replaced by one ch.

    \param str     string to modify.
    \param pattern matcher holding the literal pattern to look for.
    \param ch      fill character written over each match.
    \return str itself, to allow chaining.

    An empty pattern is treated as "nothing to fill": it would otherwise
    match at every offset without consuming input and never terminate.
*/
QString& fillString(QString& str, const QStringMatcher& pattern, const QChar& ch)
{
    const int pl(pattern.pattern().length());
    if (pl == 0 || str.isEmpty())
        return str;
    // Raw access to the string buffer; data() detaches once, before scanning
    QChar* const c0(str.data());
    int p(pattern.indexIn(str, 0));
    while (p > -1)
    {
        QChar* c(c0 + p);
        const QChar* const cpl(c + pl);
        while (c < cpl)
            *c++ = ch;
        // Resume right after the filled span so matches never overlap
        p = pattern.indexIn(str, p + pl);
    }
    return str;
}
// Convenience overload: wraps pattern in a case-sensitive QStringMatcher and
// delegates to the QStringMatcher overload of fillString. Returns str.
QString& fillString(QString& str, const QString& pattern, const QChar& ch)
{
return fillString(str, QStringMatcher(pattern, Qt::CaseSensitive), ch);
}
/**
    Overwrites, in place, every non-overlapping match of the regular
    expression pattern in str with the character ch. The length of str is
    unchanged: each matched character is replaced by one ch.

    \param str     string to modify.
    \param pattern regular expression to look for.
    \param ch      fill character written over each match.
    \return str itself, to allow chaining.

    Zero-length matches (e.g. from "a*") fill nothing; the scan steps one
    character forward past them so that it always terminates.
*/
QString& fillString(QString& str, const QRegExp& pattern, const QChar& ch)
{
    if (str.isEmpty())
        return str;
    const int length(str.length());
    // Raw access to the string buffer; data() detaches once, before scanning
    QChar* const c0(str.data());
    int p(pattern.indexIn(str, 0));
    while (p > -1)
    {
        const int pl(pattern.matchedLength());
        if (pl > 0)
        {
            QChar* c(c0 + p);
            const QChar* const cpl(c + pl);
            while (c < cpl)
                *c++ = ch;
            p += pl;
        }
        else
            ++p; // Empty match: move on, otherwise the same offset is found again
        if (p > length)
            break;
        p = pattern.indexIn(str, p);
    }
    return str;
}
static const unsigned short _cyrillic_to_ascii[] =
{
// Code points 1024 to 1309
......@@ -231,6 +282,12 @@ QString toAscii(const QString& str, const AsciiConversion type)
return ascii;
}
// Applies _to_ascii_transliterate() to str in place and returns the same
// reference.
// NOTE(review): the helper presumably maps accented letters to their plain
// ASCII counterparts -- confirm against its definition.
QString& stripDiacritics(QString& str)
{
_to_ascii_transliterate(str);
return str;
}
QString& c2bToBib(QString& str)
{
// Escape common Extended Latin Characters
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.6. Licensed under the GNU GPL version 3.
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#ifndef CB2BIB_UTILITIES_H
......@@ -28,7 +28,11 @@ extern QString setCapitalization(const QString& str);
extern QString toAscii(const QString& str, const AsciiConversion type);
extern QString& bibToC2b(QString& str);
extern QString& c2bToBib(QString& str);
extern QString& fillString(QString& str, const QRegExp& pattern, const QChar& ch);
extern QString& fillString(QString& str, const QString& pattern, const QChar& ch);
extern QString& fillString(QString& str, const QStringMatcher& pattern, const QChar& ch);
extern QString& simplifyString(QString& str);
extern QString& stripDiacritics(QString& str);
static const QRegExp pnasGreekLetters("[\\{\\[\\(](alpha|beta|gamma|delta|varepsilon|"
"zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|"
......@@ -270,6 +274,24 @@ inline bool stringToFile(const QString& str, const QString& fn)
return false;
}
/**
    Lists files as cleaned, absolute paths with native separators.

    \param dir     path to a directory or to a single file.
    \param filters name filters applied to the directory listing.
    \return - an empty list if dir does not exist;
            - the entries of dir matching filters if dir is a directory;
            - a one-element list holding dir itself if it is a regular file
              (filters are not applied in this case).
*/
inline QStringList filesInDir(const QString& dir, const QStringList& filters)
{
    QStringList found;
    const QFileInfo target(dir);
    if (!target.exists())
        return found;
    if (target.isDir())
    {
        const QFileInfoList entries(QDir(dir).entryInfoList(filters));
        for (QFileInfoList::const_iterator it = entries.constBegin(); it != entries.constEnd(); ++it)
            found.append(QDir::toNativeSeparators(QDir::cleanPath(it->absoluteFilePath())));
    }
    else if (target.isFile())
        found.append(QDir::toNativeSeparators(QDir::cleanPath(target.absoluteFilePath())));
    return found;
}
inline int nearInteger(double a)
{
int ia = int(a);
......
/***************************************************************************
* Copyright (C) 2004-2011 by Pere Constans
* constans@molspaces.com
* cb2Bib version 1.4.7. Licensed under the GNU GPL version 3.
* See the LICENSE file that comes with this distribution.
***************************************************************************/
#include "collectionAnalyzer.h"
#include "settings.h"
#include <QVector>
#include <cmath>
/**
    Comparator over integer indices into a container of strings.

    Index i is ordered before index j when the element at i is shorter than
    the element at j; elements of equal length are ordered by their own
    operator<. The container is held by reference and must outlive the
    comparator.
*/
template <typename T> class lengthsorting
{
public:
    lengthsorting(const T& data) : _data(data) {}

    // const-qualified so the comparator can be called through const objects
    // and references; it never modifies any state
    inline bool operator()(const int i, const int j) const
    {
        const int ni(_data[i].length());
        const int nj(_data[j].length());
        if (ni == nj)
            return _data[i] < _data[j];
        return ni < nj;
    }

private:
    const T& _data;
};
// Template double_sort was adapted from qSortHelper by Pere Constans.
// qSortHelper is copyrighted by:
// Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies),
// and released under GPL/LGPL license.
// http://www.qt.gitorious.org/qt/qt/blobs/4.7/src/corelib/tools/qalgorithms.h
// Quicksort over the index range [start, end) of *v1 that mirrors every swap
// on *v2, so elements of *v2 stay paired with their keys in *v1.
// Keys are compared with operator>, so larger keys are moved towards the
// front of the range.
// - start: first index of the range to sort.
// - end:   one past the last index of the range to sort.
// - v1:    pointer to the indexable container holding the sort keys.
// - v2:    pointer to the indexable container permuted alongside *v1.
template <typename T1, typename T2>
void double_sort(int start, int end, T1* v1, T2* v2)
{
top:
int span = end - start;
// Fewer than two elements: nothing to do
if (span < 2) return;
// From here on 'end' indexes the last element of the range
--end;
int low = start;
int high = end - 1;
int pivot = start + span / 2;
// Order the first, middle and last elements among themselves
if ((*v1)[end] > (*v1)[start])
{
qSwap((*v1)[end], (*v1)[start]);
qSwap((*v2)[end], (*v2)[start]);
}
if (span == 2) return;
if ((*v1)[pivot] > (*v1)[start])
{
qSwap((*v1)[pivot], (*v1)[start]);
qSwap((*v2)[pivot], (*v2)[start]);
}
if ((*v1)[end] > (*v1)[pivot])
{
qSwap((*v1)[end], (*v1)[pivot]);
qSwap((*v2)[end], (*v2)[pivot]);
}
if (span == 3) return;
// Park the pivot value at the last position while partitioning
qSwap((*v1)[pivot], (*v1)[end]);
qSwap((*v2)[pivot], (*v2)[end]);
while (low < high)
{
// Skip elements already on the correct side of the pivot value
while (low < high && (*v1)[low] > (*v1)[end]) ++low;
while (high > low && (*v1)[end] > (*v1)[high]) --high;
if (low < high)
{
qSwap((*v1)[low], (*v1)[high]);
qSwap((*v2)[low], (*v2)[high]);
++low;
--high;
}
else break;
}
if ((*v1)[low] > (*v1)[end]) ++low;
// Move the pivot value to its final position
qSwap((*v1)[end], (*v1)[low]);
qSwap((*v2)[end], (*v2)[low]);
// Recurse on the left part; the right part is handled by looping back
double_sort(start, low, v1, v2);
start = low + 1;
// Restore 'end' to its one-past-the-last meaning before the next pass
++end;
goto top;
}
// Constructs an analyzer with zeroed document, sentence and similar-document
// counters, and stores the pointer returned by settings::instance().
// No cache data is read here.
collectionAnalyzer::collectionAnalyzer() :
_ndocuments(0),
_nsentences(0),
_nsimilar(0),
_settingsP(settings::instance())
{}
void collectionAnalyzer::reload()
{
_ndocuments = 0;
_nsentences = 0;
_nsimilar = 0;
_document_keyword.clear();
_keyword_document_fn.clear();
_similar_document_fn.clear();
const QString cache_dir(_settingsP->fileName("cb2Bib/CacheDirectory"));
_documentslf_fn = QDir::cleanPath(cache_dir + "/documents.lc2b");
QFile documentslf(_documentslf_fn);
if (documentslf.open(QIODevice::ReadOnly))
{
QDataStream stream(&documentslf);
stream >> _ndocuments;
_documents.resize(_ndocuments);
for (int i = 0; i < _ndocuments; ++i)
stream >> _documents[i];
documentslf.close();
}
_similar.resize(_ndocuments);
_similarity.resize(_ndocuments);
_sentenceslf_fn = QDir::cleanPath(cache_dir + "/sentences.lc2b");
QFile sentenceslf(_sentenceslf_fn);
if (sentenceslf.open(QIODevice::ReadOnly))
{
QDataStream stream(&sentenceslf);
stream >> _nsentences;
_sentences.resize(_nsentences);
for (int i = 0; i < _nsentences; ++i)
stream >> _sentences[i];
sentenceslf.close();
}
_documentsif_fn = QDir::cleanPath(cache_dir + "/documents.ic2b");
QFile documentsif(_documentsif_fn);
if (documentsif.open(QIODevice::ReadOnly))
{
QDataStream stream(&documentsif);
_document_norms.resize(_ndocuments);
int ns;
int ss;
for (int d = 0; d < _ndocuments; ++d)
{
stream >> ns;
_document_norms[d] = ns;
for (int s = 0; s < ns; ++s)
stream >> ss;
}
documentsif.close();
}
_sentencesif_fn = QDir::cleanPath(cache_dir + "/sentences.ic2b");
}
void collectionAnalyzer::setDocumentKeywords(const QString& documentfn)
{
if (documentfn == _keyword_document_fn)
return;
_keyword_document_fn = documentfn;
_document_keyword_ids.clear();
QVector<int> dsentences;
grepDocuments(documentfn, &dsentences);
const int ndsentences(dsentences.size());
if (ndsentences == 0)
return;
lengthsorting< QVarLengthArray<QString> > ls(_sentences);
qSort(dsentences.begin(), dsentences.end(), ls);
for (int i = 0; i < ndsentences; ++i)
{
const QString& si(_sentences[dsentences.at(i)]);
bool redundant(false);
for (int j = i + 1; j < ndsentences; ++j)
if (_sentences[dsentences.at(j)].contains(si))
{
redundant = true;
break;
}
if (!redundant)
_document_keyword_ids.append(dsentences.at(i));
}
qSort(_document_keyword_ids);
}
/**
    Sets the current keyword and refreshes _sdocuments through
    grepSentences(). Does nothing when keyword equals the keyword set by
    the previous call.
*/
void collectionAnalyzer::setKeywordDocuments(const QString& keyword)
{
    if (_document_keyword != keyword)
    {
        _document_keyword = keyword;
        grepSentences(keyword, &_sdocuments);
    }
}
void collectionAnalyzer::setSimilarDocuments(const QString& documentfn)
{
if (documentfn == _similar_document_fn)
return;
_similar_document_fn = documentfn;
_nsimilar = 0;