//
// A simple BibTeX duplicates checking tool - just for @article entries.
// Input has to be pre-parsed by the bibparse program from the BibTooL package
// http://cgi.di.uoa.gr/~charnik/oss/bibtool/#downloads
//
// Usage: bibparse file1.bib [file2.bib ...] |tr '{}' ' '| bdc
//
//TODO: it would be good to treat arXiv entries too - but different people define them differently
/*
 * Version 0.3
 * Copyright (C) 2015 Jiri Pittner
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
* */ #include #include #include #include #include #include #include #include #include #include #include #include #include //#include //#include #include using namespace std; class ITEM { public: bool deleted; string label; string author; string title; }; class ARTICLE : public ITEM { public: string journal; int volume; int page; int year; void clear(void) {deleted=0; volume=0; page=0; year=0; author=""; journal="";} }; istream& operator>>(istream &s, ARTICLE &a) { string line; a.clear(); while(getline(s,line)) { if(s.eof()) exit(1); vector fields; boost::split(fields, line, boost::is_any_of("="), boost::token_compress_off); if(fields.size() != 2) break; if(fields[0] == "volume") a.volume=atoi(fields[1].c_str()); if(fields[0] == "pages") a.page=atoi(fields[1].c_str()); if(fields[0] == "year") a.year=atoi(fields[1].c_str()); if(fields[0] == "journal") a.journal=fields[1]; if(fields[0] == "author") a.author=fields[1]; if(fields[0] == "title") a.title=fields[1]; } return s; } ostream& operator<<(std::ostream &s, const ARTICLE &a) { s < articles; string line; while(getline(cin,line)) { if(cin.eof()) exit(0); vector fields; boost::split(fields, line, boost::is_any_of(" "), boost::token_compress_off); //cout << fields.size()<<" "<> tmp; //cout <:: iterator i,j; for(i=articles.begin(); i != articles.end(); ++i) { for(j=articles.begin(); j != i; ++j) if(!j->deleted) { if(i->label == j->label ) cout << "DUPLICATE LABEL::"<label<year == j->year && i->page == j->page && i->volume == j->volume && i->year!=0 && i->page!=0 && i->volume!=0 ) { string sure= j->journal == i->journal ? 
"" : "SUSPECT"; cerr<< sure<<"DUPLICATE @ARTICLE:: " << *i << " & "<<*j<:: iterator ii,jj; ii=i; jj=j; //normally keep the first one and delete later ones if(j->title!="" && i->title=="") {ii=j; jj=i;} //but delete preferably the entry with less information (missing title) cout << "\\bibalias{"<label<<"}{"<label<<"} %remove " <label<deleted=1; //indicate deletion to reasonably handle triplicates and higher } } } }