/* * Copyright (C) 2009 Arnoldo Jose Muller Molina * This program is free software: you can * redistribute it and/or modify it under the terms of the GNU General Public * License as published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. This program is distributed * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for more details. You should have received * a copy of the GNU General Public License along with this program. If not, see * . */ #ifndef FTED_HPP #define FTED_HPP #include #include #include #include #include #include "SliceLexer.hpp" #include "SliceParser.hpp" #include "SliceAST.hpp" ANTLR_USING_NAMESPACE(antlr) using namespace std; class FTed{ public: vector data; int size; // size of the DB int getSize(){ return size; } /** * Loads the trees of the given file into memory. * */ void loadDB(string url){ ifstream myfile (url.c_str(), ios::in); string line; int lines = 0; if (myfile.is_open()) { while (! myfile.eof() ) { getline (myfile,line); lines++; } myfile.close(); cout << "Loading : " << lines << " trees: " << endl; } else { cout << "Unable to open file for newline count"; } size = lines; // now we can load the trees. //data = new RefSliceAST[lines + 1]; data = vector(lines + 1); ifstream myfile2 (url.c_str(), ifstream::in); lines = 1; if (myfile2.is_open()) { while (! myfile2.eof() && lines <= size) { getline (myfile2,line); if(! (line == "\n") && !line.empty()){ data[lines] = createSliceAST(line); lines++; } if((lines % 10000) == 0){ cout << "Loading : " << lines << endl; } } cout << "Loaded DB!" << endl; myfile2.close(); } else { cout << "Unable to open file file for loading trees"; } } void setQuery(string x){ setQuery(createSliceAST(x)); } /** * Sets the query */ void setQuery(RefSliceAST query){ if(data[0] != NULL){ //data.erase(data.begin()); } data[0] = query; } /* * Clean all data */ void clean(){ cout << "will delete data..." << endl; data.erase(data.begin(), data.end()); cout << "deleted data" << endl; } /** * Distance based on the object ids. */ int ted(int a, int b){ return tedAux(data[a], data[b]); } int ted(string a, string b){ return tedAux(createSliceAST(a), createSliceAST(b)); } int tedAux(RefSliceAST a, RefSliceAST b){ // a->updateDecendantInformation(); // b->updateDecendantInformation(); list aExpanded = a->depthFirst(); list bExpanded = b->depthFirst(); list bExpanded2 = b->depthFirst(); int Na = aExpanded.size() * 2; int Nb = bExpanded.size() * 2; list::iterator ait = aExpanded.begin(); int res = 0; while(ait !=aExpanded.end() ){ RefSliceAST aTree = *ait++; list::iterator bit = bExpanded.begin(); while(bit != bExpanded.end()){ RefSliceAST bTree = *bit; if(aTree->fEq(bTree)){ res++; bExpanded.erase(bit); break; } bit++; } bit = bExpanded2.begin(); while (bit != bExpanded2.end()) { RefSliceAST bTree = *bit; if (aTree->getText() == bTree->getText()) { res++; bExpanded2.erase(bit); break; } bit++; } } //tedsAmount++; //return ceil((float)( (Na + Nb) - ( 2 * res) ) / 2); //return max(Na, Nb) - res; int r = (int)(((Na + Nb) - (2 * res)) / 2); assert(r >= 0); return r; } /** * Create a Tree from a string. */ RefSliceAST createSliceAST(string x){ istringstream in; in.str(x); SliceLexer lexer(in); SliceParser parser(lexer); // set up the ast factory to use a custom AST type per default // note that here the Ref prefix for the reference counter is // strippped off. ASTFactory ast_factory("SliceAST", SliceAST::factory); // let the parser add it's stuff to the factory... parser.initializeASTFactory(ast_factory); parser.setASTFactory(&ast_factory); parser.slice(); RefSliceAST res(parser.getAST()); res->updateDecendantInformation(); return res; } }; #endif