#include "parser.h"
#include <iostream>
#include <string>
#include <vector>
#include <sstream>
#include <algorithm>
#include <iterator>
#include <math.h>

#include <sys/stat.h>
#include <stdlib.h>
#include <fstream>
#include <map>
#include <algorithm>

using namespace std;

//============================================================================
// Name        : datastruct.cpp
// Author      : btsui
// Version     :
// Copyright   : (c) 2010 The Trustees of Princeton University
// Description : IGET data structures
//============================================================================


// Forward declarations, so the functions in this file can call one another
// regardless of the order in which they are defined.

// --- filesystem / logging helpers ---
bool file_exists(string filename);
void verbose_output(string output); // unused
void error_message(string error); // called by dump_all, parse_prmg, parse_prmg_protein and parse_firepro when input files are missing or cannot be opened

// --- parsers and line-splitting helpers ---
ipage_general parse_ipage(string filename);
vector<string> split_whitespace (string line);
vector<string> split_slash (string line);
vector<string> split_tab (string line);
vector<string> split_semicolon (string line);
vector<string> split_comma (string line);

// --- stdout dump routines, one per result type ---
void dump_ipage(ipage_general results);
void dump_fire(fire_results results);
void dump_firepro(firepro_results results);
void dump_prmg(prmg_general results);
void dump_prmg_protein(prmg_general_protein results);
void dump_all(string filename);

// --- result parsers; `filename` is the experiment file path that the
//     result file paths are derived from ---
// parse_fire's `type` is passed as "DNA", "RNA" or "DNA_RNA" by the callers in this file.
fire_results parse_fire(string filename, string type);
firepro_results parse_firepro(string filename);
prmg_general parse_prmg(fire_results fire, ipage_general ipage, string filename);
prmg_general_protein parse_prmg_protein(firepro_results fire, ipage_general ipage, string filename);

// Summarises which result sets are available as a string of '0'/'1' flags.
string determine_results (string filename);

// Reports which analyses have usable results for the experiment file `filename`.
//
// Returns a seven-character string of '0'/'1' flags, one per result set, in
// this order: FIRE "DNA", FIRE "RNA", FIRE "DNA_RNA", FIRE-pro, iPAGE, PRMG,
// PRMG protein. A flag is '1' only when the corresponding parser reports
// status 1 and returned at least one motif / pathway / interaction row.
// Returns "0000000" without parsing anything when file_exists(filename) is false.
string determine_results (string filename) {
    if (!file_exists(filename)) {
        return "0000000";
    }

    string flags = "";

    // FIRE, DNA motifs only
    fire_results fire_dna = parse_fire(filename, "DNA");
    flags += (fire_dna.general.status == 1 && fire_dna.general.motifs.size() > 0) ? "1" : "0";

    // FIRE, RNA motifs only
    fire_results fire_rna = parse_fire(filename, "RNA");
    flags += (fire_rna.general.status == 1 && fire_rna.general.motifs.size() > 0) ? "1" : "0";

    // FIRE, combined DNA + RNA (also the input to the PRMG parse below)
    fire_results fire_both = parse_fire(filename, "DNA_RNA");
    flags += (fire_both.general.status == 1 && fire_both.general.motifs.size() > 0) ? "1" : "0";

    // FIRE-pro
    firepro_results firepro = parse_firepro(filename);
    flags += (firepro.general.status == 1 && firepro.general.motifs.size() > 0) ? "1" : "0";

    // iPAGE
    ipage_general ipage = parse_ipage(filename);
    flags += (ipage.status == 1 && ipage.pathways.size() > 0) ? "1" : "0";

    // PRMG (FIRE motifs x iPAGE pathways)
    prmg_general prmg = parse_prmg(fire_both, ipage, filename);
    flags += (prmg.status == 1 && prmg.interaction_matrix.size() > 0) ? "1" : "0";

    // PRMG protein (FIRE-pro motifs x iPAGE pathways)
    prmg_general_protein prmg_protein = parse_prmg_protein(firepro, ipage, filename);
    flags += (prmg_protein.status == 1 && prmg_protein.interaction_matrix.size() > 0) ? "1" : "0";

    return flags;
}

// Parses every result set derived from `filename` and prints each one to
// stdout right after it is parsed, in this order: FIRE ("DNA_RNA"), FIRE-pro,
// iPAGE, PRMG, PRMG protein. If file_exists(filename) is false, reports the
// problem through error_message and does nothing else.
void dump_all(string filename) {
        if (!file_exists(filename)) {
                error_message("Starting expfile path does not exist or is inaccessible.");
                return;
        }

        fire_results fire = parse_fire(filename, "DNA_RNA");
        dump_fire(fire);

        firepro_results firepro = parse_firepro(filename);
        dump_firepro(firepro);

        ipage_general ipage = parse_ipage(filename);
        dump_ipage(ipage);

        // The PRMG parsers reuse the motif and pathway results parsed above.
        prmg_general prmg = parse_prmg(fire, ipage, filename);
        dump_prmg(prmg);

        prmg_general_protein prmg_protein = parse_prmg_protein(firepro, ipage, filename);
        dump_prmg_protein(prmg_protein);
}

// Prints a PRMG result (pathway x motif interaction table) to stdout.
//
// Output: a banner, a "<motifs> motifs; <pathways> pathways" line, a
// tab-separated header row of motif candidates, then one row per pathway
// holding the pathway name followed by one interaction value per motif.
// Prints "No results or parsing unsuccessful." instead when status is 0, or
// when status is 1 but there are no motifs or no pathways.
void dump_prmg(prmg_general results) {
        if(results.status == 0 || (results.status == 1 && (results.motifs.size() == 0 || results.pathways.size() == 0))) {
                printf("No results or parsing unsuccessful.\n");
                return;
        }

        int interaction_motif_count = results.motifs.size();
        int interaction_pathway_count = results.pathways.size();

        printf("PRMG general results ***\n");
        // Print the int counts: passing .size() (an unsigned size type) to
        // "%d" is a format/argument mismatch and therefore undefined behaviour.
        printf("%d motifs; %d pathways\n", interaction_motif_count, interaction_pathway_count);

        // header row: one column per motif
        for (int i = 0; i < interaction_motif_count; i++) {
                printf("\t%s", results.motifs[i].candidate.c_str());
        }
        printf("\n");

        // one row per pathway
        for (int i = 0; i < interaction_pathway_count; i++) {
                printf("%s", results.pathways[i].pathway.c_str());
                for (int j = 0; j < interaction_motif_count; j++) {
                        printf("\t%f", results.interaction_matrix[i][j]);
                }
                printf("\n");
        }
}

// Prints a protein PRMG result (pathway x protein-motif interaction table)
// to stdout.
//
// Output: a banner, a "<motifs> motifs; <pathways> pathways" line, a
// tab-separated header row of motif candidates, then one row per pathway
// holding the pathway name followed by one interaction value per motif.
// Prints "No results or parsing unsuccessful." instead when status is 0, or
// when status is 1 but there are no motifs or no pathways.
void dump_prmg_protein(prmg_general_protein results) {
        if(results.status == 0 || (results.status == 1 && (results.motifs.size() == 0 || results.pathways.size() == 0))) {
                printf("No results or parsing unsuccessful.\n");
                return;
        }

        int interaction_motif_count = results.motifs.size();
        int interaction_pathway_count = results.pathways.size();

        printf("PRMG general results ***\n");
        // Print the int counts: passing .size() (an unsigned size type) to
        // "%d" is a format/argument mismatch and therefore undefined behaviour.
        printf("%d motifs; %d pathways\n", interaction_motif_count, interaction_pathway_count);

        // header row: one column per motif
        for (int i = 0; i < interaction_motif_count; i++) {
                printf("\t%s", results.motifs[i].candidate.c_str());
        }
        printf("\n");

        // one row per pathway
        for (int i = 0; i < interaction_pathway_count; i++) {
                printf("%s", results.pathways[i].pathway.c_str());
                for (int j = 0; j < interaction_motif_count; j++) {
                        printf("\t%f", results.interaction_matrix[i][j]);
                }
                printf("\n");
        }
}

// Parses the pathway x motif interaction table "<filename>_PAGE/motif_cat.cdt".
//
// fire     : FIRE results; supplies the motif records named in the header row.
// ipage    : iPAGE results; supplies the pathway records named in each data row.
// filename : experiment file path; "_PAGE/motif_cat.cdt" is appended to it.
//
// Returns a prmg_general whose status is
//   1 - parse succeeded (motifs, pathways and interaction_matrix are filled),
//       or there was nothing to parse because `fire` has no motifs or `ipage`
//       has no pathways (result otherwise left empty);
//   0 - the table is missing, cannot be opened, or contains a data row with
//       fewer value columns than the header announces.
// Blank lines in the table are skipped.
prmg_general parse_prmg(fire_results fire, ipage_general ipage, string filename) {
        prmg_general holder;

        if (fire.general.motifs.size() == 0 || ipage.pathways.size() == 0) {
                // either no motifs or no pathways -- no need to continue
                holder.status = 1;
                return holder;
        }

        // motif candidate -> index into fire.general.motifs
        // (insert() keeps the first index when a candidate repeats)
        map<string, int> motif2array;
        int motifcount = fire.general.motifs.size();
        for (int i = 0; i < motifcount; i++) {
                motif2array.insert(make_pair(fire.general.motifs[i].candidate, i));
        }
        // pathway name -> index into ipage.pathways
        map<string, int> pathway2array;
        int pathwaycount = ipage.pathways.size();
        for (int i = 0; i < pathwaycount; i++) {
                pathway2array.insert(make_pair(ipage.pathways[i].pathway, i));
        }

        string output_cdt = filename + "_PAGE/motif_cat.cdt";
        if (!file_exists(output_cdt)) {
                error_message("Files are missing or inaccessible for PRMG results.");
                holder.status = 0;
                return holder;
        }

        // PARSE OUTPUT CDT
        fstream file_output_cdt;
        file_output_cdt.open(output_cdt.c_str(), fstream::in);
        if (!file_output_cdt) {
                holder.status = 0;
                return holder;
        }

        string line;

        // Header row: first column is a label, every following column names a
        // motif as "<candidate>/...".
        getline(file_output_cdt, line);
        vector<string> motifdata = split_tab(line);
        if (!motifdata.empty()) {
                // drop the label column; guarded because erase(begin()) on an
                // empty vector is undefined behaviour
                motifdata.erase(motifdata.begin());
        }
        int relevantmotifcount = motifdata.size();
        for (int i = 0; i < relevantmotifcount; i++) {
                vector<string> motifsplit = split_slash(motifdata[i]);
                string ms;
                if (!motifsplit.empty()) {
                        ms = motifsplit[0];
                }
                // The table spells motifs with U; the map keys use T.
                std::replace(ms.begin(), ms.end(), 'U', 'T');
                // NOTE(review): operator[] default-inserts index 0 for a
                // candidate that is not in `fire`, so an unknown motif resolves
                // to the first motif record -- confirm this is intended.
                holder.motifs.push_back(fire.general.motifs[motif2array[ms]]);
        }

        // Data rows: "<pathway label>\t<value per motif>..."
        while (getline(file_output_cdt, line)) {
                vector<string> linedata = split_tab(line);
                if (linedata.empty()) {
                        continue; // blank line
                }
                vector<string> scrambledpathway = split_whitespace(linedata[0]);
                int scrambledpathwaysize = scrambledpathway.size();
                if (scrambledpathwaysize == 0) {
                        continue; // no pathway label on this line
                }
                if (static_cast<int>(linedata.size()) < relevantmotifcount + 1) {
                        // truncated row: refuse to read past the end of linedata
                        holder = prmg_general();
                        holder.status = 0;
                        return holder;
                }

                // The label's last whitespace-separated token is moved to the
                // front to rebuild the pathway name used as the map key.
                string pathwayname = scrambledpathway[scrambledpathwaysize - 1];
                for (int i = 0; i < scrambledpathwaysize - 1; i++) {
                        pathwayname += " " + scrambledpathway[i];
                }

                vector<double> perpathway;
                for (int i = 0; i < relevantmotifcount; i++) {
                        perpathway.push_back(atof(linedata[i + 1].c_str()));
                }
                holder.interaction_matrix.push_back(perpathway);
                // NOTE(review): as above, an unknown pathway name resolves to index 0.
                holder.pathways.push_back(ipage.pathways[pathway2array[pathwayname]]);
        }
        // END PARSE OUTPUT CDT

        holder.status = 1;
        return holder;
}

// Parses the pathway x protein-motif interaction table
// "<filename>_PAGE/motif_cat.cdt".
//
// fire     : FIRE-pro results; supplies the motif records named in the header row.
// ipage    : iPAGE results; supplies the pathway records named in each data row.
// filename : experiment file path; "_PAGE/motif_cat.cdt" is appended to it.
//
// Returns a prmg_general_protein whose status is
//   1 - parse succeeded (motifs, pathways and interaction_matrix are filled),
//       or there was nothing to parse because `fire` has no motifs or `ipage`
//       has no pathways (result otherwise left empty);
//   0 - the table is missing, cannot be opened, or contains a data row with
//       fewer value columns than the header announces.
// Blank lines in the table are skipped.
prmg_general_protein parse_prmg_protein(firepro_results fire, ipage_general ipage, string filename) {
        prmg_general_protein holder;

        if (fire.general.motifs.size() == 0 || ipage.pathways.size() == 0) {
                // either no motifs or no pathways -- no need to continue
                holder.status = 1;
                return holder;
        }

        // motif candidate -> index into fire.general.motifs
        // (insert() keeps the first index when a candidate repeats)
        map<string, int> motif2array;
        int motifcount = fire.general.motifs.size();
        for (int i = 0; i < motifcount; i++) {
                motif2array.insert(make_pair(fire.general.motifs[i].candidate, i));
        }
        // pathway name -> index into ipage.pathways
        map<string, int> pathway2array;
        int pathwaycount = ipage.pathways.size();
        for (int i = 0; i < pathwaycount; i++) {
                pathway2array.insert(make_pair(ipage.pathways[i].pathway, i));
        }

        string output_cdt = filename + "_PAGE/motif_cat.cdt";
        if (!file_exists(output_cdt)) {
                error_message("Files are missing or inaccessible for PRMG protein results.");
                holder.status = 0;
                return holder;
        }

        // PARSE OUTPUT CDT
        fstream file_output_cdt;
        file_output_cdt.open(output_cdt.c_str(), fstream::in);
        if (!file_output_cdt) {
                holder.status = 0;
                return holder;
        }

        string line;

        // Header row: first column is a label, every following column names a
        // motif as "<candidate>/...".
        getline(file_output_cdt, line);
        vector<string> motifdata = split_tab(line);
        if (!motifdata.empty()) {
                // drop the label column; guarded because erase(begin()) on an
                // empty vector is undefined behaviour
                motifdata.erase(motifdata.begin());
        }
        int relevantmotifcount = motifdata.size();
        for (int i = 0; i < relevantmotifcount; i++) {
                vector<string> motifsplit = split_slash(motifdata[i]);
                string candidate;
                if (!motifsplit.empty()) {
                        candidate = motifsplit[0];
                }
                // NOTE(review): operator[] default-inserts index 0 for a
                // candidate that is not in `fire`, so an unknown motif resolves
                // to the first motif record -- confirm this is intended.
                holder.motifs.push_back(fire.general.motifs[motif2array[candidate]]);
        }

        // Data rows: "<pathway label>\t<value per motif>..."
        while (getline(file_output_cdt, line)) {
                vector<string> linedata = split_tab(line);
                if (linedata.empty()) {
                        continue; // blank line
                }
                vector<string> scrambledpathway = split_whitespace(linedata[0]);
                int scrambledpathwaysize = scrambledpathway.size();
                if (scrambledpathwaysize == 0) {
                        continue; // no pathway label on this line
                }
                if (static_cast<int>(linedata.size()) < relevantmotifcount + 1) {
                        // truncated row: refuse to read past the end of linedata
                        holder = prmg_general_protein();
                        holder.status = 0;
                        return holder;
                }

                // The label's last whitespace-separated token is moved to the
                // front to rebuild the pathway name used as the map key.
                string pathwayname = scrambledpathway[scrambledpathwaysize - 1];
                for (int i = 0; i < scrambledpathwaysize - 1; i++) {
                        pathwayname += " " + scrambledpathway[i];
                }

                vector<double> perpathway;
                for (int i = 0; i < relevantmotifcount; i++) {
                        perpathway.push_back(atof(linedata[i + 1].c_str()));
                }
                holder.interaction_matrix.push_back(perpathway);
                // NOTE(review): as above, an unknown pathway name resolves to index 0.
                holder.pathways.push_back(ipage.pathways[pathway2array[pathwayname]]);
        }
        // END PARSE OUTPUT CDT

        holder.status = 1;
        return holder;
}

// Prints FIRE-pro results to stdout.
//
// General section: a summary line (motif, bin and cluster counts plus the
// experiment type), then every metabin with its genes and expression values,
// then every motif with its scores, cluster, known names and per-bin details.
// Interaction section: the motif x motif matrix as "r0/r3" pairs when
// results.interaction.status is 1, otherwise an "unavailable" notice.
// Prints "No results or parsing unsuccessful." instead when the general
// status is 0, or is 1 with no motifs.
void dump_firepro(firepro_results results) {
        if(results.general.status == 0 || (results.general.status == 1 && results.general.motifs.size() == 0)) {
                printf("No results or parsing unsuccessful.\n");
                return;
        }

        int motifcount = results.general.motifs.size();
        int bincount = results.general.metabins.size();
        bool discrete = (results.general.exptype == "discrete");

        printf("FIRE-pro general results ***\n");
        // Print the int counts: passing .size() (an unsigned size type) to
        // "%d" is a format/argument mismatch and therefore undefined behaviour.
        printf("%d motifs; %d bins; %d clusters; %s\n", motifcount, bincount, results.general.clusters, results.general.exptype.c_str());

        for (int i = 0; i < bincount; i++) {
                printf("metabin #%d ", i);
                // discrete experiments label a metabin by its bin number,
                // all others by the min/max value range
                if (discrete) {
                        printf("%d\n", results.general.metabins[i].bin);
                } else {
                        printf("%f %f\n", results.general.metabins[i].min, results.general.metabins[i].max);
                }
                int metabin_genecount = results.general.metabins[i].genes.size();
                for (int j = 0; j < metabin_genecount; j++) {
                        printf("%s\t%s\n", results.general.metabins[i].genes[j].c_str(), results.general.metabins[i].evalues[j].c_str());
                }
        }

        for (int i = 0; i < motifcount; i++) {
                printf("motif #%d %s %f %f %d/%d\n", i, results.general.motifs[i].candidate.c_str(), results.general.motifs[i].mi, results.general.motifs[i].z_score, results.general.motifs[i].robustness, results.general.motifs[i].robustness_max);
                printf("cluster %d\n", results.general.motifs[i].cluster);
                printf("name :");
                int namecount = results.general.motifs[i].names.size();
                for (int l = 0; l < namecount; l++) {
                        printf(" %s %s", results.general.motifs[i].names[l].c_str(), results.general.motifs[i].details[l].c_str());
                }
                printf("\n");

                // A motif may carry fewer bins than there are metabins; never
                // index past the bins it actually has.
                int motif_bincount = results.general.motifs[i].bins.size();
                if (motif_bincount > bincount) {
                        motif_bincount = bincount;
                }
                for (int j = 0; j < motif_bincount; j++) {
                        printf("bin #%d %s %f %f\n", j, results.general.motifs[i].bins[j].state.c_str(), results.general.motifs[i].bins[j].value, results.general.motifs[i].bins[j].p_value);
                        int bin_genecount = results.general.motifs[i].bins[j].genes.size();
                        for (int k = 0; k < bin_genecount; k++) {
                                printf("%s\n", results.general.motifs[i].bins[j].genes[k].c_str());
                        }
                }
        }

        // interaction results
        if(results.interaction.status == 1) {
                printf("FIRE-pro interaction results ***\n");
                int interaction_motif_count = results.interaction.motifs.size();
                // header row: one column per motif
                for (int i = 0; i < interaction_motif_count; i++) {
                        printf("\t%s", results.interaction.motifs[i].candidate.c_str());
                }
                printf("\n");
                // one row per motif, each cell printed as r0/r3
                for (int i = 0; i < interaction_motif_count; i++) {
                        printf("%s", results.interaction.motifs[i].candidate.c_str());
                        for (int j = 0; j < interaction_motif_count; j++) {
                                printf("\t%f/%f", results.interaction.interaction_matrix_r0[i][j],results.interaction.interaction_matrix_r3[i][j]);
                        }
                        printf("\n");
                }
        } else {
                printf("FIRE-pro interaction results unavailable ***\n");
        }
        // end interaction results
}

firepro_results parse_firepro(string filename) {
        firepro_results holder;
        string fireprodir = filename + "_FIREPRO";

        string input_discrete_nodups = filename + ".nodups";
        string input_continuous_nodups = filename + ".quantized.nodups";
        string input_nodups;
        if (file_exists(input_discrete_nodups)) {
                input_nodups = input_discrete_nodups;
                holder.general.exptype = "discrete";
        } else {
                input_nodups = input_continuous_nodups;
                holder.general.exptype = "continuous";
        }

        vector<string> expfilefinder = split_slash(filename);
        int expfilefinder_index = expfilefinder.size()-1;
        string expfilename = expfilefinder[expfilefinder_index];

        string output_summary = fireprodir + "/" + expfilename + ".final.motifs";
        string output_cluster = fireprodir + "/" + expfilename + "-mi_combine.cdt";
        string output_fullmatrix = fireprodir + "/" + expfilename + "-formatted.pvalues";
        string output_motif_profile = fireprodir + "/" + expfilename + "-motif_profiles.txt";
        string output_motif_names = fireprodir + "/" + expfilename + ".names";
        string output_fullmimatrix = fireprodir + "/" + expfilename + "-mi_combine.fullmatrix";
        string output_mimatrix = fireprodir + "/" + expfilename + "-mi_combine.matrix";

        string input_commandline = fireprodir + "/../" + "fire_aa_log.txt";

        if (file_exists(input_nodups) && file_exists(filename) && file_exists(output_summary) && file_exists(output_cluster) && file_exists(output_fullmatrix) && file_exists(output_motif_profile) && file_exists(output_motif_names) && file_exists(input_commandline) && file_exists(output_fullmimatrix) && file_exists(output_mimatrix)) {

                // PARSE SUMMARY FILE
                map<string, int> motif2array;
                fstream file_output_summary;
                file_output_summary.open(output_summary.c_str(), fstream::in);
                if(!file_output_summary) {
                        error_message("A file that exists could not be opened.");
                        holder.general.status = 0;
                        holder.interaction.status = 0;
                        return holder;
                } else {
                        string line;
                        int counter = 0;
                        while(getline(file_output_summary, line)) {
                                vector<string> linedata = split_tab(line);
                                protein_motif currentmotif;
                                currentmotif.candidate = linedata[0];
                                currentmotif.mi = atof(linedata[1].c_str());
                                currentmotif.z_score = atof(linedata[3].c_str());
                                currentmotif.robustness = atoi(linedata[4].c_str());
                                // hardcoded
                                currentmotif.robustness_max = 10;

                                holder.general.motifs.push_back(currentmotif);
                                motif2array.insert(make_pair(linedata[0], counter));
                                counter++;
                        }
                        if (counter == 0) {
                                // no motifs; no need to go on
                                holder.general.status = 1;
                                holder.interaction.status = 1;
                                return holder;
                        }
                        file_output_summary.close();
                }
                // END PARSE SUMMARY FILE

                // PARSE CLUSTER FILE
                fstream file_output_cluster;
                file_output_cluster.open(output_cluster.c_str(), fstream::in);
                if(!file_output_cluster) {
                        error_message("A file that exists could not be opened.");
                        holder.general.status = 0;
                        holder.interaction.status = 0;
                        return holder;
                } else {
                        string line;
                        int maxclustercounter = 0;
                        while(getline(file_output_cluster, line)) {
                                vector<string> linedata = split_tab(line);
                                holder.general.motifs[motif2array[linedata[0]]].cluster = atoi(linedata[1].c_str());
                                if(atoi(linedata[1].c_str()) > maxclustercounter) {
                                        maxclustercounter = atoi(linedata[1].c_str());
                                }
                        }
                        holder.general.clusters = maxclustercounter+1;
                        file_output_cluster.close();
                }
                // END PARSE CLUSTER FILE

                // PARSE FULL MATRIX
                int bincount;
                fstream file_output_fullmatrix;
                file_output_fullmatrix.open(output_fullmatrix.c_str(), fstream::in);
                if(!file_output_fullmatrix) {
                        error_message("A file that exists could not be opened.");
                        holder.general.status = 0;
                        holder.interaction.status = 0;
                        return holder;
                } else {
                        string line;
                        // get first line for headers, which can determine exptype
                        getline(file_output_fullmatrix, line);
                        vector<string> header = split_tab(line);
                        bincount = header.size()-1;
                        holder.general.bincount = bincount;

                        for(int i = 0; i < bincount; i++) {
                                metabin currentmetabin;
                                currentmetabin.bin = i;
                                holder.general.metabins.push_back(currentmetabin);
                        }

                        double signif_pvalue = 0.05/bincount;
                        while(getline(file_output_fullmatrix, line)) {
                                vector<string> body = split_tab(line);
                                string motif = body[0];
                                for(int i = 0; i < bincount; i++) {
                                        binner currentbin;
                                        currentbin.bin = i;
                                        double value = atof(body[i+1].c_str());
                                        double pvalue= pow(10,-fabs(value));
                                        currentbin.p_value = pvalue;
                                        currentbin.value = value;
                                        if (pvalue <= signif_pvalue) {
                                                if(currentbin.value > 0) {
                                                        currentbin.state = "over";
                                                } else {
                                                        currentbin.state = "under";
                                                }
                                        } else {
                                                currentbin.state = "neither";
                                        }
                                        holder.general.motifs[motif2array[motif]].bins.push_back(currentbin);
                                }
                        }
                        file_output_fullmatrix.close();
                }
                // END PARSE FULL MATRIX

                // PARSE QUANTIZED
                map<string, int> gene2bin;
                fstream file_input_nodups;
                file_input_nodups.open(input_nodups.c_str(), fstream::in);
                if(!file_input_nodups) {
                        error_message("A file that exists could not be opened.");
                        holder.general.status = 0;
                        holder.interaction.status = 0;
                        return holder;
                } else {
                        string line;
                        getline(file_input_nodups, line);
                        while(getline(file_input_nodups, line)) {
                                vector<string> linedata = split_tab(line);
                                int curbin = atoi(linedata[1].c_str());
                                if (holder.general.exptype == "discrete") {
                                        holder.general.metabins[curbin].genes.push_back(linedata[0]);
                                        holder.general.metabins[curbin].evalues.push_back(linedata[1]);
                                }
                                gene2bin.insert(make_pair(linedata[0],curbin));
                        }
                        file_input_nodups.close();
                }
                // END PARSE QUANTIZED

                // PARSE EXPFILE IF NEEDED
                if (holder.general.exptype == "continuous") {
                        fstream file_filename;
                        file_filename.open(filename.c_str(), fstream::in);
                        if(!file_filename) {
                                error_message("A file that exists could not be opened.");
                                holder.general.status = 0;
                                holder.interaction.status = 0;
                                return holder;
                        } else {
                                string line;
                                getline(file_filename, line);
                                while(getline(file_filename, line)) {
                                        vector<string> linedata = split_tab(line);
                                        if (gene2bin.count(linedata[0])>0) {
                                                int curbin = gene2bin[linedata[0]];
                                                holder.general.metabins[curbin].genes.push_back(linedata[0]);
                                                holder.general.metabins[curbin].evalues.push_back(linedata[1]);
                                        }
                                }

                                for(int i = 0; i < bincount; i++) {
                                        vector<double> sort_array;
                                        int elements = holder.general.metabins[i].evalues.size();
                                        if (elements > 0) {
                                            for(int j = 0; j < elements; j++) {
                                                    double cureval = atof(holder.general.metabins[i].evalues[j].c_str());
                                                    sort_array.push_back(cureval);
                                            }
                                            sort (sort_array.begin(), sort_array.end()-1);
                                            double min = sort_array[0];
                                            double max = sort_array[elements-1];
                                            holder.general.metabins[i].min = min;
                                            holder.general.metabins[i].max = max;
                                        } else {
                                            // why would a bin have no elements?
                                            holder.general.metabins[i].min = 0;
                                            holder.general.metabins[i].max = 0;
                                        }
                                }
                        }
                        file_filename.close();
                }
                // END PARSE EXPFILE IF NEEDED

                // PARSE MOTIF PROFILE
                fstream file_output_motif_profile;
                file_output_motif_profile.open(output_motif_profile.c_str(), fstream::in);
                if(!file_output_motif_profile) {
                        error_message("A file that exists could not be opened.");
                        holder.general.status = 0;
                        holder.interaction.status = 0;
                        return holder;
                } else {
                        string line;
                        while(getline(file_output_motif_profile, line)) {
                                vector<string> linedata = split_tab(line);
                                string curmotif = linedata[0];
                                string curgene = linedata[1];
                                if (gene2bin.count(curgene) > 0) {
                                    int curbin = gene2bin[curgene];
                                    holder.general.motifs[motif2array[curmotif]].bins[curbin].genes.push_back(curgene);
                                }
                        }
                        file_output_motif_profile.close();
                }
                // END PARSE MOTIF PROFILE

                // PARSE MOTIF NAMES
                fstream file_output_motif_names;
                file_output_motif_names.open(output_motif_names.c_str(), fstream::in);
                if(!file_output_motif_names) {
                        error_message("A file that exists could not be opened.");
                        holder.general.status = 0;
                        holder.interaction.status = 0;
                        return holder;
                } else {
                        string line;
                        while(getline(file_output_motif_names, line)) {
                                vector<string> linedata = split_tab(line);
                                string curmotif = linedata[1];
                                string curmotif_known = linedata[2];
                                string curmotif_detail = linedata[3];
                                holder.general.motifs[motif2array[curmotif]].names.push_back(curmotif_known);
                                holder.general.motifs[motif2array[curmotif]].details.push_back(curmotif_detail);
                        }
                        file_output_motif_names.close();
                }
                // END PARSE MOTIF NAMES

                // PARSE CMDLINE
                fstream file_input_cmdline;
                file_input_cmdline.open(input_commandline.c_str(), fstream::in);
                if(!file_input_cmdline) {
                        error_message("A file that exists could not be opened.");
                        holder.general.status = 0;
                        holder.interaction.status = 0;
                        return holder;
                } else {
                        string line;
                        getline(file_input_cmdline, line);
                        getline(file_input_cmdline, line);
                        getline(file_input_cmdline, line);
                        holder.general.querysettings = line;
                        file_input_cmdline.close();
                }
                // END PARSE CMDLINE

                // BEGIN POPULATING INTERACTION HOLDER
                holder.interaction.querysettings = holder.general.querysettings;
                int motifcount = holder.general.motifs.size();
                vector<double> tmpvector;
                vector<double> tmpvector2;
                for(int i = 0; i < motifcount; i++) {
                        holder.interaction.motifs.push_back(holder.general.motifs[i]);
                        tmpvector.push_back(0);
                        tmpvector2.push_back(-1);
                }
                for(int i = 0; i < motifcount; i++) {
                        holder.interaction.interaction_matrix_r0.push_back(tmpvector);
                        holder.interaction.interaction_matrix_r1.push_back(tmpvector2);
                        holder.interaction.interaction_matrix_r3.push_back(tmpvector);
                        holder.interaction.interaction_matrix_r4.push_back(tmpvector2);
                }
                // END BEGIN POPULATING INTERACTION HOLDER

                // PARSE FULLMIMATRIX
                fstream file_output_fullmimatrix;
                file_output_fullmimatrix.open(output_fullmimatrix.c_str(), fstream::in);
                if(!file_output_fullmimatrix) {
                        error_message("A file that exists could not be opened.");
                        holder.general.status = 0;
                        holder.interaction.status = 0;
                        return holder;
                } else {
                        string line;
                        while(getline(file_output_fullmimatrix, line)){
                                vector<string> linedata = split_tab(line);
                                int motif1 = motif2array[linedata[0]];
                                int motif2 = motif2array[linedata[1]];
                                double r0 = atof(linedata[2].c_str());
                                double r3 = atof(linedata[5].c_str());
                                // potential space space nan issue for r3
                                holder.interaction.interaction_matrix_r0[motif1][motif2] = r0;
                                holder.interaction.interaction_matrix_r0[motif2][motif1] = r0;
                                holder.interaction.interaction_matrix_r3[motif1][motif2] = r3;
                                holder.interaction.interaction_matrix_r3[motif2][motif1] = r3;
                        }
                        file_output_fullmimatrix.close();
                }
                // END PARSE FULLMIMATRIX

                // PARSE MIMATRIX
                fstream file_output_mimatrix;
                file_output_mimatrix.open(output_mimatrix.c_str(), fstream::in);
                if(!file_output_mimatrix) {
                        error_message("A file that exists could not be opened.");
                        holder.general.status = 0;
                        holder.interaction.status = 0;
                        return holder;
                } else {
                        string line;
                        while(getline(file_output_mimatrix, line)){
                                vector<string> linedata = split_tab(line);
                                if (motif2array.count(linedata[0]) > 0 && motif2array.count(linedata[1]) > 0) {
                                        int motif1 = motif2array[linedata[0]];
                                        int motif2 = motif2array[linedata[1]];
                                        double r4 = atof(linedata[6].c_str());
                                        double r1 = atof(linedata[3].c_str());
                                        // potential space space nan issue for r3
                                        holder.interaction.interaction_matrix_r1[motif1][motif2] = r1;
                                        holder.interaction.interaction_matrix_r1[motif2][motif1] = r1;
                                        holder.interaction.interaction_matrix_r4[motif1][motif2] = r4;
                                        holder.interaction.interaction_matrix_r4[motif2][motif1] = r4;
                                }
                        }
                        file_output_mimatrix.close();
                }
                // END PARSE MIMATRIX

                holder.general.status = 1;
                holder.interaction.status = 1;
                return holder;
        } else {
                error_message("Files are missing or inaccessible for FIRE-pro results.");
                holder.general.status = 0;
                holder.interaction.status = 0;
                return holder;
        }
}


/**
 * Prints a human-readable dump of parsed FIRE results to stdout.
 *
 * Output consists of a summary line, every metabin with its genes and
 * expression values, every motif with its names, protein-array entries and
 * per-bin statistics, and finally the r0/r3 interaction matrix when the
 * interaction section has status 1.
 *
 * @param results  FIRE results to print. Nothing but a short notice is
 *                 printed when the general status is 0, or when it is 1 and
 *                 there are no motifs.
 */
void dump_fire(fire_results results) {
        if (results.general.status == 0 || (results.general.status == 1 && results.general.motifs.size() == 0)) {
                printf("No results or parsing unsuccessful.\n");
                return;
        }

        // size() yields an unsigned size type; convert once so that the "%d"
        // conversions below receive the int they expect.
        const int motifcount = static_cast<int>(results.general.motifs.size());
        const int bincount = static_cast<int>(results.general.metabins.size());
        const bool discrete = (results.general.exptype == "discrete");

        printf("FIRE general results ***\n");
        printf("%d motifs; %d bins; %d clusters; %s\n", motifcount, bincount, results.general.clusters, results.general.exptype.c_str());

        for (int i = 0; i < bincount; i++) {
                printf("metabin #%d ", i);
                // Discrete experiments label a metabin by its bin number,
                // continuous ones by the [min, max] range it covers.
                if (discrete) {
                        printf("%d\n", results.general.metabins[i].bin);
                } else {
                        printf("%f %f\n", results.general.metabins[i].min, results.general.metabins[i].max);
                }
                const int metabin_genecount = static_cast<int>(results.general.metabins[i].genes.size());
                for (int j = 0; j < metabin_genecount; j++) {
                        printf("%s\t%s\n", results.general.metabins[i].genes[j].c_str(), results.general.metabins[i].evalues[j].c_str());
                }
        }

        for (int i = 0; i < motifcount; i++) {
                const nucleotide_motif &motif = results.general.motifs[i];
                printf("motif #%d %s %s %f %f %d/%d\n", i, motif.candidate.c_str(), motif.location.c_str(), motif.mi, motif.z_score, motif.robustness, motif.robustness_max);
                printf("cluster %d from %s ob %d pb %d ci %f\n", motif.cluster, motif.seed.c_str(), motif.orientation_bias, motif.position_bias, motif.conservation_index);

                printf("name :");
                const int namecount = static_cast<int>(motif.names.size());
                for (int l = 0; l < namecount; l++) {
                        printf(" %s", motif.names[l].c_str());
                }
                printf("\n");

                printf("protein_array :");
                const int pacount = static_cast<int>(motif.protein_array.size());
                for (int l = 0; l < pacount; l++) {
                        // Print the entry itself rather than the text of the expression.
                        printf(" %s", motif.protein_array[l].c_str());
                }
                printf("\n");

                // A motif may carry fewer bins than there are metabins (for
                // example when it has no row in the full matrix), so never
                // index past the bins it actually has.
                const int motif_bincount = static_cast<int>(motif.bins.size());
                const int printable_bins = motif_bincount < bincount ? motif_bincount : bincount;
                for (int j = 0; j < printable_bins; j++) {
                        printf("bin #%d %s %f %f\n", j, motif.bins[j].state.c_str(), motif.bins[j].value, motif.bins[j].p_value);
                        const int bin_genecount = static_cast<int>(motif.bins[j].genes.size());
                        for (int k = 0; k < bin_genecount; k++) {
                                printf("%s\n", motif.bins[j].genes[k].c_str());
                        }
                }
        }

        // interaction results
        if (results.interaction.status != 1) {
                printf("FIRE interaction results unavailable ***\n");
                return;
        }
        printf("FIRE interaction results ***\n");
        const int interaction_motif_count = static_cast<int>(results.interaction.motifs.size());
        // Header row: one column per motif.
        for (int i = 0; i < interaction_motif_count; i++) {
                printf("\t%s", results.interaction.motifs[i].candidate.c_str());
        }
        printf("\n");
        // One row per motif, each cell printed as r0/r3.
        for (int i = 0; i < interaction_motif_count; i++) {
                printf("%s", results.interaction.motifs[i].candidate.c_str());
                for (int j = 0; j < interaction_motif_count; j++) {
                        printf("\t%f/%f", results.interaction.interaction_matrix_r0[i][j], results.interaction.interaction_matrix_r3[i][j]);
                }
                printf("\n");
        }
        // end interaction results
}

fire_results parse_fire(string filename, string type) {
        fire_results holder;
        string firedir = filename + "_FIRE";
        string firetypedir = firedir + "/" + type;

        vector<string> expfilefinder = split_slash(filename);
        int expfilefinder_index = expfilefinder.size()-1;
        string expfilename = expfilefinder[expfilefinder_index];

        string input_nodups = firetypedir + "/"+expfilename;
        string input_quantized = firetypedir + "/"+expfilename+".quantized";
        string input_commandline = firedir + "/" + "cmdline.txt";

        string output_summary = firetypedir + "/"+expfilename+".summary";
        string output_cluster = firetypedir + "/"+expfilename+".clusters";//
        string output_fullmatrix = firetypedir + "/"+expfilename+".fullmatrix";
        string output_motif_names = firetypedir + "/"+expfilename+".motifnames";
        string output_fullmimatrix = firetypedir + "/"+expfilename+".fullmimatrix";//
        string output_mimatrix = firetypedir + "/" + expfilename + ".mimatrix";

        if (file_exists(filename) && file_exists(firedir) && file_exists(firetypedir) && file_exists(output_summary) && file_exists(output_cluster) && file_exists(input_nodups) && file_exists(input_commandline) && file_exists(output_motif_names) && file_exists(output_mimatrix)) {
                map<string, int> motif2array;
                int bincount;

                // PARSE SUMMARY FILE
                fstream file_output_summary;
                file_output_summary.open(output_summary.c_str(), fstream::in);
                if(!file_output_summary) {
                        error_message("A file that exists could not be opened.");
                        holder.general.status = 0;
                        holder.interaction.status = 0;
                        return holder;
                } else {
                        string line;
                        int counter = 0;
                        while(getline(file_output_summary, line)) {
                                vector<string> linedata = split_tab(line);
                                nucleotide_motif currentmotif;
                                currentmotif.candidate = linedata[0];
                                if (linedata[1] == "0") {
                                        currentmotif.location = "5'";
                                } else {
                                        currentmotif.location = "3'UTR";
                                }
                                // orientation bias 2, 4 unknown, 7, 9, 10, 12
                                //if (linedata[2] )
                                currentmotif.mi = atof(linedata[3].c_str());
                                currentmotif.z_score = atof(linedata[5].c_str());
                                vector<string> robustness = split_slash(linedata[6]);
                                currentmotif.robustness = atoi(robustness[0].c_str());
                                currentmotif.robustness_max = atoi(robustness[1].c_str());
                                currentmotif.seed = linedata[8];
                                currentmotif.position_bias = atoi(linedata[9].c_str());
                                currentmotif.orientation_bias = atoi(linedata[10].c_str());
                                currentmotif.conservation_index = atof(linedata[11].c_str());

                                holder.general.motifs.push_back(currentmotif);
                                motif2array.insert(make_pair(linedata[0], counter));
                                counter++;
                        }
                        file_output_summary.close();
                        if (counter == 0) {
                                // no motifs; no need to go on
                                holder.general.status = 1;
                                holder.interaction.status = 1;
                                return holder;
                        }
                }
                // still need bins, names, protein_array, cluster
                // END PARSE SUMMARY FILE

                // PARSE CLUSTER FILE
                fstream file_output_cluster;
                file_output_cluster.open(output_cluster.c_str(), fstream::in);
                if(!file_output_cluster) {
                        error_message("A file that exists could not be opened.");
                        holder.general.status = 0;
                        holder.interaction.status = 0;
                        return holder;
                } else {
                        string line;
                        int maxclustercounter = 0;
                        while(getline(file_output_cluster, line)) {
                                vector<string> linedata = split_tab(line);
                                holder.general.motifs[motif2array[linedata[0]]].cluster = atoi(linedata[1].c_str());
                                if(atoi(linedata[1].c_str()) > maxclustercounter) {
                                        maxclustercounter = atoi(linedata[1].c_str());
                                }
                        }
                        holder.general.clusters = maxclustercounter+1;
                        file_output_cluster.close();
                }
                // still need bins, names, protein_array
                // END PARSE CLUSTER FILE

                // PARSE FULL MATRIX
                fstream file_output_fullmatrix;
                file_output_fullmatrix.open(output_fullmatrix.c_str(), fstream::in);
                if(!file_output_fullmatrix) {
                        error_message("A file that exists could not be opened.");
                        holder.general.status = 0;
                        holder.interaction.status = 0;
                        return holder;
                } else {
                        string line;
                        // get first line for headers, which can determine exptype
                        getline(file_output_fullmatrix, line);
                        vector<string> header = split_tab(line);
                        bincount = header.size()-1;
                        holder.general.bincount = bincount;
                        if(header[bincount][0] == '[') {
                                holder.general.exptype = "continuous";
                        } else {
                                holder.general.exptype = "discrete";
                        }

                        for(int i = 0; i < bincount; i++) {
                                metabin currentmetabin;

                                if (holder.general.exptype == "discrete") {
                                        currentmetabin.bin = i;
                                } else {
                                        vector<string> minmax = split_semicolon(header[i+1]);
                                        minmax[0].erase(minmax[0].begin());
                                        minmax[1].erase(minmax[1].end()-1);
                                        currentmetabin.min = atof(minmax[0].c_str());
                                        currentmetabin.max = atof(minmax[1].c_str());
                                        currentmetabin.bin = i;
                                }
                                holder.general.metabins.push_back(currentmetabin);
                        }
                        double signif_pvalue = 0.05/bincount;
                        while(getline(file_output_fullmatrix, line)) {
                                vector<string> body = split_tab(line);
                                string motif = body[0];
                                for(int i = 0; i < bincount; i++) {
                                        binner currentbin;
                                        currentbin.bin = i;
                                        double value = atof(body[i+1].c_str());
                                        double pvalue = pow(10,-fabs(value));
                                        currentbin.p_value = pvalue;
                                        currentbin.value = value;
                                        if (pvalue <= signif_pvalue) {
                                                if(currentbin.value > 0) {
                                                        currentbin.state = "over";
                                                } else {
                                                        currentbin.state = "under";
                                                }
                                        } else {
                                                currentbin.state = "neither";
                                        }
                                        holder.general.motifs[motif2array[motif]].bins.push_back(currentbin);
                                }
                        }
                        file_output_fullmatrix.close();
                }
                // END PARSE FULL MATRIX

                // PARSE QUANTIZED INPUT
                map<string, int> gene2bin;
                if (holder.general.exptype == "discrete") {
                        // nodups serves as both expfile and quantized file
                        fstream file_input_nodups;
                        file_input_nodups.open(input_nodups.c_str(), fstream::in);
                        if(!file_input_nodups) {
                                error_message("A file that exists could not be opened.");
                                holder.general.status = 0;
                                holder.interaction.status = 0;
                                return holder;
                        } else {
                                string line;
                                getline(file_input_nodups, line);
                                while(getline(file_input_nodups, line)) {
                                        vector<string> linedata = split_tab(line);

                                        int curbin = atoi(linedata[1].c_str());
                                        if (curbin < bincount) {
                                                holder.general.metabins[curbin].genes.push_back(linedata[0]);
                                                holder.general.metabins[curbin].evalues.push_back(linedata[1]);
                                                gene2bin.insert(make_pair(linedata[0],curbin));

                                        }

                                }
                                file_input_nodups.close();
                        }
                } else {
                        // read quantized first
                        fstream file_input_quantized;
                        file_input_quantized.open(input_quantized.c_str(), fstream::in);
                        if(!file_input_quantized) {
                                error_message("A file that should exist could not be opened.");
                                holder.general.status = 0;
                                holder.interaction.status = 0;
                                return holder;
                        } else {
                                string line;
                                getline(file_input_quantized, line);
                                while(getline(file_input_quantized, line)) {
                                        vector<string> linedata = split_tab(line);
                                        int curbin = atoi(linedata[1].c_str());
                                        gene2bin.insert(make_pair(linedata[0],curbin));
                                }
                                file_input_quantized.close();
                        }
                        // populate continuous expression file data
                        fstream file_input_nodups;
                        file_input_nodups.open(input_nodups.c_str(), fstream::in);
                        if(!file_input_nodups) {
                                error_message("A file that exists could not be opened.");
                                holder.general.status = 0;
                                holder.interaction.status = 0;
                                return holder;
                        } else {
                                string line;
                                getline(file_input_nodups, line);
                                while(getline(file_input_nodups, line)) {
                                    vector<string> linedata = split_tab(line);
                                    int curbin = gene2bin[linedata[0]];
                                    if (curbin < bincount) {
                                            holder.general.metabins[curbin].genes.push_back(linedata[0]);
                                            holder.general.metabins[curbin].evalues.push_back(linedata[1]);
                                    }
                                }
                                file_input_nodups.close();
                        }
                }
                // END PARSE QUANTIZED INPUT

                // PARSE MOTIF PROFILE
                if (type == "DNA_RNA") {
                        int profilefailcount = 0;
                        string output_motif_profile1 = firedir + "/DNA/"+expfilename+".profiles";
                        fstream file_output_motif_profile1;
                        file_output_motif_profile1.open(output_motif_profile1.c_str(), fstream::in);
                        if(!file_output_motif_profile1) {
                                profilefailcount++;
                        } else {
                                string line;
                                while(getline(file_output_motif_profile1, line)) {
                                        vector<string> linedata = split_tab(line);
                                        string curmotif = linedata[0];
                                        string curgene = linedata[1];
                                        if (gene2bin.count(curgene) > 0) {
                                            int curbin = gene2bin[curgene];
                                            holder.general.motifs[motif2array[curmotif]].bins[curbin].genes.push_back(curgene);
                                        }
                                }
                                file_output_motif_profile1.close();
                        }

                        string output_motif_profile = firedir + "/RNA/"+expfilename+".profiles";
                        fstream file_output_motif_profile;
                        file_output_motif_profile.open(output_motif_profile.c_str(), fstream::in);
                        if(!file_output_motif_profile) {
                                profilefailcount++;
                        } else {
                                string line;
                                while(getline(file_output_motif_profile, line)) {
                                        vector<string> linedata = split_tab(line);
                                        string curmotif = linedata[0];
                                        string curgene = linedata[1];
                                        if (gene2bin.count(curgene) > 0) {
                                            int curbin = gene2bin[curgene];
                                            holder.general.motifs[motif2array[curmotif]].bins[curbin].genes.push_back(curgene);
                                        }
                                }
                                file_output_motif_profile.close();
                        }

                        if (profilefailcount == 2) {
                                error_message("A file that should exist could not be opened.");
                                holder.general.status = 0;
                                holder.interaction.status = 0;
                                return holder;
                        }
                } else {
                        // read the single file
                        string output_motif_profile = firetypedir + "/"+expfilename+".profiles";
                        fstream file_output_motif_profile;
                        file_output_motif_profile.open(output_motif_profile.c_str(), fstream::in);
                        if(!file_output_motif_profile) {
                                error_message("A file that should exist could not be opened.");
                                holder.general.status = 0;
                                holder.interaction.status = 0;
                                return holder;
                        } else {
                                string line;
                                while(getline(file_output_motif_profile, line)) {
                                        vector<string> linedata = split_tab(line);
                                        string curmotif = linedata[0];
                                        string curgene = linedata[1];
                                        if (gene2bin.count(curgene) > 0) {
                                           int curbin = gene2bin[curgene];
                                           holder.general.motifs[motif2array[curmotif]].bins[curbin].genes.push_back(curgene);
                                        }
                                }
                                file_output_motif_profile.close();
                        }
                }
                // END PARSE MOTIF PROFILE

                // PARSE MOTIF NAMES
                fstream file_output_motif_names;
                file_output_motif_names.open(output_motif_names.c_str(), fstream::in);
                if(!file_output_motif_names) {
                        error_message("A file that exists could not be opened.");
                        holder.general.status = 0;
                        holder.interaction.status = 0;
                        return holder;
                } else {
                        string line;
                        while(getline(file_output_motif_names, line)) {
                                vector<string> linedata = split_tab(line);
                                string curmotif = linedata[0];
                                vector<string> linedata2 = split_semicolon(linedata[1]);
                                vector<string> names = split_comma(linedata2[0]);
                                int namescount = names.size();
                                for (int i = 0; i < namescount; i++) {
                                        if (names[i] != "-") {
                                                if (names[i][0] == 'P' && names[i][1] == 'A' && names[i][2] == '_') {
                                                        holder.general.motifs[motif2array[curmotif]].protein_array.push_back(names[i]);
                                                } else {
                                                        holder.general.motifs[motif2array[curmotif]].names.push_back(names[i]);
                                                }
                                        }
                                }
                        }
                        file_output_motif_names.close();
                }
                // END PARSE MOTIF NAMES

                // PARSE CMDLINE
                fstream file_input_cmdline;
                file_input_cmdline.open(input_commandline.c_str(), fstream::in);
                if(!file_input_cmdline) {
                        error_message("A file that exists could not be opened.");
                        holder.general.status = 0;
                        holder.interaction.status = 0;
                        return holder;
                } else {
                        string line;
                        getline(file_input_cmdline, line);
                        holder.general.querysettings = line;
                        file_input_cmdline.close();
                }
                // END PARSE CMDLINE

                // BEGIN POPULATING INTERACTION HOLDER
                holder.interaction.querysettings = holder.general.querysettings;
                int motifcount = holder.general.motifs.size();
                vector<double> tmpvector;
                vector<double> tmpvector2; // to avoid undefined issue

                for(int i = 0; i < motifcount; i++) {
                        holder.interaction.motifs.push_back(holder.general.motifs[i]);
                        tmpvector.push_back(0);
                        tmpvector2.push_back(-1);
                }
                for(int i = 0; i < motifcount; i++) {
                        holder.interaction.interaction_matrix_r0.push_back(tmpvector);
                        holder.interaction.interaction_matrix_r1.push_back(tmpvector2);
                        holder.interaction.interaction_matrix_r3.push_back(tmpvector);
                        holder.interaction.interaction_matrix_r4.push_back(tmpvector2);
                }
                // END BEGIN POPULATING INTERACTION HOLDER

                // PARSE FULLMIMATRIX
                fstream file_output_fullmimatrix;
                file_output_fullmimatrix.open(output_fullmimatrix.c_str(), fstream::in);
                if(!file_output_fullmimatrix) {
                        error_message("A file that exists could not be opened.");
                        holder.general.status = 0;
                        holder.interaction.status = 0;
                        return holder;
                } else {
                        string line;
                        while(getline(file_output_fullmimatrix, line)){
                                vector<string> linedata = split_tab(line);
                                int motif1 = motif2array[linedata[0]];
                                int motif2 = motif2array[linedata[1]];
                                double r0 = atof(linedata[2].c_str());
                                double r3 = atof(linedata[5].c_str());
                                holder.interaction.interaction_matrix_r0[motif1][motif2] = r0;
                                holder.interaction.interaction_matrix_r0[motif2][motif1] = r0;
                                holder.interaction.interaction_matrix_r3[motif1][motif2] = r3;
                                holder.interaction.interaction_matrix_r3[motif2][motif1] = r3;
                        }
                        file_output_fullmimatrix.close();
                }
                // END PARSE FULLMIMATRIX

                // PARSE MIMATRIX
                fstream file_output_mimatrix;
                file_output_mimatrix.open(output_mimatrix.c_str(), fstream::in);
                if(!file_output_mimatrix) {
                        error_message("A file that exists could not be opened.");
                        holder.general.status = 0;
                        holder.interaction.status = 0;
                        return holder;
                } else {
                        string line;
                        while(getline(file_output_mimatrix, line)){
                                vector<string> linedata = split_tab(line);
                                if (motif2array.count(linedata[0]) > 0 && motif2array.count(linedata[1]) > 0) {
                                        int motif1 = motif2array[linedata[0]];
                                        int motif2 = motif2array[linedata[1]];
                                        double r1 = atof(linedata[3].c_str());
                                        double r4 = atof(linedata[6].c_str());
                                        // potential space space nan issue for r3
                                        holder.interaction.interaction_matrix_r1[motif1][motif2] = r1;
                                        holder.interaction.interaction_matrix_r1[motif2][motif1] = r1;
                                        holder.interaction.interaction_matrix_r4[motif1][motif2] = r4;
                                        holder.interaction.interaction_matrix_r4[motif2][motif1] = r4;
                                }
                        }
                        file_output_mimatrix.close();
                }
                // END PARSE MIMATRIX

                holder.general.status = 1;
                holder.interaction.status = 1;
                return holder;
        }  else {
                error_message("Files are missing or inaccessible for FIRE results.");
                holder.general.status = 0;
                holder.interaction.status = 0;
                return holder;
        }
}

// Prints a parsed iPAGE result set to standard output for debugging.
//
// results: the structure produced by parse_ipage(). Nothing but a short notice
//          is printed when results.status is 0, or when it is 1 and there are
//          no pathways.
//
// Output order: a summary line, then every metabin (its bin id for "discrete"
// experiments, its min/max range otherwise) followed by its gene/value pairs,
// then every pathway with its per-bin state, value, p-value and genes.
void dump_ipage(ipage_general results) {
        if (results.status == 0 || (results.status == 1 && results.pathways.size() == 0)) {
                printf("No results or parsing unsuccessful.\n");
                return;
        }

        // Container sizes are size_t; convert once so they match the %d
        // conversions below instead of passing a mismatched type to printf.
        const int pathwaycount = static_cast<int>(results.pathways.size());
        const int bincount = static_cast<int>(results.metabins.size());
        const bool discrete = (results.exptype == "discrete");

        printf("iPAGE results ***\n");
        printf("%d pathways; %d bins; %s\n", pathwaycount, bincount, results.exptype.c_str());

        for (int i = 0; i < bincount; i++) {
                printf("metabin #%d ", i);
                if (discrete) {
                        printf("%d\n", results.metabins[i].bin);
                } else {
                        printf("%f %f\n", results.metabins[i].min, results.metabins[i].max);
                }
                // The gene listing is the same for both experiment types.
                const int metabin_genecount = static_cast<int>(results.metabins[i].genes.size());
                for (int j = 0; j < metabin_genecount; j++) {
                        printf("%s\t%s\n", results.metabins[i].genes[j].c_str(), results.metabins[i].evalues[j].c_str());
                }
        }

        for (int i = 0; i < pathwaycount; i++) {
                printf("pathway #%d %s\n", i, results.pathways[i].pathway.c_str());
                // Never index past the bins this pathway actually holds, even
                // if that is fewer than the number of metabins.
                const int available_bins = static_cast<int>(results.pathways[i].bins.size());
                for (int j = 0; j < bincount && j < available_bins; j++) {
                        printf("bin #%d %s %f %f\n", j, results.pathways[i].bins[j].state.c_str(), results.pathways[i].bins[j].value, results.pathways[i].bins[j].p_value);
                        const int bin_genecount = static_cast<int>(results.pathways[i].bins[j].genes.size());
                        for (int k = 0; k < bin_genecount; k++) {
                                printf("%s\n", results.pathways[i].bins[j].genes[k].c_str());
                        }
                }
        }
}

ipage_general parse_ipage(string filename) {
        ipage_general results;
        string pagedir = filename + "_PAGE";
        string input_quantized = filename + "_PAGE/input.ipage_quantized";
        string input_nodups = filename + ".nodups";
        string output_intersections = filename + "_PAGE/output.ipage_intersections";
        string output_pvmatrix = filename + "_PAGE/pvmatrix.txt";
        string querysettings = filename + "_PAGE/info.txt";

        if (file_exists(filename) && file_exists(pagedir) && file_exists(input_quantized) && file_exists(input_nodups) && file_exists(output_intersections) && file_exists(output_pvmatrix) && file_exists(querysettings)) {

                // PARSE PVMATRIX to get exptype, bincount, pathwaycount, metabin information, bin values and states
                // determine pathway count
                int bincount = 0;
                int pathwaycount = 0;
                string exptype;
                map<string, int> pathway2array;

                fstream file_output_pvmatrix;
                file_output_pvmatrix.open(output_pvmatrix.c_str(), fstream::in);
                if (!file_output_pvmatrix) {
                        error_message("A file that exists could not be opened.");
                        results.status = 0;
                        return results;
                } else {
                        string line;
                        // get first line for headers, which can determine exptype
                        getline(file_output_pvmatrix, line);
                        vector<string> header = split_tab(line);
                        bincount = header.size()-1;
                        results.bincount = bincount;
                        if(header[bincount][0] == '[') {
                                exptype = "continuous";
                        } else {
                                exptype = "discrete";
                        }

                        for(int i = 0; i < bincount; i++) {
                                metabin currentmetabin;

                                if (exptype == "discrete") {
                                        currentmetabin.bin = i;
                                } else {
                                        vector<string> minmax = split_whitespace(header[i+1]);
                                        minmax[0].erase(minmax[0].begin());
                                        minmax[1].erase(minmax[1].end()-1);
                                        currentmetabin.min = atof(minmax[0].c_str());
                                        currentmetabin.max = atof(minmax[1].c_str());
                                        currentmetabin.bin = i;
                                }

                                results.metabins.push_back(currentmetabin);
                        }

                        while(getline(file_output_pvmatrix, line)) {
                                vector<string> body = split_tab(line);
                                pathway currentpathway;
                                currentpathway.pathway = body[0];

                                pathway2array.insert(make_pair(currentpathway.pathway, pathwaycount));

                                for(int i = 0; i < bincount; i++) {
                                        binner currentbin;
                                        currentbin.bin = i;
                                        vector<string> pvalues = split_slash(body[i+1]);
                                        vector<double> dpvalues;

                                        dpvalues.push_back(atof(pvalues[0].c_str()));
                                        dpvalues.push_back(atof(pvalues[1].c_str()));

                                        double signif_pvalue = 0.05/bincount;

                                        if (fabs(dpvalues[0]) > fabs(dpvalues[1])) {
                                               dpvalues.push_back(atof(pvalues[0].c_str()));

                                               currentbin.value = -dpvalues[2];
                                               double pvalue = pow(10,-fabs(dpvalues[2]));

                                               if (pvalue <= signif_pvalue) {
                                                   currentbin.state = "over";
                                               } else {
                                                   currentbin.state = "neither";
                                               }

                                        } else {
                                               dpvalues.push_back(atof(pvalues[1].c_str()));

                                               currentbin.value = dpvalues[2];
                                               double pvalue = pow(10,-fabs(dpvalues[2]));

                                               if (pvalue <= signif_pvalue) {
                                                   currentbin.state = "under";
                                               } else {
                                                   currentbin.state = "neither";
                                               }

                                        }
                                        currentbin.p_value = pow(10,dpvalues[2]);

                                        currentpathway.bins.push_back(currentbin);
                                }
                                results.pathways.push_back(currentpathway);
                                pathwaycount++;
                        }
                        results.exptype = exptype;

                        file_output_pvmatrix.close();
                }

                if (pathwaycount == 0) {
                        // no need to continue; no pathways
                        results.status = 0;
                        return results;
                }
                // END PARSE PVMATRIX

                // PARSE INPUT QUANTIZED
                map<string, int> gene2bin;
                fstream file_input_quantized;
                file_input_quantized.open(input_quantized.c_str(), fstream::in);
                if (!file_input_quantized) {
                        error_message("A file that exists could not be opened.");
                        results.status = 0;
                        return results;
                } else {
                        string line;
                        getline(file_input_quantized, line);
                        while(getline(file_input_quantized, line)) {
                                vector<string> pair = split_tab(line);
                                int bin_reference = atoi(pair[1].c_str());
                                gene2bin.insert(make_pair(pair[0], bin_reference));
                        }
                        file_input_quantized.close();
                }
                // END PARSE INPUT QUANTIZED

                // PARSE INPUT NODUPS
                fstream file_input_nodups;
                file_input_nodups.open(input_nodups.c_str(), fstream::in);
                if (!file_input_nodups) {
                        error_message("A file that exists could not be opened.");
                        results.status = 0;
                        return results;
                } else {
                        string line;
                        getline(file_input_nodups, line);
                        while(getline(file_input_nodups, line)) {
                                vector<string> pair = split_tab(line);
                                if (gene2bin.count(pair[0]) > 0) {
                                        int bin_reference = gene2bin[pair[0]];
                                        results.metabins[bin_reference].genes.push_back(pair[0]);
                                        results.metabins[bin_reference].evalues.push_back(pair[1]);
                                }
                        }
                        file_input_nodups.close();
                }
                // END PARSE INPUT NODUPS

                // PARSE OUTPUT INTERSECTIONS
                fstream file_output_intersections;
                file_output_intersections.open(output_intersections.c_str(), fstream::in);
                if (!file_output_intersections) {
                        error_message("A file that exists could not be opened.");
                        results.status = 0;
                        return results;
                } else {
                        string line;
                        getline(file_output_intersections, line);
                        while(getline(file_output_intersections, line)) {
                                vector<string> linedata = split_tab(line);
                                int bin_reference = atoi(linedata[1].c_str());
                                int pathway_reference = pathway2array[linedata[0]];
                                int intersection_gene_count = linedata.size() - 2;
                                for (int i = 0; i < intersection_gene_count; i++) {
                                        results.pathways[pathway_reference].bins[bin_reference].genes.push_back(linedata[i+2]);
                                }
                        }
                        file_input_quantized.close();
                }
                // END PARSE OUTPUT INTERSECTIONS

                // PARSE QUERY SETTINGS
                fstream file_querysettings;
                file_querysettings.open(querysettings.c_str(), fstream::in);
                if(!file_querysettings) {
                        string line;
                        while(getline(file_querysettings, line)) {
                                vector<string> linedata = split_tab(line);
                                if (linedata.size() == 2) {
                                        results.querysettings.insert(make_pair(linedata[0], linedata[1]));
                                }
                        }
                } else {
                        file_querysettings.close();
                }
                // END PARSE QUERY SETTINGS

                results.status = 1;
                return results;

        } else {
                error_message("Files are missing or inaccessible for iPAGE results.");
                results.status = 0;
                return results;
        }
}

// Breaks `line` into its whitespace-separated words. Runs of whitespace count
// as a single separator, so the result never contains empty strings.
vector<string> split_whitespace (string line) {
        vector<string> words;
        istringstream reader(line);
        string word;
        // Formatted extraction skips leading whitespace before each word.
        while (reader >> word) {
                words.push_back(word);
        }
        return words;
}

// Splits `line` once on '/' and once on '\', then returns whichever split
// produced more pieces. When both produce the same number of pieces the
// backslash split is returned.
vector<string> split_slash (string line) {
        const char separators[2] = { '/', '\\' };
        vector<string> pieces[2];

        for (int which = 0; which < 2; ++which) {
                istringstream reader(line);
                string piece;
                while (getline(reader, piece, separators[which])) {
                        pieces[which].push_back(piece);
                }
        }

        // Strictly-greater comparison: a tie goes to the backslash split.
        return (pieces[0].size() > pieces[1].size()) ? pieces[0] : pieces[1];
}

// Splits `line` at every ';'. Empty fields between two separators are kept;
// a single trailing ';' does not add an empty field, and an empty line gives
// an empty result.
vector<string> split_semicolon (string line) {
        vector<string> fields;
        string::size_type begin = 0;
        while (begin < line.size()) {
                const string::size_type cut = line.find(';', begin);
                if (cut == string::npos) {
                        // Last field: everything that is left.
                        fields.push_back(line.substr(begin));
                        break;
                }
                fields.push_back(line.substr(begin, cut - begin));
                begin = cut + 1;
        }
        return fields;
}

// Splits `line` at every tab character. Empty fields between two tabs are
// kept; a single trailing tab does not add an empty field, and an empty line
// gives an empty result.
vector<string> split_tab (string line) {
        vector<string> fields;
        string::size_type begin = 0;
        while (begin < line.size()) {
                const string::size_type cut = line.find('\t', begin);
                if (cut == string::npos) {
                        // Last field: everything that is left.
                        fields.push_back(line.substr(begin));
                        break;
                }
                fields.push_back(line.substr(begin, cut - begin));
                begin = cut + 1;
        }
        return fields;
}

// Splits `line` at every ','. Empty fields between two commas are kept; a
// single trailing ',' does not add an empty field, and an empty line gives an
// empty result.
vector<string> split_comma (string line) {
        vector<string> fields;
        string::size_type begin = 0;
        while (begin < line.size()) {
                const string::size_type cut = line.find(',', begin);
                if (cut == string::npos) {
                        // Last field: everything that is left.
                        fields.push_back(line.substr(begin));
                        break;
                }
                fields.push_back(line.substr(begin, cut - begin));
                begin = cut + 1;
        }
        return fields;
}

// Reports whether stat() succeeds for `filename`. Any failure of stat() is
// reported as "does not exist".
bool file_exists(string filename) {
        struct stat info;
        return stat(filename.c_str(), &info) == 0;
}

// Error-reporting hook used by the parsers. It currently does nothing and
// returns normally; callers signal the failure themselves through the status
// field of the structure they return.
void error_message(string error) {
        // Reporting is switched off. The disabled implementation was:
        //   printf("error: %s\n", error.c_str());
        //   exit(1);
        (void)error;
}

// Writes `output` followed by a newline to standard output. The local switch
// is hard-wired on, so the message is always printed.
void verbose_output(string output) {
        const bool enabled = true;
        if (!enabled) {
                return;
        }
        printf("%s\n", output.c_str());
}