/* KamePlainTextRepository.cc */
/* Created by Enomoto Sanshiro on 1 July 2009. */
/* Last updated by Enomoto Sanshiro on 1 July 2009. */


#include <string>
#include <iostream>
#include <sstream>
#include <fstream>
#include <iomanip>
#include <map>
#include <cstdio>
#include <cctype>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "KameSystemIO.hh"
#include "KameNtuple.hh"
#include "KameRepository.hh"
#include "KamePlainTextRepository.hh"


using namespace std;
using namespace kame;


// Constructs a repository with an empty repository name.  With an empty
// name, FileNameOf() adds neither a directory prefix nor the ".knt"
// suffix.  Compression starts disabled, the line terminator is '\n', and
// a privately owned header processor is installed as the active one.
TKamePlainTextRepository::TKamePlainTextRepository(void)
{
    // repository is a file //
    _RepositoryName = string();

    _IsCompressionEnabled = false;
    _IsRepositoryCreated = false;
    _LineTerminator = '\n';

    // the default header processor is owned (and later deleted) by us //
    _MyHeaderProcessor = new TKamePlainTextHeaderProcessor();
    _HeaderProcessor = _MyHeaderProcessor;
}

// Constructs a repository stored under the directory RepositoryName.
// FileNameOf() then produces "<RepositoryName>/<object>[;<rev>].knt".
// Compression starts disabled, the line terminator is '\n', and a
// privately owned header processor is installed as the active one.
TKamePlainTextRepository::TKamePlainTextRepository(const std::string& RepositoryName)
{
    // repository is a directory //
    _RepositoryName = RepositoryName;

    _IsCompressionEnabled = false;
    _IsRepositoryCreated = false;
    _LineTerminator = '\n';

    // the default header processor is owned (and later deleted) by us //
    _MyHeaderProcessor = new TKamePlainTextHeaderProcessor();
    _HeaderProcessor = _MyHeaderProcessor;
}

// Releases the header processor allocated by the constructors.  A
// processor installed through SetHeaderProcessor() is not deleted here.
TKamePlainTextRepository::~TKamePlainTextRepository()
{
    // only the internally allocated processor is ours to destroy //
    delete _MyHeaderProcessor;
}

void TKamePlainTextRepository::EnableCompression(void)
{
    _IsCompressionEnabled = true;
}

void TKamePlainTextRepository::SetDelimiter(const std::string& Delimiter)
{
    _Delimiter = Delimiter;
}

void TKamePlainTextRepository::SetQuote(const std::string& Quote)
{
    _Quote = Quote;
}

void TKamePlainTextRepository::SetHeaderDelimiter(const std::string& Delimiter)
{
    _HeaderDelimiter = Delimiter;
}

void TKamePlainTextRepository::SetHeaderQuote(const std::string& Quote)
{
    _HeaderQuote = Quote;
}

void TKamePlainTextRepository::SetLineTerminator(char LineTerminator)
{
    _LineTerminator = LineTerminator;
}

// Replaces the header processor used by LoadNtuple().  The pointer is
// stored as is and is never deleted by this class (the destructor only
// deletes the processor created in the constructor), so the caller
// keeps ownership.  LoadNtuple() dereferences it without a null check.
void TKamePlainTextRepository::SetHeaderProcessor(TKamePlainTextHeaderProcessor* HeaderProcessor)
{
    this->_HeaderProcessor = HeaderProcessor;
}

// Builds the path of the file holding ObjectName.
//   - with a repository name:    "<repository>/<object>[;<rev>].knt"
//   - without a repository name: "<object>[;<rev>]"
// The ";<rev>" part is appended only when Revision is non-negative.
string TKamePlainTextRepository::FileNameOf(const string& ObjectName, int Revision)
{
    const bool IsInDirectory = (! _RepositoryName.empty());

    ostringstream Path;
    if (IsInDirectory) {
        Path << _RepositoryName << "/";
    }

    Path << ObjectName;
    if (Revision >= 0) {
        Path << ";" << Revision;
    }

    // the extension is only used for files kept inside a repository //
    if (IsInDirectory) {
        Path << ".knt";
    }

    return Path.str();
}

// Opens the output stream for the object ObjectName and returns it; the
// stream must be released with CloseOutputFile().
//
// Steps:
//   1. If a repository name is set, make sure it exists as a directory,
//      creating it with mode 0755 when missing.  The check is done only
//      until it has succeeded once.
//   2. If Revision > 0, rename the current file to the name of revision
//      (Revision - 1).  This is best-effort: a failing rename (e.g. the
//      current file does not exist) is ignored.
//   3. Open the current file, either directly or, when compression is
//      enabled, through a pipe to "gzip - > '<file>.gz'".  The pipe is
//      recorded in _CommandPipeTable so that CloseOutputFile() can close it.
//
// Throws TKameException if the repository directory cannot be created or
// is not a directory, if 'gzip' cannot be started, or if the stream is
// not usable after opening.
ostream* TKamePlainTextRepository::OpenOutputFile(const string& ObjectName, int Revision) throw(TKameException)
{
    if ((! _IsRepositoryCreated) && (! _RepositoryName.empty())) {
        const bool IsMissing = (access(_RepositoryName.c_str(), F_OK) != 0);
        const bool IsMkdirFailed = (
            IsMissing && (mkdir(_RepositoryName.c_str(), 0755) != 0)
        );

        // stat() is run even after mkdir(): if mkdir() failed only because
        // somebody else created the directory in the meantime, we can
        // still proceed. //
        struct stat FileStat;
        if (stat(_RepositoryName.c_str(), &FileStat) < 0) {
            throw TKameException(
                "TKamePlainTextRepository::OpenOutputFile()",
                string(
                    IsMkdirFailed ?
                    "unable to create directory: " :
                    "systemcall stat(2) failed: "
                ) + _RepositoryName
            );
        }
        if (! S_ISDIR(FileStat.st_mode)) {
            throw TKameException(
                "TKamePlainTextRepository::OpenOutputFile()",
                "file already exists: " + _RepositoryName
            );
        }

        _IsRepositoryCreated = true;
    }

    const string FileName = FileNameOf(ObjectName);
    const string TargetFileName = (
        _IsCompressionEnabled ? (FileName + ".gz") : FileName
    );

    if (Revision > 0) {
        string PreviousFileName = FileNameOf(ObjectName, Revision - 1);
        if (_IsCompressionEnabled) {
            PreviousFileName += ".gz";
        }
        // best-effort backup of the previous revision; result ignored //
        rename(TargetFileName.c_str(), PreviousFileName.c_str());
    }

    ostream* File = 0;
    FILE* Pipe = 0;
    if (_IsCompressionEnabled) {
        // The file name goes through the shell: wrap it in single quotes
        // (with embedded single quotes escaped) so that blanks and shell
        // metacharacters in the name are taken literally. //
        string QuotedFileName = "'";
        for (string::size_type i = 0; i < TargetFileName.size(); i++) {
            if (TargetFileName[i] == '\'') {
                QuotedFileName += "'\\''";
            }
            else {
                QuotedFileName += TargetFileName[i];
            }
        }
        QuotedFileName += "'";

        string Command = "gzip - > " + QuotedFileName;
        Pipe = popen(Command.c_str(), "w");
        if ((Pipe == NULL) || (Pipe == (void*) -1)) {
            throw TKameException(
                "TKamePlainTextRepository::OpenOutputFile()",
                "unable to start 'gzip' for file: " + FileName
            );
        }
        File = new TKameOutputCFileStream(Pipe);
        _CommandPipeTable[File] = Pipe;
    }
    else {
        File = new ofstream(FileName.c_str());
    }

    if (! *File) {
        // do not leave a dangling table entry or an open pipe behind //
        map<ios*, FILE*>::iterator Entry = _CommandPipeTable.find(File);
        if (Entry != _CommandPipeTable.end()) {
            pclose(Entry->second);
            _CommandPipeTable.erase(Entry);
        }
        delete File;
        throw TKameException(
            "TKamePlainTextRepository::OpenOutputFile()",
            "unable to create file: " + FileName
        );
    }

    return File;
}

// Opens the input stream for revision Revision of ObjectName and returns
// it; the stream must be released with CloseInputFile().
//
// Sources are tried in this order:
//   1. the plain file FileNameOf(ObjectName, Revision), if readable;
//   2. "<file>.gz", if readable, read through a "gunzip -c" pipe (the
//      pipe is recorded in _CommandPipeTable);
//   3. standard input, if the file name is empty.
// Streams allocated here are remembered in _MyFileTable so that
// CloseInputFile() knows what to delete (nothing, for std::cin).
//
// Throws TKameException if 'gunzip' cannot be started or if no usable
// stream could be opened.
istream* TKamePlainTextRepository::OpenInputFile(const string& ObjectName, int Revision) throw(TKameException)
{
    const string FileName = FileNameOf(ObjectName, Revision);
    const string CompressedFileName = FileName + ".gz";

    istream* File = 0;
    istream* MyFile = 0;
    FILE* Pipe = 0;
    if (access(FileName.c_str(), R_OK) == 0) {
        MyFile = new ifstream(FileName.c_str());
        File = MyFile;
    }
    else if (access(CompressedFileName.c_str(), R_OK) == 0) {
        // The file name goes through the shell and, for explicit
        // revisions, contains ';' (see FileNameOf()), which the shell
        // would otherwise treat as a command separator.  Wrap the name
        // in single quotes, escaping embedded single quotes. //
        string QuotedFileName = "'";
        for (string::size_type i = 0; i < CompressedFileName.size(); i++) {
            if (CompressedFileName[i] == '\'') {
                QuotedFileName += "'\\''";
            }
            else {
                QuotedFileName += CompressedFileName[i];
            }
        }
        QuotedFileName += "'";

        string Command = "gunzip -c " + QuotedFileName;
        Pipe = popen(Command.c_str(), "r");
        if ((Pipe == NULL) || (Pipe == (void*) -1)) {
            throw TKameException(
                "TKamePlainTextRepository::OpenInputFile()",
                "unable to start 'gunzip' for file: " + FileName
            );
        }
        MyFile = new TKameInputCFileStream(Pipe);
        File = MyFile;
        _CommandPipeTable[File] = Pipe;
    }
    else if (FileName.empty()) {
        // std::cin is not owned by us: MyFile stays null //
        File = &cin;
    }

    if ((! File) || (! *File)) {
        // do not leave a dangling table entry or an open pipe behind //
        if (Pipe != 0) {
            map<ios*, FILE*>::iterator Entry = _CommandPipeTable.find(File);
            if (Entry != _CommandPipeTable.end()) {
                _CommandPipeTable.erase(Entry);
            }
            pclose(Pipe);
        }
        delete MyFile;
        throw TKameException(
            "TKamePlainTextRepository::OpenInputFile()",
            "unable to open file: " + FileName
        );
    }
    _MyFileTable[File] = MyFile;

    return File;
}

// Closes and deletes a stream returned by OpenOutputFile().  If the
// stream writes into a command pipe (compressed output), the pipe is
// closed with pclose() and removed from _CommandPipeTable first.
void TKamePlainTextRepository::CloseOutputFile(ostream* File) throw(TKameException)
{
    map<ios*, FILE*>::iterator Entry = _CommandPipeTable.find(File);
    if (Entry != _CommandPipeTable.end()) {
        // Push anything still held by the C++ stream into the pipe before
        // the pipe goes away; once pclose() has run the FILE* is invalid.
        // NOTE(review): the buffering of TKameOutputCFileStream is not
        // visible from this file -- the flush is harmless if it has none.
        File->flush();

        pclose(Entry->second);
        _CommandPipeTable.erase(Entry);
    }

    delete File;
}

// Closes a stream returned by OpenInputFile().  A command pipe behind
// the stream (compressed input) is closed with pclose(); the stream
// object is deleted only if OpenInputFile() allocated it (std::cin is
// left alone).  The bookkeeping entries are removed, so closing the same
// pointer twice, or a pointer that was never opened here, does nothing.
void TKamePlainTextRepository::CloseInputFile(istream* File) throw(TKameException)
{
    map<ios*, FILE*>::iterator Entry = _CommandPipeTable.find(File);
    if (Entry != _CommandPipeTable.end()) {
        pclose(Entry->second);
        _CommandPipeTable.erase(Entry);
    }

    // Look the stream up without inserting a default entry, and drop the
    // entry afterwards so that no pointer to a deleted stream remains. //
    if (_MyFileTable.count(File) > 0) {
        delete _MyFileTable[File];
        _MyFileTable.erase(File);
    }
}

void TKamePlainTextRepository::SaveNtuple(const TKameNtuple& Ntuple, const std::string& Name) throw(TKameException)
{
    int Revision = _RevisionTable[Name];
    _RevisionTable[Name] += 1;

    ostream* File = OpenOutputFile(Name, Revision);

    *File << "## Name: " << Name << endl;
    *File << "## Revision: " << Revision << endl;

    if (_HeaderDelimiter.empty() && _HeaderQuote.empty()) {
	unsigned NumberOfColumns = Ntuple.NumberOfColumns();
	for (unsigned Column = 0; Column < NumberOfColumns; Column++) {
	    if (Ntuple.ColumnNameOf(Column).find_first_of(" \t") != string::npos) {
		SetHeaderQuote("\"");
	    }
	}
	
    }

    if (! _HeaderDelimiter.empty()) {
	*File << "## HeaderDelimiter: " << _HeaderDelimiter << endl;
    }
    if (! _HeaderQuote.empty()) {
	*File << "## HeaderQuote: " << _HeaderQuote << endl;
    }
    if (! _Delimiter.empty()) {
	*File << "## Delimiter: " << _Delimiter << endl;
    }
    if (! _Quote.empty()) {
	*File << "## Quote: " << _Quote << endl;
    }

    *File << "## Fields: ";
    unsigned NumberOfColumns = Ntuple.NumberOfColumns();
    for (unsigned Column = 0; Column < NumberOfColumns; Column++) {
	if (Column > 0) {
	    *File << _HeaderDelimiter << " ";
	}
	*File << _HeaderQuote << Ntuple.ColumnNameOf(Column) << _HeaderQuote;
    }
    *File << endl;

    if (Ntuple.HasTypeList()) {
	*File << "## FieldTypes: ";
	unsigned NumberOfColumns = Ntuple.NumberOfColumns();
	for (unsigned Column = 0; Column < NumberOfColumns; Column++) {
	    if (Column > 0) {
		*File << _HeaderDelimiter << " ";
	    }
	    *File << Ntuple.ColumnTypeOf(Column);
	}
	*File << endl;
    }

    vector<string> PropertyNameList = Ntuple.PropertyNameList();
    for (unsigned i = 0; i < PropertyNameList.size(); i++) {
	string Name = PropertyNameList[i];
	if (
	    (Name != "Name") && (Name != "Revision") &&
	    (Name != "Fields") && (Name != "FieldTypes") &&
	    (Name != "Delimiter") && (Name != "Quote") &&
	    (Name != "HeaderDelimiter") && (Name != "HeaderQuote")
	){
	    *File << "# " << Name << ": " << Ntuple.Property(Name) << endl;
	}
    }
    *File << endl;

    unsigned CurrentSegmentIndex = 0;

    for (unsigned Row = 0; Row < Ntuple.NumberOfRows(); Row++) {
	if (Ntuple.SegmentIndexOf(Row) != CurrentSegmentIndex) {
	    CurrentSegmentIndex = Ntuple.SegmentIndexOf(Row);
	    *File << endl;
	}
	for (unsigned Column = 0; Column < NumberOfColumns; Column++) {
	    bool IsString = Ntuple[Row][Column].IsString();
	    if ((Column == 0) && _Quote.empty() && IsString) {
		const string& Value = Ntuple[Row][Column];
		if ((Value.size() > 0) && (Value[0] == '#')) {
		    *File << '\\';
		}
	    }
	    if (Column != 0) {
		*File << _Delimiter << " ";
	    }
		
	    if (IsString && (! _Quote.empty())) {
		*File << _Quote << Ntuple[Row][Column] << _Quote;
	    }
	    else {
		*File << Ntuple[Row][Column];
	    }
	}
	*File << endl;
    }

    CloseOutputFile(File);
}

// Reads revision Revision of the object Name and appends its columns to
// Ntuple, starting at the ntuple's current number of columns.
//
// Input is split into lines at the configured line terminator:
//   - an empty line breaks the current segment (once a row was read);
//   - a line starting with '#' is passed to the header processor;
//   - any other line is parsed as a data row.
// The field parser is configured when the first data row is seen, using
// the "FieldTypes", "Delimiter" and "Quote" header items read so far; a
// "Delimiter"/"Quote" item in the file overwrites the corresponding
// setting of this repository object.
// After reading, column names (and types, where given) are set from the
// "Fields"/"FieldTypes" items -- names default to "Column00",
// "Column01", ... when only types are present -- and every header item
// is copied into the ntuple's properties.
// NOTE(review): nothing visible here resets the header processor, so
// items from an earlier LoadNtuple() call appear to remain in effect.
//
// Throws TKameException if the file cannot be opened; parse errors are
// rethrown with the line number prepended.  The input stream is closed
// on all paths.
void TKamePlainTextRepository::LoadNtuple(TKameNtuple& Ntuple, const std::string& Name, int Revision) throw(TKameException)
{
    istream* File = OpenInputFile(Name, Revision);

    try {
        TKamePlainTextFieldParser FieldParser;
        const unsigned ColumnOffset = Ntuple.NumberOfColumns();

        string Line;
        int RowIndex = 0, LineNumber = 0;
        bool IsParserInitialized = false;
        while (getline(*File, Line, _LineTerminator)) {
            LineNumber++;

            if (Line.empty()) {
                if (RowIndex > 0) {
                    Ntuple.BreakSegment();
                }
                continue;
            }
            if (Line[0] == '#') {
                _HeaderProcessor->ProcessLine(Line);
                continue;
            }

            if (! IsParserInitialized) {
                // first data row: the header is complete by now //
                FieldParser.SetFieldTypeList(
                    _HeaderProcessor->ItemValueListOf("FieldTypes")
                );
                TKameVariant Delimiter = (
                    _HeaderProcessor->ItemValueOf("Delimiter")
                );
                if (! Delimiter.IsVoid()) {
                    _Delimiter = (string) Delimiter;
                }
                if (! _Delimiter.empty()) {
                    FieldParser.SetDelimiter(_Delimiter);
                }
                TKameVariant Quote = _HeaderProcessor->ItemValueOf("Quote");
                if (! Quote.IsVoid()) {
                    _Quote = (string) Quote;
                }
                if (! _Quote.empty()) {
                    FieldParser.SetQuote(_Quote);
                }
                IsParserInitialized = true;
            }

            try {
                if (FieldParser.ProcessLine(Ntuple[RowIndex], Line, ColumnOffset)) {
                    RowIndex++;
                }
            }
            catch (TKameException &e) {
                ostringstream os;
                os << "line " << LineNumber << ": " << e;
                throw TKameException(os.str());
            }
        }

        vector<TKameVariant> FieldNameList = (
            _HeaderProcessor->ItemValueListOf("Fields")
        );
        vector<TKameVariant> FieldTypeList = (
            _HeaderProcessor->ItemValueListOf("FieldTypes")
        );
        if (FieldNameList.empty() && ! FieldTypeList.empty()) {
            for (unsigned Index = 0; Index < FieldTypeList.size(); Index++) {
                ostringstream os;
                os << "Column" << setfill('0') << setw(2) << Index;
                FieldNameList.push_back(os.str());
            }
        }
        for (unsigned Index = 0; Index < FieldNameList.size(); Index++) {
            // The type list may be shorter than the name list; columns
            // beyond its end get a name only (no out-of-range access). //
            if (Index < FieldTypeList.size()) {
                Ntuple.SetColumnNameType(
                    ColumnOffset+Index, FieldNameList[Index],
                    FieldTypeList[Index]
                );
            }
            else {
                Ntuple.SetColumnName(
                    ColumnOffset+Index, FieldNameList[Index]
                );
            }
        }

        for (unsigned i = 0; i < _HeaderProcessor->NumberOfItems(); i++) {
            Ntuple.Property(_HeaderProcessor->ItemNameOf(i)) = (
                _HeaderProcessor->ItemValueOf(i)
            );
        }
    }
    catch (...) {
        // do not leak the stream (and the gunzip pipe) on errors //
        CloseInputFile(File);
        throw;
    }

    CloseInputFile(File);
}



// Starts with no delimiter and no quote character configured ('\0' means
// "not set"); TokenizeLine() then splits on whitespace.
TKamePlainTextLineTokenizer::TKamePlainTextLineTokenizer(void)
{
    _Quote = '\0';
    _Delimiter = '\0';
}

// Nothing is released explicitly here.
TKamePlainTextLineTokenizer::~TKamePlainTextLineTokenizer()
{
    // intentionally empty //
}

// Sets the delimiter to the first character of Delimiter, provided that
// character is punctuation (per ispunct()).  An empty string or any
// other leading character leaves the current delimiter unchanged, so a
// whitespace delimiter cannot be selected this way (whitespace is what
// TokenizeLine() splits on when no delimiter is set).
void TKamePlainTextLineTokenizer::SetDelimiter(const std::string& Delimiter)
{
    if (Delimiter.empty()) {
        return;
    }

    // <cctype> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behavior. //
    const char Candidate = Delimiter[0];
    if (ispunct(static_cast<unsigned char>(Candidate))) {
        _Delimiter = Candidate;
    }
}

// Sets the quote character to the first character of Quote, provided
// that character is punctuation (per ispunct()).  An empty string or
// any other leading character leaves the current quote unchanged.
void TKamePlainTextLineTokenizer::SetQuote(const std::string& Quote)
{
    if (Quote.empty()) {
        return;
    }

    // <cctype> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behavior. //
    const char Candidate = Quote[0];
    if (ispunct(static_cast<unsigned char>(Candidate))) {
        _Quote = Candidate;
    }
}

// Splits Line into elements and appends them to ElementList.
//
// Rules, applied character by character:
//   - whitespace before the start of an element is skipped;
//   - a backslash makes the next character literal (the backslash itself
//     is dropped);
//   - the quote character, if one is set, toggles quoting and is dropped;
//   - outside quotes, an element ends at the delimiter, or at any
//     whitespace when no delimiter is set.
// The text remaining at the end of the line is always appended as a
// final element, even when it is empty (so an empty line yields one
// empty element, and a line ending in a separator yields a trailing
// empty element).
//
// Throws TKameException("quote mismatch") if the line ends inside quotes.
void TKamePlainTextLineTokenizer::TokenizeLine(const std::string& Line, std::vector<std::string>& ElementList) const throw(TKameException)
{
    string Element;
    bool IsStarted = false, IsInQuote = false, IsEscaped = false;

    for (
        string::const_iterator Char = Line.begin();
        Char != Line.end();
        ++Char
    ){
        // <cctype> functions need an unsigned char value; a negative
        // plain char (non-ASCII byte) would be undefined behavior. //
        const bool IsSpace = (
            isspace(static_cast<unsigned char>(*Char)) != 0
        );

        if ((! IsStarted) && IsSpace) {
            continue;
        }
        IsStarted = true;

        if (IsEscaped) {
            // escaped character: taken literally below //
            IsEscaped = false;
        }
        else if (*Char == '\\') {
            IsEscaped = true;
            continue;
        }
        else if ((_Quote != '\0') && (*Char == _Quote)) {
            // the '\0' guard keeps a NUL byte in the input from acting
            // as a quote when no quote character is configured //
            IsInQuote = ! IsInQuote;
            continue;
        }
        else if (! IsInQuote) {
            const bool IsSeparator = (
                (_Delimiter != '\0') ? (*Char == _Delimiter) : IsSpace
            );
            if (IsSeparator) {
                ElementList.push_back(Element);
                Element = "";
                IsStarted = false;
                continue;
            }
        }

        Element += *Char;
    }

    if (IsInQuote) {
        throw TKameException("quote mismatch");
    }
    ElementList.push_back(Element);
}



// Starts in "all float" mode; SetFieldTypeList() leaves this mode as
// soon as it sees a field type other than float/double.
TKamePlainTextFieldParser::TKamePlainTextFieldParser(void)
{
    this->_IsAllFloat = true;
}

// Nothing is released explicitly here.
TKamePlainTextFieldParser::~TKamePlainTextFieldParser()
{
    // intentionally empty //
}

void TKamePlainTextFieldParser::SetFieldTypeList(const std::vector<TKameVariant>& FieldTypeStringList)
{
    for (unsigned i = 0; i < FieldTypeStringList.size(); i++) {
	const string& Type = FieldTypeStringList[i];
	if ((Type == "int") || (Type == "long")) {
	    _FieldTypeList.push_back(FieldType_Int);
	    _IsAllFloat = false;
	}
	else if ((Type == "float") || (Type == "double")) {
	    _FieldTypeList.push_back(FieldType_Float);
	}
	else {
	    _FieldTypeList.push_back(FieldType_String);
	    _IsAllFloat = false;
	}
    }
}

// Parses one data line into Row, starting at column ColumnOffset, and
// returns a column count (the caller treats 0 as "no row stored").
//
//   - If a delimiter or quote is set, the line is tokenized with
//     TokenizeLine() and handled by ProcessTokenizedLine().
//   - Otherwise the line is split on whitespace.  In "all float" mode
//     each token that starts with a hex digit, '+' or '-' is converted
//     to double, falling back to the raw string if the conversion
//     throws; other tokens are stored as strings.  The number of tokens
//     is returned.
//   - Otherwise (typed fields) ProcessSimpleLine() does the work.
// NOTE(review): a token starting with '.' (e.g. ".5") is stored as a
// string by the first-character test below -- confirm this is intended.
unsigned TKamePlainTextFieldParser::ProcessLine(TKameTable::TRow Row, std::string& Line, unsigned ColumnOffset) throw(TKameException)
{
    if ((_Delimiter != '\0') || (_Quote != '\0')) {
        vector<string> ElementList;
        TokenizeLine(Line, ElementList);
        return ProcessTokenizedLine(Row, ElementList, ColumnOffset);
    }

    istringstream LineStream(Line);
    if (! _IsAllFloat) {
        return ProcessSimpleLine(Row, LineStream, ColumnOffset);
    }

    string Value;
    unsigned ColumnIndex = 0;
    while (LineStream >> Value) {
        // cheap pre-check to avoid an exception for obvious non-numbers;
        // the cast keeps isxdigit() defined for non-ASCII bytes //
        const char First = Value[0];
        const bool IsNumberLike = (
            isxdigit(static_cast<unsigned char>(First)) ||
            (First == '+') || (First == '-')
        );

        if (IsNumberLike) {
            try {
                Row[ColumnOffset + ColumnIndex] = (
                    (double) TKameVariant(Value)
                );
            }
            catch (TKameException& e) {
                Row[ColumnOffset + ColumnIndex] = Value;
            }
        }
        else {
            Row[ColumnOffset + ColumnIndex] = Value;
        }
        ColumnIndex++;
    }

    return ColumnIndex;
}

// Reads one whitespace-separated token per declared field type from
// LineStream and stores it in Row at ColumnOffset + index, converted to
// double or long for float/integer fields and kept as text otherwise.
// For a string in the first field, a leading backslash followed by at
// least one more character is stripped.  When the stream runs out of
// tokens, the remaining cells are only accessed, not assigned.
// Always returns the number of declared field types.
unsigned TKamePlainTextFieldParser::ProcessSimpleLine(TKameTable::TRow Row, std::istream& LineStream, unsigned ColumnOffset) throw(TKameException)
{
    const unsigned NumberOfColumns = _FieldTypeList.size();

    string Token;
    for (unsigned Index = 0; Index < NumberOfColumns; Index++) {
        const unsigned Column = ColumnOffset + Index;

        if (! (LineStream >> Token)) {
            // no token left: just touch the cell //
            Row[Column];
            continue;
        }

        if (_FieldTypeList[Index] == FieldType_Float) {
            Row[Column] = (double) TKameVariant(Token);
            continue;
        }
        if (_FieldTypeList[Index] == FieldType_Int) {
            Row[Column] = (long) TKameVariant(Token);
            continue;
        }

        // string field //
        const bool IsEscaped = (
            (Index == 0) && (Token.size() > 1) && (Token[0] == '\\')
        );
        if (IsEscaped) {
            Row[Column] = Token.substr(1);
        }
        else {
            Row[Column] = Token;
        }
    }

    return NumberOfColumns;
}

// Stores the already tokenized elements in Row at ColumnOffset + index.
//   - With no field types declared, each element is converted to double,
//     falling back to the raw string if the conversion throws.
//   - With field types declared, each element is converted according to
//     its type (conversion errors propagate); elements beyond the
//     declared types are ignored.
// Returns the number of elements in ElementList, including ignored ones.
unsigned TKamePlainTextFieldParser::ProcessTokenizedLine(TKameTable::TRow Row, std::vector<std::string>& ElementList, unsigned ColumnOffset) throw(TKameException)
{
    const bool IsUntyped = _FieldTypeList.empty();

    for (unsigned Index = 0; Index < ElementList.size(); Index++) {
        const string& Element = ElementList[Index];
        const unsigned Column = ColumnOffset + Index;

        if (IsUntyped) {
            try {
                Row[Column] = (double) TKameVariant(Element);
            }
            catch (TKameException &e) {
                Row[Column] = Element;
            }
            continue;
        }

        if (Index >= _FieldTypeList.size()) {
            // too many elements: ignored //
            break;
        }

        if (_FieldTypeList[Index] == FieldType_Float) {
            Row[Column] = (double) TKameVariant(Element);
        }
        else if (_FieldTypeList[Index] == FieldType_Int) {
            Row[Column] = (long) TKameVariant(Element);
        }
        else {
            Row[Column] = Element;
        }
    }

    return ElementList.size();
}



// Creates a header processor; no members are set explicitly here.
TKamePlainTextHeaderProcessor::TKamePlainTextHeaderProcessor(void)
{
    // intentionally empty //
}

// Nothing is released explicitly here.
TKamePlainTextHeaderProcessor::~TKamePlainTextHeaderProcessor(void)
{
    // intentionally empty //
}

// Returns the number of header items recorded so far by ProcessLine()
// (a name seen more than once is counted each time).
unsigned TKamePlainTextHeaderProcessor::NumberOfItems()
{
    const unsigned Count = _ItemList.size();
    return Count;
}

bool TKamePlainTextHeaderProcessor::ProcessLine(const string& Line)
{
    // syntax: ^[\s]*#+[\s]*([\w]*)[\s]*:[\s]*(.*)$, Name = \1, Value = \2

    string Name;
    string Value;
    
    enum TState { 
	State_Initial, State_Header, State_LeadingSpace,
	State_Name, 
	State_PreSeparatorSpace, State_Separator, State_ProSeparatorSpace,
	State_Value
    };

    TState State = State_Initial;

    for (unsigned i = 0; i < Line.size(); i++) {
	char Char = Line[i];

	if (State == State_Initial) {
	    if (isspace(Char)) {
		continue;
	    }
	    else if (Char == '#') {
		State = State_Header;
		continue;
	    }
	    else {
		return false;
	    }
	}
	if (State == State_Header) {
	    if (Char == '#') {
		continue;
	    }
	    else {
		State = State_LeadingSpace;
	    }
	}
	if (State == State_LeadingSpace) {
	    if (isspace(Char)) {
		continue;
	    }
	    else {
		State = State_Name;
	    }
	}
	if (State == State_Name) {
	    if (! isspace(Char) && (Char != ':')) {
		Name += Char;
		continue;	    
	    }
	    else {
		State = State_PreSeparatorSpace;
	    }
	}
	if (State == State_PreSeparatorSpace) {
	    if (isspace(Char)) {
		continue;	    
	    }
	    else {
		State = State_Separator;
	    }
	}
	if (State == State_Separator) {
	    if (Char == ':') {
		State = State_ProSeparatorSpace;
		continue;	    
	    }
	    else {
		return false;
	    }
	}
	if (State == State_ProSeparatorSpace) {
	    if (isspace(Char)) {
		continue;	    
	    }
	    else {
		State = State_Value;
	    }
	}
	if (State == State_Value) {
	    Value += Char;
	    continue;	    
	}
    }

    if (State != State_Value) {
	return false;
    }

    _ItemTable[Name] = _ItemList.size();
    _ItemList.push_back(make_pair(Name, Value));
    
    if (Name == "HeaderDelimiter") {
	SetDelimiter(Value);
    }
    else if (Name == "HeaderQuote") {
	SetQuote(Value);
    }

    return true;
}

// Returns the name of the ItemIndex-th recorded header item, in the
// order the items were processed.  An index at or beyond
// NumberOfItems() yields an empty string instead of reading past the
// end of the list.
string TKamePlainTextHeaderProcessor::ItemNameOf(unsigned ItemIndex)
{
    if (ItemIndex >= _ItemList.size()) {
        return string();
    }

    return _ItemList[ItemIndex].first;
}

// Returns the value of the ItemIndex-th recorded header item, in the
// order the items were processed.  An index at or beyond
// NumberOfItems() yields a default-constructed variant (the same result
// the by-name overload gives for an unknown name) instead of reading
// past the end of the list.
TKameVariant TKamePlainTextHeaderProcessor::ItemValueOf(unsigned ItemIndex)
{
    if (ItemIndex >= _ItemList.size()) {
        return TKameVariant();
    }

    return _ItemList[ItemIndex].second;
}

// Returns the most recently recorded value of the header item called
// ItemName, or a default-constructed variant if no such item was seen.
TKameVariant TKamePlainTextHeaderProcessor::ItemValueOf(const string& ItemName)
{
    const bool IsKnown = (_ItemTable.count(ItemName) != 0);
    if (IsKnown) {
        return _ItemList[_ItemTable[ItemName]].second;
    }

    return TKameVariant();
}

// Splits the value of the ItemIndex-th recorded header item with
// TokenizeLine() (using the header delimiter/quote, if any were given)
// and returns the elements as variants.  An index at or beyond
// NumberOfItems() yields an empty list instead of reading past the end
// of the item list.  Tokenizer errors (e.g. unbalanced quotes)
// propagate as TKameException.
vector<TKameVariant> TKamePlainTextHeaderProcessor::ItemValueListOf(unsigned ItemIndex)
{
    if (ItemIndex >= _ItemList.size()) {
        return vector<TKameVariant>();
    }

    vector<string> StringList;
    TokenizeLine(_ItemList[ItemIndex].second, StringList);

    vector<TKameVariant> List(StringList.size());
    for (unsigned i = 0; i < StringList.size(); i++) {
        List[i] = StringList[i];
    }

    return List;
}

// Returns the tokenized value of the most recently recorded header item
// called ItemName, or an empty list if no such item was seen.
vector<TKameVariant> TKamePlainTextHeaderProcessor::ItemValueListOf(const string& ItemName)
{
    const bool IsKnown = (_ItemTable.count(ItemName) != 0);
    if (IsKnown) {
        return ItemValueListOf(_ItemTable[ItemName]);
    }

    return vector<TKameVariant>();
}
