summaryrefslogtreecommitdiff
path: root/project2/streamRows.cpp
blob: 825dc600b596f3192450b609e2b6813d55692d7b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#include "streamRows.h"
#include "rowProcessor.h"

/**
 * Build a stream row parser from its XML configuration element.
 *
 * Every setting is read from an attribute of p:
 *   fieldSep      - first character is used as the field separator
 *   quoteChar     - first character is used as the quote character
 *   keepBlankRows - "true" sets keepBlankRows, "count" sets countBlankRows
 *   newline       - the complete line terminator sequence
 *   encoding      - stored as given
 *   skipheader    - parsed with atoi()
 *
 * NOTE(review): the [0] lookups and newline.length() - 1 assume the
 * fieldSep, quoteChar and newline attributes are non-empty - confirm that
 * defaults or validation are applied before this constructor runs.
 */
StreamRows::StreamRows(const xmlpp::Element * p) :
	DefinedColumns(p),
	fieldSep(p->get_attribute_value("fieldSep")[0]),
	quoteChar(p->get_attribute_value("quoteChar")[0]),
	keepBlankRows(p->get_attribute_value("keepBlankRows") == "true"),
	countBlankRows(p->get_attribute_value("keepBlankRows") == "count"),
	newline(p->get_attribute_value("newline")),
	// newlin is newline without its final character; pushChar() matches it
	// against the tail of the current token when that final character arrives.
	newlin(newline, 0, newline.length() - 1),
	encoding(p->get_attribute_value("encoding")),
	skipheader(atoi(p->get_attribute_value("skipheader").c_str())),
	inQuotes(false),
	prevWasQuote(false)
{
	// With no columns present at this point, column names are taken from the
	// first non-skipped line of the stream (see the mkCols branches in
	// pushChar() and end()).
	mkCols = columns.empty();
}

// Destructor: the body is intentionally empty; no explicit clean-up is done here.
StreamRows::~StreamRows()
{
}

/**
 * Add a column whose name is derived from tok.
 *
 * tok is sanitised in place: every character that is not alphanumeric
 * according to isalnum() is removed. A Column is then constructed from the
 * current number of columns and the sanitised name, and inserted into
 * columns.
 *
 * @param tok  raw column name; modified in place.
 */
void
StreamRows::addColumn(Glib::ustring & tok) const
{
	for (Glib::ustring::iterator i = tok.begin(); i != tok.end(); ) {
		const gunichar ch = *i;
		// isalnum() is only defined for values representable as unsigned
		// char, so code points above 0xFF are rejected without calling it.
		if (ch > 0xFF || !isalnum(static_cast<int>(ch))) {
			// erase() invalidates i; continue from the iterator it returns.
			i = tok.erase(i);
		}
		else {
			++i;
		}
	}
	columns.insert(Column(columns.size(), tok));
}

/**
 * Prepare for a new stream: discard any partially accumulated token and
 * point the column cursor at the first column of the byColIdx index.
 */
void
StreamRows::begin() const
{
	// The two resets are independent of one another.
	tok.clear();
	curCol = columns.get<byColIdx>().begin();
}

/**
 * Feed one character of the input stream into the parser.
 *
 * Characters accumulate in tok until a field separator or line terminator
 * is seen outside quotes.
 *  - Line end: while skipheader is non-zero the line is discarded and
 *    skipheader is decremented. Otherwise, if mkCols is set, the line is
 *    taken as the header and its last token becomes a column; if not, the
 *    last token is stored, remaining columns receive their defValue and
 *    rp->rowReady() is called. Lines that set no column only produce a row
 *    when keepBlankRows is set, and only bump rowNum when countBlankRows is
 *    set.
 *  - Quote character: toggles inQuotes; a quote directly following a
 *    closing quote appends one literal quote character to the token.
 *  - Field separator outside quotes: the token becomes a column name
 *    (mkCols) or the value of the current column.
 *  - Anything else is appended to the token.
 *
 * @param c   the character to process.
 * @param rp  receiver notified through rowReady() for each completed row.
 */
void
StreamRows::pushChar(gunichar c, const RowProcessor * rp) const
{
	// A line ends when, outside quotes, c is the final character of the
	// newline sequence and the token already ends with the rest of it
	// (newlin). The emptiness and length checks keep rbegin() valid and stop
	// the unsigned offset below from wrapping when the token is shorter than
	// newlin.
	const bool atLineEnd = (!inQuotes)
		&& (!newline.empty())
		&& (c == *newline.rbegin())
		&& (tok.length() >= newlin.length())
		&& (tok.compare(tok.length() - newlin.length(), newlin.length(), newlin) == 0);

	if (atLineEnd) {
		// A closing quote immediately before the line end must not be paired
		// with an opening quote at the start of the next line.
		prevWasQuote = false;
		if (skipheader) {
			skipheader -= 1;
		}
		else {
			// Drop the newline prefix that was accumulated as ordinary text.
			tok.erase(tok.length() - newlin.length());
			if (!mkCols) {
				// Fields beyond the last column are dropped rather than
				// written through the end iterator.
				if (!tok.empty() && curCol != columns.get<byColIdx>().end()) {
					*curCol++ = VariableType(tok);
				}
				if (keepBlankRows || curCol != columns.get<byColIdx>().begin()) {
					while (curCol != columns.get<byColIdx>().end()) {
						// Read defValue before advancing: doing both in one
						// expression has unspecified order before C++17.
						*curCol = curCol->defValue;
						++curCol;
					}
					rp->rowReady();
					rowNum += 1;
				}
				else if (countBlankRows) {
					rowNum += 1;
				}
			}
			else {
				if (!tok.empty()) {
					addColumn(tok);
				}
				mkCols = false;
			}
			curCol = columns.get<byColIdx>().begin();
		}
		tok.clear();
	}
	else if (c == quoteChar) {
		if (prevWasQuote) {
			// Doubled quote: emit one literal quote and re-enter the quoted
			// section.
			tok += c;
			prevWasQuote = false;
			inQuotes = !inQuotes;
		}
		else {
			// Remember a closing quote so an immediately following quote can
			// be recognised as an escaped one.
			prevWasQuote = inQuotes;
			inQuotes = !inQuotes;
		}
	}
	else if ((!inQuotes) && (c == fieldSep)) {
		prevWasQuote = false;
		if (skipheader == 0) {
			if (mkCols) {
				addColumn(tok);
			}
			else if (curCol != columns.get<byColIdx>().end()) {
				*curCol++ = VariableType(tok);
			}
			// else: more fields than columns; the surplus field is dropped.
		}
		tok.clear();
	}
	else {
		prevWasQuote = false;
		tok += c;
	}
}

/**
 * Flush whatever is pending once the input stream is exhausted.
 *
 * The handling mirrors the line-end logic of pushChar() for a final line
 * that has no terminating newline:
 *  - while header lines are still being skipped, nothing is emitted;
 *  - if column names are still being collected (mkCols), the pending token
 *    becomes the last column and no data row is emitted;
 *  - otherwise the pending token is stored, remaining columns receive their
 *    defValue and rp->rowReady() is called. A final line that set no column
 *    only produces a row when keepBlankRows is set, and only bumps rowNum
 *    when countBlankRows is set.
 * The column cursor and token are reset in every case.
 *
 * @param rp  receiver notified through rowReady() if a final row is produced.
 */
void
StreamRows::end(const RowProcessor * rp) const
{
	if (skipheader == 0) {
		if (mkCols) {
			// The stream ended inside the header line: finish the column
			// list instead of reporting the header as a data row.
			if (!tok.empty()) {
				addColumn(tok);
			}
			// Same test the constructor uses: keep collecting next time only
			// if no column has been defined at all.
			mkCols = columns.empty();
		}
		else {
			// Fields beyond the last column are dropped rather than written
			// through the end iterator.
			if (!tok.empty() && curCol != columns.get<byColIdx>().end()) {
				*curCol++ = VariableType(tok);
			}
			if (keepBlankRows || curCol != columns.get<byColIdx>().begin()) {
				while (curCol != columns.get<byColIdx>().end()) {
					// Read defValue before advancing: doing both in one
					// expression has unspecified order before C++17.
					*curCol = curCol->defValue;
					++curCol;
				}
				rp->rowReady();
				rowNum += 1;
			}
			else if (countBlankRows) {
				rowNum += 1;
			}
		}
	}
	curCol = columns.get<byColIdx>().begin();
	tok.clear();
}