QJson home page
json_scanner.cpp
1 /* This file is part of QJson
2  *
3  * Copyright (C) 2008 Flavio Castelli <flavio.castelli@gmail.com>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License version 2.1, as published by the Free Software Foundation.
8  *
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with this library; see the file COPYING.LIB. If not, write to
17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  * Boston, MA 02110-1301, USA.
19  */
20 
21 #include "qjson_debug.h"
22 #include "json_scanner.h"
23 #include "json_parser.hh"
24 
25 #include <ctype.h>
26 
27 #include <QtCore/QDebug>
28 #include <QtCore/QRegExp>
29 
30 #include <cassert>
31 
32 bool ishexnstring(const QString& string) {
33  for (int i = 0; i < string.length(); i++) {
34  if (isxdigit(string[i] == 0))
35  return false;
36  }
37  return true;
38 }
39 
40 JSonScanner::JSonScanner(QIODevice* io)
41  : m_allowSpecialNumbers(false),
42  m_io (io)
43 {
44  m_quotmarkClosed = true;
45  m_quotmarkCount = 0;
46 }
47 
48 void JSonScanner::allowSpecialNumbers(bool allow) {
49  m_allowSpecialNumbers = allow;
50 }
51 
52 static QString unescape( const QByteArray& ba, bool* ok ) {
53  assert( ok );
54  *ok = false;
55  QString res;
56  QByteArray seg;
57  bool bs = false;
58  for ( int i = 0, size = ba.size(); i < size; ++i ) {
59  const char ch = ba[i];
60  if ( !bs ) {
61  if ( ch == '\\' )
62  bs = true;
63  else
64  seg += ch;
65  } else {
66  bs = false;
67  switch ( ch ) {
68  case 'b':
69  seg += '\b';
70  break;
71  case 'f':
72  seg += '\f';
73  break;
74  case 'n':
75  seg += '\n';
76  break;
77  case 'r':
78  seg += '\r';
79  break;
80  case 't':
81  seg += '\t';
82  break;
83  case 'u':
84  {
85  res += QString::fromUtf8( seg );
86  seg.clear();
87 
88  if ( i > size - 5 ) {
89  //error
90  return QString();
91  }
92 
93  const QString hex_digit1 = QString::fromUtf8( ba.mid( i + 1, 2 ) );
94  const QString hex_digit2 = QString::fromUtf8( ba.mid( i + 3, 2 ) );
95  i += 4;
96 
97  if ( !ishexnstring( hex_digit1 ) || !ishexnstring( hex_digit2 ) ) {
98  qCritical() << "Not an hex string:" << hex_digit1 << hex_digit2;
99  return QString();
100  }
101  bool hexOk;
102  const ushort hex_code1 = hex_digit1.toShort( &hexOk, 16 );
103  if (!hexOk) {
104  qCritical() << "error converting hex value to short:" << hex_digit1;
105  return QString();
106  }
107  const ushort hex_code2 = hex_digit2.toShort( &hexOk, 16 );
108  if (!hexOk) {
109  qCritical() << "error converting hex value to short:" << hex_digit2;
110  return QString();
111  }
112 
113  res += QChar(hex_code2, hex_code1);
114  break;
115  }
116  case '\\':
117  seg += '\\';
118  break;
119  default:
120  seg += ch;
121  break;
122  }
123  }
124  }
125  res += QString::fromUtf8( seg );
126  *ok = true;
127  return res;
128 }
129 
130 int JSonScanner::yylex(YYSTYPE* yylval, yy::location *yylloc)
131 {
132  char ch;
133 
134  if (!m_io->isOpen()) {
135  qCritical() << "JSonScanner::yylex - io device is not open";
136  return -1;
137  }
138 
139  yylloc->step();
140 
141  do {
142  bool ret;
143  if (m_io->atEnd()) {
144  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::END";
145  return yy::json_parser::token::END;
146  }
147  else
148  ret = m_io->getChar(&ch);
149 
150  if (!ret) {
151  qCritical() << "JSonScanner::yylex - error reading from io device";
152  return -1;
153  }
154 
155  qjsonDebug() << "JSonScanner::yylex - got |" << ch << "|";
156  yylloc->columns();
157 
158  if (ch == '\n' || ch == '\r')
159  yylloc->lines();
160  } while (m_quotmarkClosed && (isspace(ch) != 0));
161 
162  if (m_quotmarkClosed && ((ch == 't') || (ch == 'T'))) {
163  const QByteArray buf = m_io->peek(3).toLower();
164  if (buf == "rue") {
165  m_io->read (3);
166  yylloc->columns(3);
167  qjsonDebug() << "JSonScanner::yylex - TRUE_VAL";
168  return yy::json_parser::token::TRUE_VAL;
169  }
170  }
171  else if (m_quotmarkClosed && ((ch == 'n') || (ch == 'N'))) {
172  const QByteArray buf = m_io->peek(3).toLower();
173  if (buf == "ull") {
174  m_io->read (3);
175  yylloc->columns(3);
176  qjsonDebug() << "JSonScanner::yylex - NULL_VAL";
177  return yy::json_parser::token::NULL_VAL;
178  } else if (buf.startsWith("an") && m_allowSpecialNumbers) {
179  m_io->read(2);
180  yylloc->columns(2);
181  qjsonDebug() << "JSonScanner::yylex - NAN_VAL";
182  return yy::json_parser::token::NAN_VAL;
183 
184  }
185  }
186  else if (m_quotmarkClosed && ((ch == 'f') || (ch == 'F'))) {
187  // check false value
188  const QByteArray buf = m_io->peek(4).toLower();
189  if (buf.length() == 4) {
190  if (buf == "alse") {
191  m_io->read (4);
192  yylloc->columns(4);
193  qjsonDebug() << "JSonScanner::yylex - FALSE_VAL";
194  return yy::json_parser::token::FALSE_VAL;
195  }
196  }
197  }
198  else if (m_quotmarkClosed && ((ch == 'e') || (ch == 'E'))) {
199  QByteArray ret(1, ch);
200  const QByteArray buf = m_io->peek(1);
201  if (!buf.isEmpty()) {
202  if ((buf[0] == '+' ) || (buf[0] == '-' )) {
203  ret += m_io->read (1);
204  yylloc->columns();
205  }
206  }
207  *yylval = QVariant(QString::fromUtf8(ret));
208  return yy::json_parser::token::E;
209  }
210  else if (m_allowSpecialNumbers && m_quotmarkClosed && ((ch == 'I') || (ch == 'i'))) {
211  QByteArray ret(1, ch);
212  const QByteArray buf = m_io->peek(7);
213  if (buf == "nfinity") {
214  m_io->read(7);
215  yylloc->columns(7);
216  qjsonDebug() << "JSonScanner::yylex - INFINITY_VAL";
217  return yy::json_parser::token::INFINITY_VAL;
218  }
219  }
220 
221  if (ch != '"' && !m_quotmarkClosed) {
222  // we're inside a " " block
223  QByteArray raw;
224  raw += ch;
225  char prevCh = ch;
226  bool escape_on = (ch == '\\') ? true : false;
227 
228  while ( true ) {
229  char nextCh;
230  qint64 ret = m_io->peek(&nextCh, 1);
231  if (ret != 1) {
232  if (m_io->atEnd())
233  return yy::json_parser::token::END;
234  else
235  return -1;
236  } else if ( !escape_on && nextCh == '\"' ) {
237  bool ok;
238  const QString str = unescape( raw, &ok );
239  *yylval = ok ? str : QString();
240  return ok ? yy::json_parser::token::STRING : -1;
241  }
242 #if 0
243  if ( prevCh == '\\' && nextCh != '"' && nextCh != '\\' && nextCh != '/' &&
244  nextCh != 'b' && nextCh != 'f' && nextCh != 'n' &&
245  nextCh != 'r' && nextCh != 't' && nextCh != 'u') {
246  qjsonDebug() << "Just read" << nextCh;
247  qjsonDebug() << "JSonScanner::yylex - error decoding escaped sequence";
248  return -1;
249  }
250 #endif
251  m_io->read(1); // consume
252  raw += nextCh;
253  prevCh = nextCh;
254  if (escape_on)
255  escape_on = false;
256  else
257  escape_on = (prevCh == '\\') ? true : false;
258 #if 0
259  if (nextCh == '\\') {
260  char buf;
261  if (m_io->getChar (&buf)) {
262  yylloc->columns();
263  if (((buf != '"') && (buf != '\\') && (buf != '/') &&
264  (buf != 'b') && (buf != 'f') && (buf != 'n') &&
265  (buf != 'r') && (buf != 't') && (buf != 'u'))) {
266  qjsonDebug() << "Just read" << buf;
267  qjsonDebug() << "JSonScanner::yylex - error decoding escaped sequence";
268  return -1;
269  }
270  } else {
271  qCritical() << "JSonScanner::yylex - error decoding escaped sequence : io error";
272  return -1;
273  }
274  }
275 #endif
276  }
277  }
278  else if (isdigit(ch) != 0 && m_quotmarkClosed) {
279  bool ok;
280  QByteArray numArray = QByteArray::fromRawData( &ch, 1 * sizeof(char) );
281  qulonglong number = numArray.toULongLong(&ok);
282  if (!ok) {
283  //This shouldn't happen
284  qCritical() << "JSonScanner::yylex - error while converting char to ulonglong, returning -1";
285  return -1;
286  }
287  if (number == 0) {
288  // we have to return immediately otherwise numbers like
289  // 2.04 will be converted to 2.4
290  *yylval = QVariant(number);
291  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::DIGIT";
292  return yy::json_parser::token::DIGIT;
293  }
294 
295  char nextCh;
296  qint64 ret = m_io->peek(&nextCh, 1);
297  while (ret == 1 && isdigit(nextCh)) {
298  m_io->read(1); //consume
299  yylloc->columns(1);
300  numArray = QByteArray::fromRawData( &nextCh, 1 * sizeof(char) );
301  number = number * 10 + numArray.toULongLong(&ok);
302  if (!ok) {
303  //This shouldn't happen
304  qCritical() << "JSonScanner::yylex - error while converting char to ulonglong, returning -1";
305  return -1;
306  }
307  ret = m_io->peek(&nextCh, 1);
308  }
309 
310  *yylval = QVariant(number);
311  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::DIGIT";
312  return yy::json_parser::token::DIGIT;
313  }
314  else if (isalnum(ch) != 0) {
315  *yylval = QVariant(QString(QChar::fromLatin1(ch)));
316  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::WORD ("
317  << ch << ")";
318  return yy::json_parser::token::STRING;
319  }
320  else if (ch == ':') {
321  // set yylval
322  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::COLON";
323  return yy::json_parser::token::COLON;
324  }
325  else if (ch == '"') {
326  // yy::json_parser::token::QUOTMARK (")
327 
328  // set yylval
329  m_quotmarkCount++;
330  if (m_quotmarkCount %2 == 0) {
331  m_quotmarkClosed = true;
332  m_quotmarkCount = 0;
333  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::QUOTMARKCLOSE";
334  return yy::json_parser::token::QUOTMARKCLOSE;
335  }
336  else {
337  m_quotmarkClosed = false;
338  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::QUOTMARKOPEN";
339  return yy::json_parser::token::QUOTMARKOPEN;
340  }
341  }
342  else if (ch == ',') {
343  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::COMMA";
344  return yy::json_parser::token::COMMA;
345  }
346  else if (ch == '.') {
347  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::DOT";
348  return yy::json_parser::token::DOT;
349  }
350  else if (ch == '-') {
351  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::MINUS";
352  return yy::json_parser::token::MINUS;
353  }
354  else if (ch == '[') {
355  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::SQUARE_BRACKET_OPEN";
356  return yy::json_parser::token::SQUARE_BRACKET_OPEN;
357  }
358  else if (ch == ']') {
359  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::SQUARE_BRACKET_CLOSE";
360  return yy::json_parser::token::SQUARE_BRACKET_CLOSE;
361  }
362  else if (ch == '{') {
363  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::CURLY_BRACKET_OPEN";
364  return yy::json_parser::token::CURLY_BRACKET_OPEN;
365  }
366  else if (ch == '}') {
367  qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::CURLY_BRACKET_CLOSE";
368  return yy::json_parser::token::CURLY_BRACKET_CLOSE;
369  }
370 
371  //unknown char!
372  //TODO yyerror?
373  qCritical() << "JSonScanner::yylex - unknown char, returning -1";
374  return -1;
375 }
376 
377 

SourceForge Logo hosts this site. Send comments to:
QJson Developers