// bson_builder.h
  1. /*
  2. * Copyright (C) 2021 Duowan Inc. All rights reserved.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing,
  11. * software distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef __X_PACK_BSON_BUILDER_H
  17. #define __X_PACK_BSON_BUILDER_H
  18. #include <stdexcept>
  19. #include "xpack/xpack.h"
  20. #include "xpack/traits.h"
  21. #include "bson_encoder.h"
  22. namespace xpack {
  23. class BsonBuilder {
  // Token kinds recognised in a format string. Every kind owns a single bit
  // so that several kinds can be OR-ed together into an "expected next token"
  // mask while parsing.
  enum {
      Unknow      = 0x000, // nothing recognised (initial state / invalid token)
      ObjectBegin = 0x001, // {
      ObjectEnd   = 0x002, // }
      ArrayBegin  = 0x004, // [
      ArrayEnd    = 0x008, // ]
      Comma       = 0x010, // ,
      Colon       = 0x020, // :
      Integer     = 0x040, // integer literal
      Float       = 0x080, // floating point literal
      String      = 0x100, // 'single quoted' literal
      Bool        = 0x200, // true / false
      Variable    = 0x400, // ? placeholder
      BEof        = 0x800  // end of the format string
  };
  39. std::string tokenName(int type) const {
  40. std::string ret;
  41. if (0 != (ObjectBegin&type)) {
  42. ret += "/{";
  43. }
  44. if (0 != (ObjectEnd&type)) {
  45. ret += "/}";
  46. }
  47. if (0 != (ArrayBegin&type)) {
  48. ret += "/[";
  49. }
  50. if (0 != (ArrayEnd&type)) {
  51. ret += "/]";
  52. }
  53. if (0 != (Comma&type)) {
  54. ret += "/,";
  55. }
  56. if (0 != (Colon&type)) {
  57. ret += "/:";
  58. }
  59. if (0 != ((Float|Integer|Bool)&type)) {
  60. ret += "/number/bool";
  61. }
  62. if (0 != (String&type)) {
  63. ret += "/string";
  64. }
  65. if (0 != (Variable&type)) {
  66. ret += "/variable";
  67. }
  68. if (!ret.empty()) {
  69. ret = ret.substr(1);
  70. }
  71. return ret;
  72. }
  73. struct Token {
  74. int type; // see enum
  75. union {
  76. const char *sval;
  77. long lval;
  78. double dval;
  79. bool bval;
  80. };
  81. Token():type(Unknow),sval(NULL){}
  82. };
  83. struct Item {
  84. Token tk;
  85. const char *key;
  86. bool kvar; // key is variable
  87. Item():tk(),key(NULL), kvar(false) {
  88. }
  89. };
  90. struct Encoder {
  91. const std::vector<const Item*>* items;
  92. BsonEncoder en;
  93. size_t idx; // items index;
  94. size_t vidx; // variable index;
  95. std::vector<std::string> vkeys; // store variable key
  96. Item tmpIt; // key:value when key is a variable
  97. Item *pIt;
  98. std::vector<size_t> arrayIndex;
  99. std::vector<const char*> objKeys; // store object key. use in object end
  100. std::vector<const char*> arrKeys; // store array key, use in array end
  101. Encoder(const std::vector<const Item*>&its):items(&its), idx(0), vidx(0), pIt(NULL){}
  102. // array key. index: "0", "1", "2", "3"
  103. const char *Key(const char *raw) {
  104. if (NULL != raw) {
  105. return raw;
  106. } else if (arrayIndex.size()>0) {
  107. size_t idx = arrayIndex.size() - 1;
  108. return en.IndexKey(arrayIndex[idx]++);
  109. } else {
  110. return NULL;
  111. }
  112. }
  113. };
  114. public:
  115. BsonBuilder(const std::string& _fmt):raw(_fmt),en(NULL) {
  116. dup = new char[raw.length()+1];
  117. strcpy(dup, raw.c_str());
  118. if (parse()) {
  119. for (size_t i=0; i<items.size(); ++i) {
  120. if (items[i]->kvar || items[i]->tk.type==Variable) {
  121. return;
  122. }
  123. //const Item *it = items[i];
  124. //std::cout<<"type:"<<it->tk.type<<"("<<it->kvar<<")"<<std::endl;//' '<<"key:"<<items[i]->key<<std::endl;
  125. }
  126. // no variable. pre build it
  127. bstr = Encode();
  128. }
  129. }
  130. ~BsonBuilder() {
  131. if (NULL != dup) {
  132. for (size_t i=0; i<items.size(); ++i) {
  133. delete items[i];
  134. }
  135. delete[] dup;
  136. dup = NULL;
  137. } else if (NULL != en) {
  138. delete en;
  139. en = NULL;
  140. }
  141. }
  142. std::string Error() const {
  143. return fmtErr;
  144. }
  145. #ifdef X_PACK_SUPPORT_CXX0X // support c++11 or later
  146. template <typename... Args>
  147. std::string Encode(Args... args) {
  148. if (!bstr.empty()) {
  149. return bstr;
  150. } else if (!fmtErr.empty()) {
  151. return "";
  152. }
  153. BsonBuilder bd(*this);
  154. bd.iencode(args...);
  155. bd.end(NULL);
  156. return bd.en->en.String();
  157. }
  158. template <typename... Args>
  159. std::string EncodeAsJson(Args... args) {
  160. if (!fmtErr.empty()) {
  161. return "";
  162. }
  163. BsonBuilder bd(*this);
  164. bd.iencode(args...);
  165. bd.end(NULL);
  166. return bd.en->en.Json();
  167. }
  168. template <typename... Args>
  169. static std::string En(const std::string&fmt, Args... args) {
  170. BsonBuilder bb(fmt);
  171. return bb.Encode(args...);
  172. }
  173. #endif
  174. private: // encoder
  175. BsonBuilder(const BsonBuilder&bd):dup(NULL) {
  176. en = new Encoder(bd.items);
  177. }
  178. void iencode() {
  179. }
  180. template <typename T>
  181. void iencode(const T &val) {
  182. encode(val);
  183. ++en->vidx;
  184. }
  185. template <typename T, typename... Args>
  186. void iencode(const T& first, Args... args) {
  187. iencode(first);
  188. iencode(args...);
  189. }
  190. template <typename T>
  191. BsonBuilder& encode(const T &val) {
  192. // encode data items without variable
  193. for (; en->idx<en->items->size(); ++en->idx) {
  194. const Item *it = en->items->at(en->idx);
  195. if ((!it->kvar) && it->tk.type!=Variable) {
  196. add(it, 1);
  197. } else {
  198. break;
  199. }
  200. }
  201. if (en->idx >= en->items->size()) { // finish
  202. return *this;
  203. } else if (NULL != en->pIt) { // key:val. key and val is variable, key replaced in last encode
  204. add(en->pIt, val);
  205. en->pIt = NULL;
  206. ++en->idx;
  207. //std::cout<<"?:? of val"<<std::endl;
  208. return *this;
  209. }
  210. const Item *it = en->items->at(en->idx);
  211. if (it->kvar) { // key:val and key is variable, we fill string to key
  212. en->tmpIt = *it; // copy item info
  213. en->pIt = &en->tmpIt;
  214. en->pIt->key = dumpStr(val); // dump key string
  215. en->pIt->kvar = false;
  216. if (it->tk.type != Variable) { // value not variable
  217. add(en->pIt, 1);
  218. en->pIt = NULL;
  219. ++en->idx;
  220. }
  221. //std::cout<<"?:? of key:"<<en->pIt->key<<std::endl;
  222. } else { // val is variable
  223. add(it, val);
  224. //std::cout<<"key:? of val:"<<val<<std::endl;
  225. ++en->idx;
  226. }
  227. return *this;
  228. }
  229. // add items to encoder
  230. template <typename T>
  231. void add(const Item *it, const T&val) {
  232. const char *key = en->Key(it->key);
  233. switch (it->tk.type) {
  234. case ObjectBegin:
  235. en->en.ob(key);
  236. en->objKeys.push_back(key);
  237. break;
  238. case ObjectEnd:
  239. en->en.oe(en->objKeys.back());
  240. en->objKeys.pop_back();
  241. break;
  242. case ArrayBegin:
  243. en->arrayIndex.push_back(0);
  244. en->en.ab(key);
  245. en->arrKeys.push_back(key);
  246. break;
  247. case ArrayEnd:
  248. en->arrayIndex.pop_back();
  249. en->en.ae(en->arrKeys.back());
  250. en->arrKeys.pop_back();
  251. break;
  252. case Integer:
  253. en->en.add(key, it->tk.lval);
  254. break;
  255. case Float:
  256. en->en.add(key, it->tk.dval);
  257. break;
  258. case String:
  259. en->en.add(key, std::string(it->tk.sval));
  260. break;
  261. case Bool:
  262. en->en.add(key, it->tk.bval);
  263. break;
  264. case Variable:
  265. en->en.add(key, val);
  266. break;
  267. }
  268. }
  269. // encode finish
  270. void end(std::string *err) {
  271. for (; en->idx<en->items->size(); ++en->idx) {
  272. const Item *it = en->items->at(en->idx);
  273. if (it->kvar || it->tk.type==Variable) {
  274. if (NULL != err) {
  275. *err = "less variable";
  276. }
  277. return;
  278. }
  279. add(it, 1);
  280. }
  281. }
  282. // type to const char*
  283. const char *dumpStr(const std::string&str) {
  284. en->vkeys.push_back(str);
  285. return en->vkeys[en->vkeys.size()-1].c_str();
  286. }
  287. const char *dumpStr(const char *key) {
  288. return key;
  289. }
  290. template <typename T>
  291. const char *dumpStr(const T&val) {
  292. std::string err = "variable "+Util::itoa(en->vidx)+" is a key, must be std::string or const char*";
  293. throw std::runtime_error(err);
  294. return "";
  295. }
  296. private:
  // True when `ch` may directly follow a string / number / bool / variable
  // token: a space, one of ":,}]" or the terminating NUL.
  bool isEnd(char ch) {
      switch (ch) {
        case ' ':
        case ':':
        case ',':
        case '}':
        case ']':
        case '\0':
          return true;
        default:
          return false;
      }
  }
  300. // a simple bson string parser
  301. bool parse() {
  302. std::vector<char> stack;
  303. Item *item;
  304. char *p = dup;
  305. char *end = p;
  306. int keyIndex = -1;
  307. char ch;
  308. bool waitkey = false; // {key:value, expect key
  309. int exp = ObjectBegin; // expect type
  310. while (true) {
  311. item = new Item;
  312. p = end;
  313. nextToken(p, &end, item->tk);
  314. if (item->tk.type == BEof) {
  315. delete item;
  316. if (!fmtErr.empty()) {
  317. return false;
  318. }
  319. if (stack.size() > 0) {
  320. if ((ch=stack[stack.size()-1]) == '{') { // in object
  321. fmtErr = "miss } at the end of string";
  322. } else {
  323. fmtErr = "miss ] at the end of string";
  324. }
  325. return false;
  326. }
  327. return true;
  328. } else if (item->tk.type == Unknow) {
  329. fmtErr = "unknow token:";
  330. fmtErr += p;
  331. delete item;
  332. break;
  333. }
  334. //std::cout<<"get token:"<<item->tk.type<<std::endl;
  335. if ((item->tk.type&exp) == 0) {
  336. fmtErr = "unexpected token. expect["+tokenName(exp)+"]. but get["+tokenName(item->tk.type)+"]";
  337. if (NULL != item->tk.sval) {
  338. fmtErr += ". in [";
  339. fmtErr += item->tk.sval;
  340. fmtErr += "]";
  341. }
  342. delete item;
  343. break;
  344. }
  345. // key:value parse value done
  346. if (keyIndex >= 0) {
  347. const Item *it = items[keyIndex];
  348. items.pop_back();
  349. keyIndex = -1;
  350. if (it->tk.type == String) {
  351. item->key = it->tk.sval;
  352. } else {
  353. item->kvar = true;
  354. }
  355. delete it;
  356. }
  357. switch (item->tk.type) {
  358. case ObjectBegin:
  359. items.push_back(item);
  360. exp = ObjectEnd | Variable | String;
  361. stack.push_back('{');
  362. waitkey = true;
  363. break;
  364. case ObjectEnd:
  365. waitkey = false;
  366. case ArrayEnd:
  367. items.push_back(item);
  368. stack.pop_back();
  369. if (stack.size() == 0) {
  370. exp = BEof;
  371. } else if ((ch=stack[stack.size()-1]) == '{') { // in object
  372. exp = ObjectEnd | Comma | String | Variable;
  373. } else {
  374. exp = ArrayEnd | Comma | Integer | Float | String | Bool | Variable;
  375. }
  376. break;
  377. case ArrayBegin:
  378. items.push_back(item);
  379. exp = ObjectBegin | ArrayBegin | ArrayEnd | Integer | Float | String | Bool | Variable;
  380. stack.push_back('[');
  381. break;
  382. case Comma:
  383. if ((ch=stack[stack.size()-1]) == '{') { // in object
  384. exp = String | Variable;
  385. waitkey = true;
  386. } else {
  387. exp = ObjectBegin | ArrayBegin | Integer | Float | String | Bool | Variable;
  388. }
  389. delete item;
  390. break;
  391. case Colon:
  392. exp = ObjectBegin | ArrayBegin | Integer | Float | String | Bool | Variable;
  393. keyIndex = int(items.size()) - 1;
  394. waitkey = false;
  395. delete item;
  396. break;
  397. case Integer:
  398. case Float:
  399. case String:
  400. case Bool:
  401. case Variable:
  402. items.push_back(item);
  403. if ((ch=stack[stack.size()-1]) == '{') { // in object
  404. if (waitkey) {
  405. waitkey = false;
  406. exp = Colon;
  407. } else {
  408. exp = ObjectEnd | Comma;
  409. }
  410. } else {
  411. exp = ArrayEnd | Comma | Integer | Float | String | Bool | Variable;
  412. }
  413. break;
  414. }
  415. }
  416. return false;
  417. }
  418. // get next token from data, may modify data
  419. void nextToken(char* data, char**end, Token &tk) {
  420. char ch;
  421. size_t i = 0;
  422. *end = NULL;
  423. while (data[i] == ' ') {
  424. i++;
  425. }
  426. if (data[i] == '\0') {
  427. tk.type = BEof;
  428. return;
  429. }
  430. tk.sval = data+i;
  431. while ((ch=data[i++]) != '\0') {
  432. if (tk.type == String) {
  433. if (ch != '\'') {
  434. continue;
  435. } else if (isEnd(data[i])) {
  436. data[i-1] = '\0';
  437. *end = data+i;
  438. } else {
  439. tk.type = Unknow;
  440. }
  441. return;
  442. } else if (tk.type == Integer || tk.type == Float) {
  443. if (ch>='0' && ch<='9') {
  444. continue;
  445. } else if (ch == '.') {
  446. tk.type = Float;
  447. continue;
  448. } else if (isEnd(ch)) {
  449. if (tk.type == Integer) {
  450. data[i-1] = '\0';
  451. if (Util::atoi(tk.sval, tk.lval)) {
  452. *end = data+i-1;
  453. } else {
  454. tk.type = Unknow;
  455. }
  456. data[i-1] = ch;
  457. } else {
  458. char *dend;
  459. tk.dval = strtod(tk.sval, &dend);
  460. if (dend != data+i-1) {
  461. tk.type = Unknow;
  462. } else {
  463. *end = dend;
  464. }
  465. }
  466. } else {
  467. tk.type = Unknow;
  468. }
  469. return;
  470. } else if (tk.type == Variable) {
  471. if ((ch>='0' && ch<='9') || (ch>='a' && ch<='z') || (ch>='A' && ch<='Z') || ch=='_') {
  472. continue;
  473. } else if (isEnd(ch)) {
  474. *end = data+i-1;
  475. } else {
  476. tk.type = Unknow;
  477. }
  478. return;
  479. } else if (ch == '{') {
  480. tk.type = ObjectBegin;
  481. break;
  482. } else if (ch == '}') {
  483. tk.type = ObjectEnd;
  484. break;
  485. } else if (ch == '[') {
  486. tk.type = ArrayBegin;
  487. break;
  488. } else if (ch == ']') {
  489. tk.type = ArrayEnd;
  490. break;
  491. } else if (ch == ',') {
  492. tk.type = Comma;
  493. break;
  494. } else if (ch == ':') {
  495. tk.type = Colon;
  496. break;
  497. } else if (ch == '\'') { // string
  498. tk.type = String;
  499. tk.sval = data+i;
  500. } else if (ch == '?') {
  501. tk.type = Variable;
  502. } else if (ch=='-' || ch=='+' || (ch>='0' && ch<='9')) {
  503. tk.type = Integer;
  504. tk.sval = data+i-1;
  505. } else if (ch == 't') { //
  506. if (strncmp(data+i-1, "true", 4) == 0 && isEnd(data[i+3])) {
  507. tk.type = Bool;
  508. tk.bval = true;
  509. *end = data+i+3;
  510. }
  511. return;
  512. } else if (ch == 'f') { //
  513. if (strncmp(data+i-1, "false", 5) == 0 && isEnd(data[i+4])) {
  514. tk.type = Bool;
  515. tk.bval = false;
  516. *end = data+i+4;
  517. }
  518. return;
  519. } else {
  520. return;
  521. }
  522. }
  523. if (tk.type != Unknow) {
  524. if (*end==NULL && (tk.type==String||tk.type==Float||tk.type==Integer||tk.type==Variable)) {
  525. fmtErr = "no end for string/integer/float/variable";
  526. tk.type = BEof;
  527. } else {
  528. *end = data+i;
  529. }
  530. }
  531. }
  532. std::string raw;
  533. std::string fmtErr;
  534. std::string lastErr;
  535. std::string bstr; // for that without any variable
  536. char* dup; // for parse
  537. std::vector<const Item*> items;
  538. Encoder *en;
  539. };
  540. }
  541. #endif