CLI11
C++11 Command Line Interface Parser
Loading...
Searching...
No Matches
StringTools_inl.hpp
1// Copyright (c) 2017-2026, University of Cincinnati, developed by Henry Schreiner
2// under NSF AWARD 1414736 and by the respective contributors.
3// All rights reserved.
4//
5// SPDX-License-Identifier: BSD-3-Clause
6
7#pragma once
8
9// IWYU pragma: private, include "CLI/CLI.hpp"
10
11// This include is only needed for IDEs to discover symbols
12#include "../StringTools.hpp"
13
14// [CLI11:public_includes:set]
15#include <cstdint>
16#include <string>
17#include <utility>
18#include <vector>
19// [CLI11:public_includes:end]
20
21namespace CLI {
22// [CLI11:string_tools_inl_hpp:verbatim]
23
24namespace detail {
/// Split a string on a single-character delimiter.
///
/// An empty input yields exactly one empty element so the result is never empty.
/// Leading and consecutive delimiters produce empty elements; a delimiter that is the
/// very last character of the input does not produce a trailing empty element.
CLI11_INLINE std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> pieces;
    if(s.empty()) {
        // keep the result shape consistent: an empty string is one empty field
        pieces.emplace_back();
        return pieces;
    }

    std::string::size_type field_start = 0;
    while(field_start < s.size()) {
        const auto delim_pos = s.find(delim, field_start);
        if(delim_pos == std::string::npos) {
            // last field: everything that is left
            pieces.push_back(s.substr(field_start));
            break;
        }
        pieces.push_back(s.substr(field_start, delim_pos - field_start));
        field_start = delim_pos + 1;
    }
    return pieces;
}
40
/// Remove leading whitespace from a string in place.
///
/// Whitespace is classified with std::isspace using a default-constructed std::locale.
/// Returns a reference to the (modified) input string.
CLI11_INLINE std::string &ltrim(std::string &str) {
    const std::locale loc;
    auto first_kept = str.begin();
    while(first_kept != str.end() && std::isspace<char>(*first_kept, loc)) {
        ++first_kept;
    }
    str.erase(str.begin(), first_kept);
    return str;
}
46
/// Remove every leading character that appears in `filter` from a string, in place.
/// Returns a reference to the (modified) input string.
CLI11_INLINE std::string &ltrim(std::string &str, const std::string &filter) {
    // npos (the string holds nothing but filter characters) makes erase() drop everything
    str.erase(0, str.find_first_not_of(filter));
    return str;
}
52
/// Remove trailing whitespace from a string in place.
///
/// Whitespace is classified with std::isspace using a default-constructed std::locale.
/// Returns a reference to the (modified) input string.
CLI11_INLINE std::string &rtrim(std::string &str) {
    const std::locale loc;
    auto keep_len = str.size();
    while(keep_len > 0 && std::isspace<char>(str[keep_len - 1], loc)) {
        --keep_len;
    }
    str.erase(keep_len);
    return str;
}
58
/// Remove every trailing character that appears in `filter` from a string, in place.
/// Returns a reference to the (modified) input string.
CLI11_INLINE std::string &rtrim(std::string &str, const std::string &filter) {
    const auto last_kept = str.find_last_not_of(filter);
    if(last_kept == std::string::npos) {
        // nothing but filter characters (or an empty string): nothing survives
        str.clear();
    } else {
        str.erase(last_kept + 1);
    }
    return str;
}
65
/// Strip one pair of matching outer quotes from a string, in place.
///
/// The quote may be a double quote, a single quote or a backtick, and the first and last
/// characters must be the same quote character. Strings shorter than two characters are
/// left untouched. Returns a reference to the input string.
CLI11_INLINE std::string &remove_quotes(std::string &str) {
    if(str.length() < 2) {
        return str;
    }
    const char opener = str.front();
    const bool is_quote = (opener == '"') || (opener == '\'') || (opener == '`');
    if(is_quote && str.back() == opener) {
        str.pop_back();
        str.erase(0, 1);
    }
    return str;
}
75
/// Strip one leading and one trailing `key` character from a string, in place.
///
/// Nothing is removed unless the string has at least two characters and both the first
/// and the last character equal `key`. Returns a reference to the input string.
CLI11_INLINE std::string &remove_outer(std::string &str, char key) {
    const bool wrapped = str.length() > 1 && str.front() == key && str.back() == key;
    if(wrapped) {
        str.pop_back();
        str.erase(0, 1);
    }
    return str;
}
85
/// Insert `leader` directly after every line-break character in `input`.
///
/// Both '\r' and '\n' count as line breaks and each occurrence gets its own copy of the
/// leader, so a "\r\n" pair receives the leader after the '\r' and again after the '\n'.
/// Characters that belong to an inserted leader are never rescanned, which guarantees
/// termination even when `leader` is empty or itself contains line breaks.
///
/// @param leader text to place at the start of every continuation line
/// @param input  text to process (taken by value and edited in place)
/// @return the text with the leader inserted
CLI11_INLINE std::string fix_newlines(const std::string &leader, std::string input) {
    std::string::size_type n = input.find_first_of("\r\n");
    while(n != std::string::npos) {
        input.insert(n + 1, leader);
        // resume strictly after the line break and the leader that was just inserted
        n = input.find_first_of("\r\n", n + 1 + leader.size());
    }
    return input;
}
97
98CLI11_INLINE std::ostream &format_aliases(std::ostream &out, const std::vector<std::string> &aliases, std::size_t wid) {
99 if(!aliases.empty()) {
100 out << std::setw(static_cast<int>(wid)) << " aliases: ";
101 bool front = true;
102 for(const auto &alias : aliases) {
103 if(!front) {
104 out << ", ";
105 } else {
106 front = false;
107 }
108 out << detail::fix_newlines(" ", alias);
109 }
110 out << "\n";
111 }
112 return out;
113}
114
115CLI11_INLINE bool valid_name_string(const std::string &str) {
116 if(str.empty() || !valid_first_char(str[0])) {
117 return false;
118 }
119 auto e = str.end();
120 for(auto c = str.begin() + 1; c != e; ++c)
121 if(!valid_later_char(*c))
122 return false;
123 return true;
124}
125
/// Build the set of characters accepted as digit-group separators.
///
/// The set always contains '_' and '\''. When CLI11_HAS_RTTI is non-zero the thousands
/// separator reported by the numpunct facet of a default-constructed std::locale is appended.
CLI11_INLINE std::string get_group_separators() {
    std::string result("_'");
#if CLI11_HAS_RTTI != 0
    const std::locale current;
    result += std::use_facet<std::numpunct<char>>(current).thousands_sep();
#endif
    return result;
}
134
/// Replace every occurrence of `from` in `str` with `to`.
///
/// Matching proceeds left to right and text inserted by a replacement is never rescanned,
/// so `to` may safely contain `from`. An empty `from` matches at every position and can
/// never be consumed, so it is treated as "nothing to replace" and `str` is returned
/// unchanged instead of looping forever.
///
/// @param str  text to process (taken by value and edited in place)
/// @param from substring to search for
/// @param to   replacement text
/// @return the text with all replacements applied
CLI11_INLINE std::string find_and_replace(std::string str, std::string from, std::string to) {
    if(from.empty()) {
        return str;
    }

    std::size_t start_pos = 0;
    while((start_pos = str.find(from, start_pos)) != std::string::npos) {
        str.replace(start_pos, from.length(), to);
        // continue after the replacement so it is not matched again
        start_pos += to.length();
    }

    return str;
}
146
/// Strip default-value annotations and negation markers from a flag definition string, in place.
///
/// Every "{...}" group whose closing brace appears before the next ',' is erased; the search
/// for an opening brace starts at index 2. Afterwards every '!' character is removed.
CLI11_INLINE void remove_default_flag_values(std::string &flags) {
    for(auto open = flags.find('{', 2); open != std::string::npos; open = flags.find('{', open + 1)) {
        const auto stop = flags.find_first_of("},", open + 1);
        // only erase when the group is closed before the next comma
        if(stop != std::string::npos && flags[stop] == '}') {
            flags.erase(open, stop - open + 1);
        }
    }
    flags.erase(std::remove(flags.begin(), flags.end(), '!'), flags.end());
}
159
160CLI11_INLINE std::ptrdiff_t
161find_member(std::string name, const std::vector<std::string> names, bool ignore_case, bool ignore_underscore) {
162 auto it = std::end(names);
163 if(ignore_case) {
164 if(ignore_underscore) {
165 name = detail::to_lower(detail::remove_underscore(name));
166 it = std::find_if(std::begin(names), std::end(names), [&name](std::string local_name) {
167 return detail::to_lower(detail::remove_underscore(local_name)) == name;
168 });
169 } else {
170 name = detail::to_lower(name);
171 it = std::find_if(std::begin(names), std::end(names), [&name](std::string local_name) {
172 return detail::to_lower(local_name) == name;
173 });
174 }
175
176 } else if(ignore_underscore) {
177 name = detail::remove_underscore(name);
178 it = std::find_if(std::begin(names), std::end(names), [&name](std::string local_name) {
179 return detail::remove_underscore(local_name) == name;
180 });
181 } else {
182 it = std::find(std::begin(names), std::end(names), name);
183 }
184
185 return (it != std::end(names)) ? (it - std::begin(names)) : (-1);
186}
187
/// The raw characters that have a one-letter backslash escape.
/// The character at index i is written with the code letter escapedCharsCode()[i].
CLI11_MODULE_INLINE const std::string &escapedChars() {
    static const std::string raw_chars("\b\t\n\f\r\"\\");
    return raw_chars;
}
/// The code letters that follow a backslash in an escape sequence.
/// The letter at index i stands for the raw character escapedChars()[i].
CLI11_MODULE_INLINE const std::string &escapedCharsCode() {
    static const std::string code_letters("btnfr\"\\");
    return code_letters;
}
/// The characters that open a quoted or bracketed sequence.
/// The opener at index i is closed by matchBracketChars()[i].
CLI11_MODULE_INLINE const std::string &bracketChars() {
    static const std::string openers("\"'`[(<{");
    return openers;
}
/// The characters that close a quoted or bracketed sequence.
/// The closer at index i matches the opener bracketChars()[i].
CLI11_MODULE_INLINE const std::string &matchBracketChars() {
    static const std::string closers("\"'`])>}");
    return closers;
}
204
205// CLI11_MODULE_INLINE constexpr char escapedChars[]="\b\t\n\f\r\"\\";
206// CLI11_MODULE_INLINE constexpr char escapedCharsCode[]="btnfr\"\\";
207/*
208const std::string &escapedChars("\b\t\n\f\r\"\\");
209
210CLI11_MODULE_INLINE const std::string &bracketChars("\"'`[(<{");
211CLI11_MODULE_INLINE const std::string &matchBracketChars("\"'`])>}");
212*/
213CLI11_INLINE bool has_escapable_character(const std::string &str) {
214 return (str.find_first_of(escapedChars()) != std::string::npos);
215}
216
217CLI11_INLINE std::string add_escaped_characters(const std::string &str) {
218 std::string out;
219 out.reserve(str.size() + 4);
220 for(char s : str) {
221 auto sloc = escapedChars().find_first_of(s);
222 if(sloc != std::string::npos) {
223 out.push_back('\\');
224 out.push_back(escapedCharsCode()[sloc]);
225 } else {
226 out.push_back(s);
227 }
228 }
229 return out;
230}
231
/// Convert one hexadecimal digit character to its numeric value (0-15).
///
/// Upper- and lower-case letters are accepted. Any other character yields the value of -1
/// converted to std::uint32_t, so callers can detect failure with a "> 0x0F" check.
CLI11_INLINE std::uint32_t hexConvert(char hc) {
    if(hc >= '0' && hc <= '9') {
        return static_cast<std::uint32_t>(hc - '0');
    }
    if(hc >= 'A' && hc <= 'F') {
        return static_cast<std::uint32_t>(hc - 'A' + 10);
    }
    if(hc >= 'a' && hc <= 'f') {
        return static_cast<std::uint32_t>(hc - 'a' + 10);
    }
    return static_cast<std::uint32_t>(-1);
}
245
/// Narrow a 32-bit code to a char by first converting it to unsigned char.
CLI11_INLINE char make_char(std::uint32_t code) {
    const auto low_byte = static_cast<unsigned char>(code);
    return static_cast<char>(low_byte);
}
247
248CLI11_INLINE void append_codepoint(std::string &str, std::uint32_t code) {
249 if(code < 0x80) { // ascii code equivalent
250 str.push_back(static_cast<char>(code));
251 } else if(code < 0x800) { // \u0080 to \u07FF
252 // 110yyyyx 10xxxxxx; 0x3f == 0b0011'1111
253 str.push_back(make_char(0xC0 | code >> 6));
254 str.push_back(make_char(0x80 | (code & 0x3F)));
255 } else if(code < 0x10000) { // U+0800...U+FFFF
256 if(0xD800 <= code && code <= 0xDFFF) {
257 throw std::invalid_argument("[0xD800, 0xDFFF] are not valid UTF-8.");
258 }
259 // 1110yyyy 10yxxxxx 10xxxxxx
260 str.push_back(make_char(0xE0 | code >> 12));
261 str.push_back(make_char(0x80 | (code >> 6 & 0x3F)));
262 str.push_back(make_char(0x80 | (code & 0x3F)));
263 } else if(code < 0x110000) { // U+010000 ... U+10FFFF
264 // 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
265 str.push_back(make_char(0xF0 | code >> 18));
266 str.push_back(make_char(0x80 | (code >> 12 & 0x3F)));
267 str.push_back(make_char(0x80 | (code >> 6 & 0x3F)));
268 str.push_back(make_char(0x80 | (code & 0x3F)));
269 }
270}
271
272CLI11_INLINE std::string remove_escaped_characters(const std::string &str) {
273
274 std::string out;
275 out.reserve(str.size());
276 for(auto loc = str.begin(); loc < str.end(); ++loc) {
277 if(*loc == '\\') {
278 if(str.end() - loc < 2) {
279 throw std::invalid_argument("invalid escape sequence " + str);
280 }
281 auto ecloc = escapedCharsCode().find_first_of(*(loc + 1));
282 if(ecloc != std::string::npos) {
283 out.push_back(escapedChars()[ecloc]);
284 ++loc;
285 } else if(*(loc + 1) == 'u') {
286 // must have 4 hex characters
287 if(str.end() - loc < 6) {
288 throw std::invalid_argument("unicode sequence must have 4 hex codes " + str);
289 }
290 std::uint32_t code{0};
291 std::uint32_t mplier{16 * 16 * 16};
292 for(int ii = 2; ii < 6; ++ii) {
293 std::uint32_t res = hexConvert(*(loc + ii));
294 if(res > 0x0F) {
295 throw std::invalid_argument("unicode sequence must have 4 hex codes " + str);
296 }
297 code += res * mplier;
298 mplier = mplier / 16;
299 }
300 append_codepoint(out, code);
301 loc += 5;
302 } else if(*(loc + 1) == 'U') {
303 // must have 8 hex characters
304 if(str.end() - loc < 10) {
305 throw std::invalid_argument("unicode sequence must have 8 hex codes " + str);
306 }
307 std::uint32_t code{0};
308 std::uint32_t mplier{16 * 16 * 16 * 16 * 16 * 16 * 16};
309 for(int ii = 2; ii < 10; ++ii) {
310 std::uint32_t res = hexConvert(*(loc + ii));
311 if(res > 0x0F) {
312 throw std::invalid_argument("unicode sequence must have 8 hex codes " + str);
313 }
314 code += res * mplier;
315 mplier = mplier / 16;
316 }
317 append_codepoint(out, code);
318 loc += 9;
319 } else if(*(loc + 1) == '0') {
320 out.push_back('\0');
321 ++loc;
322 } else {
323 throw std::invalid_argument(std::string("unrecognized escape sequence \\") + *(loc + 1) + " in " + str);
324 }
325 } else {
326 out.push_back(*loc);
327 }
328 }
329 return out;
330}
331
/// Find the index of the character that closes an escaped (string-style) quote.
///
/// Scanning starts at `start + 1`. A backslash escapes the character after it, so an escaped
/// `closure_char` does not terminate the quote.
///
/// @param str          text to scan
/// @param start        index of the opening quote
/// @param closure_char character that closes the quote
/// @return index of the closing character, or `str.size()` when the quote is never closed.
///         The result is clamped to `str.size()` (the same "not found" value that
///         close_literal_quote returns); a trailing backslash used to yield `str.size() + 1`.
CLI11_INLINE std::size_t close_string_quote(const std::string &str, std::size_t start, char closure_char) {
    std::size_t loc = start + 1;
    while(loc < str.size()) {
        if(str[loc] == closure_char) {
            return loc;
        }
        // skip the next character for escaped sequences
        loc += (str[loc] == '\\') ? 2 : 1;
    }
    return str.size();
}
345
/// Find the index of the character that closes a literal quote.
///
/// The first `closure_char` after `start` ends the quote; backslashes have no special meaning.
/// Returns `str.size()` when no closing character exists.
CLI11_INLINE std::size_t close_literal_quote(const std::string &str, std::size_t start, char closure_char) {
    const auto closing = str.find(closure_char, start + 1);
    if(closing == std::string::npos) {
        return str.size();
    }
    return closing;
}
350
351CLI11_INLINE std::size_t close_sequence(const std::string &str, std::size_t start, char closure_char) {
352
353 auto bracket_loc = matchBracketChars().find(closure_char);
354 switch(bracket_loc) {
355 case 0:
356 return close_string_quote(str, start, closure_char);
357 case 1:
358 case 2:
359#if defined(_MSC_VER) && _MSC_VER < 1920
360 case(std::size_t)-1:
361#else
362 case std::string::npos:
363#endif
364 return close_literal_quote(str, start, closure_char);
365 default:
366 break;
367 }
368
369 std::string closures(1, closure_char);
370 auto loc = start + 1;
371
372 while(loc < str.size()) {
373 if(str[loc] == closures.back()) {
374 closures.pop_back();
375 if(closures.empty()) {
376 return loc;
377 }
378 }
379 bracket_loc = bracketChars().find(str[loc]);
380 if(bracket_loc != std::string::npos) {
381 switch(bracket_loc) {
382 case 0:
383 loc = close_string_quote(str, loc, str[loc]);
384 break;
385 case 1:
386 case 2:
387 loc = close_literal_quote(str, loc, str[loc]);
388 break;
389 default:
390 closures.push_back(matchBracketChars()[bracket_loc]);
391 break;
392 }
393 }
394 ++loc;
395 }
396 if(loc > str.size()) {
397 loc = str.size();
398 }
399 return loc;
400}
401
402CLI11_INLINE std::vector<std::string> split_up(std::string str, char delimiter) {
403
404 auto find_ws = [delimiter](char ch) {
405 return (delimiter == '\0') ? std::isspace<char>(ch, std::locale()) : (ch == delimiter);
406 };
407 trim(str);
408
409 std::vector<std::string> output;
410 while(!str.empty()) {
411 if(bracketChars().find_first_of(str[0]) != std::string::npos) {
412 auto bracketLoc = bracketChars().find_first_of(str[0]);
413 auto end = close_sequence(str, 0, matchBracketChars()[bracketLoc]);
414 if(end >= str.size()) {
415 output.push_back(std::move(str));
416 str.clear();
417 } else {
418 output.push_back(str.substr(0, end + 1));
419 if(end + 2 < str.size()) {
420 str = str.substr(end + 2);
421 } else {
422 str.clear();
423 }
424 }
425
426 } else {
427 auto it = std::find_if(std::begin(str), std::end(str), find_ws);
428 if(it != std::end(str)) {
429 std::string value = std::string(str.begin(), it);
430 output.push_back(value);
431 str = std::string(it + 1, str.end());
432 } else {
433 output.push_back(str);
434 str.clear();
435 }
436 }
437 trim(str);
438 }
439 return output;
440}
441
/// Turn the separator at `offset` into a space when it joins an option name to a quoted value.
///
/// If the character after `offset` is a quote ('"', '\'' or '`'), the text before `offset` is
/// searched backwards for the nearest of "-/ \"'`". The separator is replaced by a space when
/// that character is '-' and the separator is '=', or when it is '/' and the separator is
/// anything else, so that split_up treats name and value as separate tokens.
///
/// Nothing is changed when `offset` is 0 (there is no text before the separator, and
/// `offset - 1` would wrap around and search the whole string) or when there is no character
/// after `offset`.
///
/// @param str    text to examine; may be modified at `offset`
/// @param offset index of the separator character
/// @return `offset + 1`
CLI11_INLINE std::size_t escape_detect(std::string &str, std::size_t offset) {
    if(offset > 0 && offset + 1 < str.size()) {
        const char next = str[offset + 1];
        if((next == '\"') || (next == '\'') || (next == '`')) {
            const auto astart = str.find_last_of("-/ \"\'`", offset - 1);
            if(astart != std::string::npos && str[astart] == ((str[offset] == '=') ? '-' : '/')) {
                str[offset] = ' ';  // interpret this as a space so the split_up works properly
            }
        }
    }
    return offset + 1;
}
453
/// Produce a printable representation of a string that may contain arbitrary bytes.
///
/// Bytes for which isprint() reports "not printable" become "\xHH" with two lower-case hex
/// digits. An 'x' or 'X' that directly follows a backslash in the output is written as
/// "\x78" / "\x58" so it is not mistaken for the start of a hex escape.
/// If anything was changed, or `force` is set, single quotes are rewritten as "\x27" and the
/// result is wrapped as 'B"(...)"'. Otherwise the input is returned unchanged.
CLI11_INLINE std::string binary_escape_string(const std::string &string_to_escape, bool force) {
    static const char hex_digits[] = "0123456789abcdef";

    std::string escaped_string;
    for(const char c : string_to_escape) {
        // go through unsigned char: passing a negative char to isprint is undefined behaviour
        const auto byte = static_cast<unsigned char>(c);
        if(isprint(byte) == 0) {
            escaped_string += "\\x";
            escaped_string.push_back(hex_digits[byte >> 4]);
            escaped_string.push_back(hex_digits[byte & 0x0F]);
        } else if((c == 'x' || c == 'X') && !escaped_string.empty() && escaped_string.back() == '\\') {
            // guard against inadvertent binary sequences
            escaped_string += (c == 'x') ? "\\x78" : "\\x58";
        } else {
            escaped_string.push_back(c);
        }
    }

    if(escaped_string != string_to_escape || force) {
        // each replacement is 4 characters long and contains no quote, so resume after it
        for(auto sqLoc = escaped_string.find('\''); sqLoc != std::string::npos;
            sqLoc = escaped_string.find('\'', sqLoc + 4)) {
            escaped_string.replace(sqLoc, 1, "\\x27");
        }
        escaped_string.insert(0, "'B\"(");
        escaped_string += ")\"'";
    }
    return escaped_string;
}
495
/// Report whether a string carries the binary-escape wrapper.
///
/// Two forms are accepted: B"(...)" and the same wrapped in single quotes, 'B"(...)"'.
CLI11_INLINE bool is_binary_escaped_string(const std::string &escaped_string) {
    const std::size_t len = escaped_string.size();
    // the prefix test comes first: it guarantees the string is long enough for the suffix test
    const bool bare_form =
        escaped_string.compare(0, 3, "B\"(") == 0 && escaped_string.compare(len - 2, 2, ")\"") == 0;
    if(bare_form) {
        return true;
    }
    const bool quoted_form =
        escaped_string.compare(0, 4, "'B\"(") == 0 && escaped_string.compare(len - 3, 3, ")\"'") == 0;
    return quoted_form;
}
503
504CLI11_INLINE std::string extract_binary_string(const std::string &escaped_string) {
505 std::size_t start{0};
506 std::size_t tail{0};
507 size_t ssize = escaped_string.size();
508 if(escaped_string.compare(0, 3, "B\"(") == 0 && escaped_string.compare(ssize - 2, 2, ")\"") == 0) {
509 start = 3;
510 tail = 2;
511 } else if(escaped_string.compare(0, 4, "'B\"(") == 0 && escaped_string.compare(ssize - 3, 3, ")\"'") == 0) {
512 start = 4;
513 tail = 3;
514 }
515
516 if(start == 0) {
517 return escaped_string;
518 }
519 std::string outstring;
520
521 outstring.reserve(ssize - start - tail);
522 std::size_t loc = start;
523 while(loc < ssize - tail) {
524 // ssize-2 to skip )" at the end
525 if(escaped_string[loc] == '\\' && (escaped_string[loc + 1] == 'x' || escaped_string[loc + 1] == 'X')) {
526 auto c1 = escaped_string[loc + 2];
527 auto c2 = escaped_string[loc + 3];
528
529 std::uint32_t res1 = hexConvert(c1);
530 std::uint32_t res2 = hexConvert(c2);
531 if(res1 <= 0x0F && res2 <= 0x0F) {
532 loc += 4;
533 outstring.push_back(static_cast<char>(res1 * 16 + res2));
534 continue;
535 }
536 }
537 outstring.push_back(escaped_string[loc]);
538 ++loc;
539 }
540 return outstring;
541}
542
543CLI11_INLINE void remove_quotes(std::vector<std::string> &args) {
544 for(auto &arg : args) {
545 if(arg.front() == '\"' && arg.back() == '\"') {
546 remove_quotes(arg);
547 // only remove escaped for string arguments not literal strings
548 arg = remove_escaped_characters(arg);
549 } else {
550 remove_quotes(arg);
551 }
552 }
553}
554
/// Re-encode a string of the form "[...]" so it is not taken for a secondary array.
///
/// When the string starts with '[' and ends with ']', it is replaced by "[[" followed by
/// every character after the leading '[' written twice (which turns the final ']' into "]]").
/// Any other string is left unchanged.
CLI11_INLINE void handle_secondary_array(std::string &str) {
    const bool looks_like_array = str.size() >= 2 && str.front() == '[' && str.back() == ']';
    if(!looks_like_array) {
        return;
    }
    std::string doubled("[[");
    doubled.reserve(2 * str.size());
    for(auto ch = str.begin() + 1; ch != str.end(); ++ch) {
        doubled.append(2, *ch);
    }
    str = std::move(doubled);
}
566
567CLI11_INLINE bool
568process_quoted_string(std::string &str, char string_char, char literal_char, bool disable_secondary_array_processing) {
569 if(str.size() <= 1) {
570 return false;
571 }
572 if(detail::is_binary_escaped_string(str)) {
573 str = detail::extract_binary_string(str);
574 if(!disable_secondary_array_processing)
575 handle_secondary_array(str);
576 return true;
577 }
578 if(str.front() == string_char && str.back() == string_char) {
579 detail::remove_outer(str, string_char);
580 if(str.find_first_of('\\') != std::string::npos) {
581 str = detail::remove_escaped_characters(str);
582 }
583 if(!disable_secondary_array_processing)
584 handle_secondary_array(str);
585 return true;
586 }
587 if((str.front() == literal_char || str.front() == '`') && str.back() == str.front()) {
588 detail::remove_outer(str, str.front());
589 if(!disable_secondary_array_processing)
590 handle_secondary_array(str);
591 return true;
592 }
593 return false;
594}
595
/// Read an environment variable.
///
/// Marked CLI11_INLINE like every other definition in this file, so that including the
/// header from several translation units does not produce multiple definitions.
///
/// @param env_name name of the environment variable
/// @return the variable's value, or an empty string when it is not set
CLI11_INLINE std::string get_environment_value(const std::string &env_name) {
    std::string ename_string;

#ifdef _MSC_VER
    // Windows version
    char *buffer = nullptr;
    std::size_t sz = 0;
    if(_dupenv_s(&buffer, &sz, env_name.c_str()) == 0 && buffer != nullptr) {
        ename_string = std::string(buffer);
        // _dupenv_s hands over an allocated copy that the caller must release
        free(buffer);
    }
#else
    // This also works on Windows, but gives a warning

    // MISRA static analysis need. MISRACPP2023-25_5_2-a-1
    const char *buffer = nullptr;
    buffer = std::getenv(env_name.c_str());
    if(buffer != nullptr) {
        ename_string = std::string(buffer);
    }
#endif
    return ename_string;
}
619
/// Write text to a stream as a word-wrapped paragraph.
///
/// The text is processed line by line. Within a line, words are separated by single spaces
/// and a new output line (a newline followed by `linePrefix`) is started whenever adding the
/// next word would exceed `paragraphWidth`; the prefix itself is not counted towards the
/// width, and a word is never split. Each line break in `text` also starts a new prefixed
/// output line. `linePrefix` is written before the first line unless `skipPrefixOnFirstLine`
/// is set. Returns the stream to allow chaining.
CLI11_INLINE std::ostream &streamOutAsParagraph(std::ostream &out,
                                                const std::string &text,
                                                std::size_t paragraphWidth,
                                                const std::string &linePrefix,
                                                bool skipPrefixOnFirstLine) {
    if(!skipPrefixOnFirstLine) {
        out << linePrefix;  // First line prefix
    }

    std::istringstream lineReader(text);
    for(std::string line; std::getline(lineReader, line);) {
        std::istringstream wordReader(line);
        std::size_t column = 0;  // characters already written on the current output line

        for(std::string word; wordReader >> word;) {
            const bool overflow = column > 0 && (word.length() + 1 + column > paragraphWidth);
            if(overflow) {
                out << '\n' << linePrefix;
                column = 0;
            }
            if(column != 0) {
                // separate from the previous word on the same output line
                out << ' ';
                ++column;
            }
            out << word;
            column += word.length();
        }

        // eof is not yet set when the line just read ended with a newline character
        if(!lineReader.eof()) {
            out << '\n' << linePrefix;
        }
    }
    return out;
}
654
655} // namespace detail
656// [CLI11:string_tools_inl_hpp:end]
657} // namespace CLI