//clang
#include <codecvt>
#include <iomanip>
#include <iostream>
#include <locale>
#include <string>

// Demonstration of the C++11 Unicode character/string literals and of raw
// string literals. Everything is written to std::cout / std::wcout.
//
// NOTE: the program targets C++11..C++17. It deliberately streams wide and
// UTF-16/32 characters into streams of another character type to show what
// happens; C++20 deletes those inserters and changes the type of u8 literals.

namespace {

/// Writes the length of `text`, then every code unit of `text` as a
/// hexadecimal number with a base prefix, each followed by a space.
/// Switches std::cout back to decimal and ends the line afterwards.
template <typename String>
void print_code_units_hex(const String &text) {
    std::cout << text.length() << ": ";
    std::cout << std::hex << std::showbase;
    for (const auto unit : text) {
        // Explicit cast: the numeric value of the code unit is wanted, not a glyph.
        std::cout << static_cast<unsigned long>(unit) << ' ';
    }
    std::cout << std::dec << '\n';
}

}  // namespace

int main(int /*argc*/, char ** /*argv*/) {
    // in C++03:
    const char *s1 = "Standard string";
    const wchar_t *s2 = L"Wide character string";

    std::cout << "s1: " << s1 << '\n';
    // the pointer value is written for s2, because cout is a char stream
    std::cout << "s2: " << s2 << '\n';
    std::wcout << "s1: " << s1 << '\n';
    std::wcout << "s2: " << s2 << '\n';
    // (neither of the two strings is Unicode encoded!)

    // C++11 Unicode support:
    //   char     is now wide enough to store the eight-bit coding of UTF-8
    //   char16_t is a 16-bit character (to store UTF-16)
    //   char32_t is a 32-bit character (to store UCS-4/UTF-32)
    // NOTE: UTF-32: always 32 bits per code point (character)
    //       UTF-16: 1 or 2 16-bit code units per code point (character)
    //       UTF-8:  1-4 bytes per code point (character)
    //               [no endianness problems]
    char c1 = 'A';
    wchar_t c2 = L'A';
    // the literals below are UCS-encoded
    char16_t c3 = u'A';  // use the 'u' prefix to denote char16_t
    char32_t c4 = U'A';  // use the 'U' prefix to denote char32_t

    // only c1 is written as a character, because cout is a char stream;
    // the others are converted to integers and printed as numbers
    std::cout << c1 << ' ' << c2 << ' ' << c3 << ' ' << c4 << '\n';
    // c1 and c2 are written as characters here, but char16_t and char32_t are not
    std::wcout << c1 << ' ' << c2 << ' ' << c3 << ' ' << c4 << '\n';

    // strings are also available:
    // UTF-8 encoded string:
    const char *s3 = u8"Euros \u20AC and G clef \U0001D11E";
    // UTF-16 encoded string:
    const char16_t *s4 = u"Euros \u20AC and G clef \U0001D11E";
    // UTF-32 encoded string:
    const char32_t *s5 = U"Euros \u20AC and G clef \U0001D11E";
    // note: in C++11 the u8 prefix is not allowed for character literals
    //       u is allowed for characters, but cannot produce 2 16-bit code units
    //       (u" " can produce multiple 16-bit units per "character")

    std::u16string s6 = s4;  // u16string = basic_string<char16_t>
    std::u32string s7 = s5;  // u32string = basic_string<char32_t>

    std::cout << '\n';

    // write out the code units:
    // the UTF-8 code units are written as raw chars, separated by spaces
    const std::string utf8_units(s3);
    std::cout << utf8_units.length() << ": ";
    for (const char unit : utf8_units) {
        std::cout << unit << ' ';
    }
    std::cout << '\n';

    // the UTF-16 and UTF-32 code units are written as hexadecimal numbers
    print_code_units_hex(s6);
    print_code_units_hex(s7);

    // Writing the UTF-8 encoded string to wcout was expected to work
    // (NOTE(review): the original remark is cut off after "if" -- presumably
    // a condition on the stream/locale setup; confirm):
    //wcout << s3 << endl;
    // doesn't on my system... :(

    // this should create a way to write a char32_t string to cout
    // but it doesn't work with any STL I can find... :(
    /*
    std::wbuffer_convert<std::codecvt_utf8<char32_t>, char32_t>
        u32buffer(std::cout.rdbuf(), new std::codecvt_utf8<char32_t>());
    std::basic_ostream<char32_t> u32out(&u32buffer);
    u32out << s5 << std::endl;
    */

    // Raw String Literals:
    // ====================
    // use "R" (after "L" "u" "U" or "u8") for a raw string
    std::cout << '\n';
    std::cout << "standard string: " << "a string\t\"quote\"" << '\n';
    std::cout << "raw string: " << R"(a string\t"quote")" << '\n';
    // a custom delimiter can be placed around the parentheses, so that the
    // raw string itself may contain a closing parenthesis followed by a quote
    std::cout << "another raw str: " << R"#(a string\t(paren))#" << '\n';
}