/**
	Utility functions for string processing

	Copyright: © 2012-2014 RejectedSoftware e.K.
	License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
	Authors: Sönke Ludwig
*/
module vibe.utils.string;

public import std.string;

import vibe.utils.array;
import vibe.internal.utilallocator;

import std.algorithm;
import std.array;
import std.ascii;
import std.format;
import std.uni;
import std.utf;
import core.exception;


/**
	Takes a string with possibly invalid UTF8 sequences and outputs a valid UTF8 string as near to
	the original as possible.

	Every sequence that decodes successfully is re-encoded unchanged. A byte at which
	decoding fails is skipped as a single byte and emitted as the code point that has
	the same numeric value as that byte.

	Params:
		str = raw bytes that are supposed to be UTF-8 text
	Returns:
		A newly built string containing only well-formed UTF-8.
*/
string sanitizeUTF8(in ubyte[] str)
@safe pure {
	import std.utf;
	auto ret = appender!string();
	ret.reserve(str.length);

	size_t i = 0;
	while (i < str.length) {
		// fallback value used when decoding at position i fails
		dchar ch = str[i];
		// on success decode() advances i itself; on failure step over one byte
		try ch = std.utf.decode(cast(const(char[]))str, i);
		catch( UTFException ){ i++; }
		//catch( AssertError ){ i++; }
		char[4] dst;
		auto len = std.utf.encode(dst, ch);
		ret.put(dst[0 .. len]);
	}

	return ret.data;
}

/**
	Strips the byte order mark of an UTF8 encoded string.
	This is useful when the string is coming from a file.

	Returns:
		A slice of `str` without the leading bytes EF BB BF, or `str` itself if it
		does not start with them.
*/
inout(char)[] stripUTF8Bom(inout(char)[] str)
@safe pure nothrow {
	if (str.length >= 3 && str[0 .. 3] == [0xEF, 0xBB, 0xBF])
		return str[3 ..$];
	return str;
}


/**
	Checks if all characters in 'str' are contained in 'chars'.

	Both strings are compared by code point. An empty `str` yields `true`.
*/
bool allOf(const(char)[] str, const(char)[] chars)
@safe pure {
	foreach (dchar ch; str)
		if (!chars.canFind(ch))
			return false;
	return true;
}

/**
	Variant of `std.string.indexOf` for a single character that can also be evaluated
	at compile time.

	During CTFE a plain loop over the code points of `s` is used; at run time the call
	is forwarded to `std.string.indexOf`.

	Params:
		s = string to search in
		c = character to search for
		cs = whether the comparison is case sensitive
	Returns:
		The code unit index of the first match, or -1 if there is none.
*/
ptrdiff_t indexOfCT(Char)(in Char[] s, dchar c, CaseSensitive cs = CaseSensitive.yes)
@safe pure {
	if (__ctfe) {
		if (cs == CaseSensitive.yes) {
			foreach (i, dchar ch; s)
				if (ch == c)
					return i;
		} else {
			c = std.uni.toLower(c);
			foreach (i, dchar ch; s)
				if (std.uni.toLower(ch) == c)
					return i;
		}
		return -1;
	} else return std.string.indexOf(s, c, cs);
}

/**
	Variant of `std.string.indexOf` for a sub string that can also be evaluated at
	compile time.

	Params:
		s = string to search in
		needle = sub string to search for
	Returns:
		The index of the first occurrence of `needle` in `s`, or -1 if there is none.
*/
ptrdiff_t indexOfCT(Char)(in Char[] s, in Char[] needle)
{
	if (__ctfe) {
		if (s.length < needle.length) return -1;
		// the last valid start position is s.length - needle.length (inclusive)
		foreach (i; 0 .. s.length - needle.length + 1)
			if (s[i .. i+needle.length] == needle)
				return i;
		return -1;
	} else return std.string.indexOf(s, needle);
}

/**
	Checks if any character in 'str' is contained in 'chars'.

	Both strings are compared by code point, so non-ASCII characters are matched as a
	whole instead of byte by byte. An empty `str` yields `false`.
*/
bool anyOf(const(char)[] str, const(char)[] chars)
@safe pure {
	// iterate by code point - comparing raw UTF-8 code units against decoded
	// characters would mismatch everything outside of the ASCII range
	foreach (dchar ch; str)
		if (chars.canFind(ch))
			return true;
	return false;
}


/// ASCII whitespace trimming (space and tab) at the start of the string
inout(char)[] stripLeftA(inout(char)[] s)
@safe pure nothrow {
	while (s.length > 0 && (s[0] == ' ' || s[0] == '\t'))
		s = s[1 .. $];
	return s;
}

/// ASCII whitespace trimming (space and tab) at the end of the string
inout(char)[] stripRightA(inout(char)[] s)
@safe pure nothrow {
	while (s.length > 0 && (s[$-1] == ' ' || s[$-1] == '\t'))
		s = s[0 .. $-1];
	return s;
}

/// ASCII whitespace trimming (space and tab) on both ends of the string
inout(char)[] stripA(inout(char)[] s)
@safe pure nothrow {
	return stripLeftA(stripRightA(s));
}

/**
	Finds the first occurrence of any of the characters in `chars`.

	Both strings are compared by code point.

	Returns:
		The code unit index at which the first matching character of `str` starts,
		or -1 if no character of `str` is contained in `chars`.
*/
sizediff_t indexOfAny(const(char)[] str, const(char)[] chars)
@safe pure {
	// with an index variable, foreach over dchar yields the code unit offset at
	// which each decoded character starts
	foreach (i, dchar ch; str)
		if (chars.canFind(ch))
			return i;
	return -1;
}
alias countUntilAny = indexOfAny;

/**
	Finds the closing bracket (works with any of '[', '$(LPAREN)', '<', '{').

	Params:
		str = input string
		nested = whether to skip nested brackets
	Returns:
		The index of the closing bracket or -1 for unbalanced strings
		and strings that don't start with a bracket.
*/
sizediff_t matchBracket(const(char)[] str, bool nested = true)
@safe pure nothrow {
	// a bracket pair needs at least two characters
	if (str.length < 2) return -1;

	char open = str[0], close = void;
	switch (str[0]) {
		case '[': close = ']'; break;
		case '(': close = ')'; break;
		case '<': close = '>'; break;
		case '{': close = '}'; break;
		default: return -1;
	}

	// number of currently unclosed brackets of the kind that starts the string
	size_t level = 1;
	foreach (i, char c; str[1 .. $]) {
		if (nested && c == open) ++level;
		else if (c == close) --level;
		// i is relative to str[1 .. $], hence the + 1
		if (level == 0) return i + 1;
	}
	return -1;
}

@safe unittest
{
	static struct Test { string str; sizediff_t res; }
	enum tests = [
		Test("[foo]", 4), Test("<bar>", 4), Test("{baz}", 4),
		Test("[", -1), Test("[foo", -1), Test("ab[f]", -1),
		Test("[foo[bar]]", 9), Test("[foo{bar]]", 8),
	];
	foreach (test; tests)
		assert(matchBracket(test.str) == test.res);
	assert(matchBracket("[foo[bar]]", false) == 8);
	static assert(matchBracket("[foo]") == 4);
}

/// Same as std.string.format, just using an allocator.
string formatAlloc(ARGS...)(IAllocator alloc, string fmt, ARGS args)
{
	// the formatted text is collected in an appender constructed with `alloc`
	auto app = AllocAppender!string(alloc);
	// taking the address of the local appender and reading its data are wrapped
	// in @trusted lambdas
	formattedWrite(() @trusted { return &app; } (), fmt, args);
	return () @trusted { return app.data; } ();
}

/**
	Special version of icmp() with optimization for ASCII characters

	The common prefix of both strings is skipped byte by byte. The remainder is
	compared case insensitively, using plain arithmetic for ASCII characters and
	`std.uni.toLower` on decoded code points for everything else.

	Params:
		a = first string
		b = second string
	Returns:
		A negative value if `a` sorts before `b`, a positive value if it sorts after
		`b`, and 0 if both are equal when ignoring case.
	Throws:
		`UTFException` if a non-ASCII part that has to be decoded is not valid UTF-8.
*/
int icmp2(const(char)[] a, const(char)[] b)
@safe pure {
	// true if s[idx] exists and is a UTF-8 continuation byte (10xxxxxx)
	static bool isContinuation(const(char)[] s, size_t idx)
	@safe pure nothrow @nogc {
		return idx < s.length && (s[idx] & 0xC0) == 0x80;
	}

	size_t i = 0, j = 0;

	// fast skip equal prefix
	size_t min_len = min(a.length, b.length);
	while( i < min_len && a[i] == b[i] ) i++;
	// don't stop half-way in a UTF-8 sequence: if the first differing position is a
	// continuation byte, rewind to the lead byte of that sequence. Positions that
	// already are at a character boundary must be left alone.
	while( i > 0 && (isContinuation(a, i) || isContinuation(b, i)) ) i--;
	j = i;

	// compare the differing character and the rest of the string
	while(i < a.length && j < b.length){
		uint ac = cast(uint)a[i];
		uint bc = cast(uint)b[j];
		if( !((ac | bc) & 0x80) ){
			// both are ASCII: lower-case arithmetically
			i++;
			j++;
			if( ac >= 'A' && ac <= 'Z' ) ac += 'a' - 'A';
			if( bc >= 'A' && bc <= 'Z' ) bc += 'a' - 'A';
			if( ac < bc ) return -1;
			else if( ac > bc ) return 1;
		} else {
			// at least one side is non-ASCII: compare whole code points
			dchar acp = decode(a, i);
			dchar bcp = decode(b, j);
			if( acp != bcp ){
				acp = std.uni.toLower(acp);
				bcp = std.uni.toLower(bcp);
				if( acp < bcp ) return -1;
				else if( acp > bcp ) return 1;
			}
		}
	}

	// one string is a prefix of the other: the longer one sorts last
	if( i < a.length ) return 1;
	else if( j < b.length ) return -1;

	assert(i == a.length || j == b.length, "Strings equal but we didn't fully compare them!?");
	return 0;
}