//clang
#include <iostream>
#include <regex>
#include <string>

using namespace std;

// Reads text from standard input line by line and prints, one per line, the
// value of every double-quoted href attribute found (the text between the
// quotes, without the surrounding `href="..."`).
//
// argc/argv are accepted but not used.
// Returns 0 (implicitly) once standard input is exhausted.
int main(int argc, char **argv)
{
    // note! HTML is *NOT* parsable with regular expressions
    // this is just an example if you wanted to try
    //
    // regex is just basic_regex<char>
    const regex hrefex(R"##(href\s*=\s*"([^"]*)")##",
                       std::regex_constants::icase | std::regex_constants::optimize);

    string s;
    // Test the stream itself rather than eof(): getline() sets eofbit while
    // still delivering a final line that has no trailing newline, so looping
    // on !eof() would silently skip that line (and would never terminate if
    // the stream failed for a reason other than end-of-file).
    while (getline(cin, s)) {
        // iterators include cregex_iterator  (references const char*)
        //                   wcregex_iterator (references const wchar_t *)
        //                   sregex_iterator  (ref string::const_iterator)
        //                   wsregex_iterator (ref wstring::const_iterator)
        // can make your own (regex_iterator<>)
        /*
        sregex_iterator b(s.cbegin(),s.cend(),hrefex);
        sregex_iterator e;
        for(auto i=b;i!=e;++i)
            cout << i->str() << '\n';
        // OR
        for(sregex_iterator b(begin(s),end(s),hrefex),e;b!=e;++b)
            cout << b->str() << '\n';
        */
        // OR
        // or could use for_each (needs <algorithm>):
        /*
        for_each(sregex_iterator(begin(s),end(s),hrefex),
                 sregex_iterator(),
                 [](const std::smatch &m){ cout << m.str() << '\n';}
                );
        */

        // the iterator is dereferenced to get a match_results
        // which has methods including
        //   str(): returns string (see above)
        //   size(): returns number of sub matches
        //   operator[int i]: returns object type sub_match
        //                    for the ith sub-match
        //      note: [0] is the whole thing.  [1] is the first sub-exp
        //
        // a sub_match has members including
        //   str(): returns the submatch string
        //   length(): length of the submatch string (0 if no match)
        //   matched: bool data member, whether submatch is part of reg-ex match

        // so, to print out the URLs (without the href) we could do:
        // ('\n' instead of endl avoids a flush per match; cout is tied to cin,
        //  so output is still flushed before each read in interactive use)
        for (sregex_iterator b(begin(s), end(s), hrefex), e; b != e; ++b)
            cout << (*b)[1].str() << '\n';
    }
}