Skip to content

Commit ea18fce

Browse files
committed
Add text mode formatters: ansi, markdown
1 parent ef36d39 commit ea18fce

File tree

2 files changed

+117
-22
lines changed

2 files changed

+117
-22
lines changed

README.md

+4
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ Options:
4444
may be one of { data, text, attr }:
4545
data - return raw html of matching elements
4646
text - return inner text of matching elements
47+
[mode argument: formatting]
48+
supported modes: { plain, ansi, md }
49+
default: plain
50+
for plain, ANSI, or markdown formatted output respectively
4751
attr - return attribute value of matching elements
4852
<mode argument: attr>
4953
attribute to return

main.cpp

+113-22
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ Usage: %s [options] <selector> <mode> [mode argument]
3535
may be one of { data, text, attr }:
3636
data - return raw html of matching elements
3737
text - return inner text of matching elements
38+
[mode argument: formatting]
39+
supported modes: { plain, ansi, md }
40+
default: plain
41+
for plain, ANSI, or markdown formatted output respectively
3842
attr - return attribute value of matching elements
3943
<mode argument: attr>
4044
attribute to return
@@ -44,6 +48,14 @@ Usage: %s [options] <selector> <mode> [mode argument]
4448
curl -sSL https://example.com | %s a attr href
4549
)";
4650

51+
static const string afmt_s = "\033[";
52+
static const string afmt_e = "m";
53+
static const vector<char> collapsible = {' ', '\t', '\n', '\r'};
54+
static const vector<unsigned long> breaking = {
55+
MyHTML_TAG_BR,
56+
MyHTML_TAG_P
57+
};
58+
4759
// Mutable runtime flags, looked up by name.
//   "dirtyargs" - tested and cleared again by parseopts; per the note there it
//                 marks that an option handler touched argv.
static map<const string, bool> flags{
  {"dirtyargs", false}
};
@@ -86,10 +98,18 @@ bool readfile(string filename, string &target){
8698
return true;
8799
}
88100

89-
template <typename T> inline bool vec_has(vector<T> &vec, T val){
101+
/**
 * Checks whether a vector contains a value.
 *
 * The value type U is deduced separately from the element type T, so a caller
 * may pass anything that is equality-comparable with T (for example a size_t
 * tag id against a vector<unsigned long>) without hitting a template
 * deduction conflict on platforms where those types differ.
 *
 * @param vec  vector to search; not modified
 * @param val  value to look for, compared against each element with operator==
 * @return true if at least one element of vec compares equal to val
 */
template <typename T, typename U = T> inline bool vec_has(const std::vector<T> &vec, const U &val){
  return std::find(vec.begin(), vec.end(), val) != vec.end();
}
92104

105+
/**
 * Reports whether a node, or any node reachable through its parent links,
 * carries the given tag id.
 *
 * @param node  node to start from; the search includes this node itself.
 *              A null pointer yields false.
 * @param tag   tag id to compare against each node's tag_id
 * @return true if the start node or one of its ancestors has tag_id == tag
 */
template <typename T> inline bool node_in(myhtml_tree_node_t* node, T tag){
  for(myhtml_tree_node_t* cursor = node; cursor != nullptr; cursor = cursor->parent){
    if(cursor->tag_id == tag) return true;
  }
  return false;
}
112+
93113
static map<const char, const string> option_longopts = { // maps shortopts to longopts from option_handlers
94114
{'h', "help"},
95115
{'f', "file"},
@@ -115,6 +135,78 @@ static map<const string, const function<void(int&, const char**&)>> option_handl
115135
}}
116136
};
117137

138+
/**
 * Text-mode formatting hooks, stored as {format, unformat}.
 *
 * Both hooks append markup for `node_iter` to `rendered`, depending on the
 * node's tag id and on the formatting mode in state["modearg"]:
 *   - "ansi": ANSI SGR escape sequences (afmt_s + code + afmt_e)
 *   - "md":   markdown markers
 *   - anything else (including unset): only the mode-independent output
 *             (list bullets and line breaks)
 *
 * `first` (format) emits the opening markup; the text mode handler calls it
 * for each non-text node it visits. `second` (unformat) emits the closing
 * markup; the text mode handler calls it when it moves past a node.
 */
static pair<const function<void(myhtml_tree_node_t*, string&)>, const function<void(myhtml_tree_node_t*, string&)>> format_handlers = { // {format, unformat}
  [](myhtml_tree_node_t* node_iter, string &rendered){
    const bool ansi = state["modearg"] == "ansi";
    const bool md = state["modearg"] == "md";
    switch(node_iter->tag_id){ // modearg formatters
      case MyHTML_TAG_B: // bold on
      case MyHTML_TAG_STRONG:
        if(ansi) rendered += afmt_s + "1" + afmt_e;
        if(md) rendered += "**";
      break;
      case MyHTML_TAG_I: // italics on (ANSI output uses underline instead)
      case MyHTML_TAG_U:
      case MyHTML_TAG_EM:
        if(ansi) rendered += afmt_s + "4" + afmt_e;
        if(md) rendered += "_";
      break;
      case MyHTML_TAG_CODE: // code on
        if(node_in(node_iter, MyHTML_TAG_PRE)){
          // fenced block: markdown syntax, so only emitted in md mode
          if(md) rendered += "```\n";
        }else{
          if(ansi) rendered += afmt_s + "7" + afmt_e; // reverse video
          if(md) rendered += "`";
        }
      break;
    }
    switch(node_iter->tag_id){ // global formatters
      case MyHTML_TAG_LI:
        rendered += "- ";
      break;
    }
  },
  [](myhtml_tree_node_t* node_iter, string &rendered){
    const bool ansi = state["modearg"] == "ansi";
    const bool md = state["modearg"] == "md";
    switch(node_iter->tag_id){ // modearg unformatters
      case MyHTML_TAG_B: // bold off
      case MyHTML_TAG_STRONG:
        // SGR 22 = normal intensity. SGR 21 is "doubly underlined" in
        // ECMA-48 and is rendered that way by many terminals.
        if(ansi) rendered += afmt_s + "22" + afmt_e;
        if(md) rendered += "**";
      break;
      case MyHTML_TAG_I: // italics off (ANSI: underline off, matching the "4" above)
      case MyHTML_TAG_U:
      case MyHTML_TAG_EM:
        if(ansi) rendered += afmt_s + "24" + afmt_e;
        if(md) rendered += "_";
      break;
      case MyHTML_TAG_CODE: // code off
        if(node_in(node_iter, MyHTML_TAG_PRE)){
          // closing fence: markdown syntax, so only emitted in md mode
          if(md) rendered += "```\n";
        }else{
          if(ansi) rendered += afmt_s + "27" + afmt_e; // reverse video off
          if(md) rendered += "`";
        }
      break;
    }
    switch(node_iter->tag_id){ // global unformatters
      case MyHTML_TAG_LI:
      case MyHTML_TAG_UL:
        rendered += "\n";
      break;
    }

    if(vec_has(breaking, node_iter->tag_id)){ // <br/>
      rendered += "\n";
    }
  }
};
209+
118210
static map<const string, const function<void(myhtml_tree_node_t*)>> mode_handlers = { // maps modes to functions
119211
{"data", [](myhtml_tree_node_t* node) {
120212
myhtml_serialization_tree_callback(node, [](const char* data, size_t len, void* ctx) -> unsigned int {
@@ -127,42 +219,41 @@ static map<const string, const function<void(myhtml_tree_node_t*)>> mode_handler
127219
{"text", [](myhtml_tree_node_t* node) {
128220
string rendered = "";
129221

130-
static vector<char> collapsible = {' ', '\t', '\n', '\r'};
131-
static vector<unsigned long> breaking = {
132-
MyHTML_TAG_BR,
133-
MyHTML_TAG_P
134-
};
135-
136222
myhtml_tree_node_t* node_iter = node->child;
137223
while(node_iter){
138224
const char* text_c = myhtml_node_text(node_iter, nullptr);
139225
string text = "";
140226
if(text_c != nullptr) text += text_c;
141227

142228
if(node_iter->tag_id == MyHTML_TAG__TEXT){
143-
// collapse whitespace to single character
144-
string::iterator nend = unique(text.begin(), text.end(), [](char c1, char c2) -> bool {
145-
return vec_has(collapsible, c1) && vec_has(collapsible, c2);
146-
});
147-
text.resize(static_cast<unsigned long>(nend-text.begin()));
148-
149-
// replace whitespace with space
150-
replace_if(text.begin(), text.end(), [](char c) -> bool {
151-
return vec_has(collapsible, c);
152-
}, ' ');
229+
if(!node_in(node_iter, MyHTML_TAG_PRE)){
230+
// collapse whitespace to single character
231+
string::iterator nend = unique(text.begin(), text.end(), [](char c1, char c2) -> bool {
232+
return vec_has(collapsible, c1) && vec_has(collapsible, c2);
233+
});
234+
text.resize(static_cast<unsigned long>(nend-text.begin()));
235+
236+
// replace whitespace with space
237+
replace_if(text.begin(), text.end(), [](char c) -> bool {
238+
return vec_has(collapsible, c);
239+
}, ' ');
240+
}
153241

154242
rendered += text;
243+
}else{
244+
format_handlers.first(node_iter, rendered);
155245
}
156246

157247
if(node_iter->child) node_iter = node_iter->child;
158248
else{
159-
while(node_iter != node && node_iter->next == nullptr) node_iter = node_iter->parent;
160-
if(node_iter == node) break;
249+
while(node_iter != node && node_iter->next == nullptr){
250+
format_handlers.second(node_iter, rendered);
161251

162-
if(vec_has(breaking, node_iter->tag_id)){ // <br/>
163-
rendered += "\n";
252+
node_iter = node_iter->parent;
164253
}
254+
if(node_iter == node) break;
165255

256+
format_handlers.second(node_iter, rendered);
166257
node_iter = node_iter->next;
167258
}
168259
}
@@ -228,7 +319,7 @@ void parseopts(int &argc, const char** &argv){
228319
cerr << "invalid short option '-" << argv[1][0] << "'" << endl;
229320
exit(EXIT_FAILURE);
230321
}
231-
if(flags["dirtyargs"]){
322+
if(flags["dirtyargs"]){ // option handler touched argv (args?); skip
232323
flags["dirtyargs"] = false;
233324
break;
234325
}

0 commit comments

Comments
 (0)