@@ -35,6 +35,10 @@ Usage: %s [options] <selector> <mode> [mode argument]
35
35
may be one of { data, text, attr }:
36
36
data - return raw html of matching elements
37
37
text - return inner text of matching elements
38
+ [mode argument: formatting]
39
+ supported modes: { plain, ansi, md }
40
+ default: plain
41
+ for plain, ANSI, or markdown formatted output respectively
38
42
attr - return attribute value of matching elements
39
43
<mode argument: attr>
40
44
attribute to return
@@ -44,6 +48,14 @@ Usage: %s [options] <selector> <mode> [mode argument]
44
48
curl -sSL https://example.com | %s a attr href
45
49
)" ;
46
50
51
+ static const string afmt_s = " \033 [" ;
52
+ static const string afmt_e = " m" ;
53
+ static const vector<char > collapsible = {' ' , ' \t ' , ' \n ' , ' \r ' };
54
+ static const vector<unsigned long > breaking = {
55
+ MyHTML_TAG_BR,
56
+ MyHTML_TAG_P
57
+ };
58
+
47
59
static map<const string, bool > flags = {
48
60
{" dirtyargs" , false }
49
61
};
@@ -86,10 +98,18 @@ bool readfile(string filename, string &target){
86
98
return true ;
87
99
}
88
100
89
- template <typename T> inline bool vec_has (vector<T> &vec, T val){
101
/**
 * Report whether `vec` contains an element equal to `val`.
 *
 * The needle type U is deduced independently of the element type T, so a
 * value of a different but comparable type (for example a tag id whose
 * typedef is not exactly `unsigned long` being looked up in a
 * `vector<unsigned long>`) no longer triggers a template deduction conflict.
 *
 * @param vec  container to search; it is not modified
 * @param val  value compared against each element with operator==
 * @return     true if at least one element compares equal to `val`
 */
template <typename T, typename U> inline bool vec_has(const std::vector<T> &vec, const U &val){
	return std::find(vec.begin(), vec.end(), val) != vec.end();
}
92
104
105
+ template <typename T> inline bool node_in (myhtml_tree_node_t * node, T tag){
106
+ while (node){
107
+ if (node->tag_id == tag) return true ;
108
+ node = node->parent ;
109
+ }
110
+ return false ;
111
+ }
112
+
93
113
static map<const char , const string> option_longopts = { // maps shortopts to longopts from option_handlers
94
114
{' h' , " help" },
95
115
{' f' , " file" },
@@ -115,6 +135,78 @@ static map<const string, const function<void(int&, const char**&)>> option_handl
115
135
}}
116
136
};
117
137
138
+ static pair<const function<void (myhtml_tree_node_t *, string&)>, const function<void (myhtml_tree_node_t *, string&)>> format_handlers = { // {format, unformat}
139
+ [](myhtml_tree_node_t * node_iter, string &rendered){
140
+ if (state[" modearg" ].length () > 0 ){
141
+ const bool ansi = state[" modearg" ] == " ansi" ;
142
+ const bool md = state[" modearg" ] == " md" ;
143
+ switch (node_iter->tag_id ){ // modearg formatters
144
+ case MyHTML_TAG_B: // bold on
145
+ case MyHTML_TAG_STRONG:
146
+ if (ansi) rendered += afmt_s + " 1" + afmt_e;
147
+ if (md) rendered += " **" ;
148
+ break ;
149
+ case MyHTML_TAG_I: // italics on
150
+ case MyHTML_TAG_U:
151
+ case MyHTML_TAG_EM:
152
+ if (ansi) rendered += afmt_s + " 4" + afmt_e;
153
+ if (md) rendered += " _" ;
154
+ break ;
155
+ case MyHTML_TAG_CODE: // code on
156
+ if (node_in (node_iter, MyHTML_TAG_PRE)){
157
+ rendered += " ```\n " ;
158
+ }else {
159
+ if (ansi) rendered += afmt_s + " 7" + afmt_e;
160
+ if (md) rendered += " `" ;
161
+ }
162
+ break ;
163
+ }
164
+ }
165
+ switch (node_iter->tag_id ){ // global formatters
166
+ case MyHTML_TAG_LI:
167
+ rendered += " - " ;
168
+ break ;
169
+ }
170
+ },
171
+ [](myhtml_tree_node_t * node_iter, string &rendered){
172
+ if (state[" modearg" ].length () > 0 ){
173
+ const bool ansi = state[" modearg" ] == " ansi" ;
174
+ const bool md = state[" modearg" ] == " md" ;
175
+ switch (node_iter->tag_id ){ // modearg unformatters
176
+ case MyHTML_TAG_B: // bold off
177
+ case MyHTML_TAG_STRONG:
178
+ if (ansi) rendered += afmt_s + " 21" + afmt_e;
179
+ if (md) rendered += " **" ;
180
+ break ;
181
+ case MyHTML_TAG_I: // italics off
182
+ case MyHTML_TAG_U:
183
+ case MyHTML_TAG_EM:
184
+ if (ansi) rendered += afmt_s + " 24" + afmt_e; // no italics here :(
185
+ if (md) rendered += " _" ;
186
+ break ;
187
+ case MyHTML_TAG_CODE: // code off
188
+ if (node_in (node_iter, MyHTML_TAG_PRE)){
189
+ rendered += " ```\n " ;
190
+ }else {
191
+ if (ansi) rendered += afmt_s + " 27" + afmt_e;
192
+ if (md) rendered += " `" ;
193
+ }
194
+ break ;
195
+ }
196
+ }
197
+ switch (node_iter->tag_id ){ // global unformatters
198
+ case MyHTML_TAG_LI:
199
+ case MyHTML_TAG_UL:
200
+ rendered += " \n " ;
201
+ break ;
202
+ }
203
+
204
+ if (vec_has (breaking, node_iter->tag_id )){ // <br/>
205
+ rendered += " \n " ;
206
+ }
207
+ }
208
+ };
209
+
118
210
static map<const string, const function<void (myhtml_tree_node_t *)>> mode_handlers = { // maps modes to functions
119
211
{" data" , [](myhtml_tree_node_t * node) {
120
212
myhtml_serialization_tree_callback (node, [](const char * data, size_t len, void * ctx) -> unsigned int {
@@ -127,42 +219,41 @@ static map<const string, const function<void(myhtml_tree_node_t*)>> mode_handler
127
219
{" text" , [](myhtml_tree_node_t * node) {
128
220
string rendered = " " ;
129
221
130
- static vector<char > collapsible = {' ' , ' \t ' , ' \n ' , ' \r ' };
131
- static vector<unsigned long > breaking = {
132
- MyHTML_TAG_BR,
133
- MyHTML_TAG_P
134
- };
135
-
136
222
myhtml_tree_node_t * node_iter = node->child ;
137
223
while (node_iter){
138
224
const char * text_c = myhtml_node_text (node_iter, nullptr );
139
225
string text = " " ;
140
226
if (text_c != nullptr ) text += text_c;
141
227
142
228
if (node_iter->tag_id == MyHTML_TAG__TEXT){
143
- // collapse whitespace to single character
144
- string::iterator nend = unique (text.begin (), text.end (), [](char c1, char c2) -> bool {
145
- return vec_has (collapsible, c1) && vec_has (collapsible, c2);
146
- });
147
- text.resize (static_cast <unsigned long >(nend-text.begin ()));
148
-
149
- // replace whitespace with space
150
- replace_if (text.begin (), text.end (), [](char c) -> bool {
151
- return vec_has (collapsible, c);
152
- }, ' ' );
229
+ if (!node_in (node_iter, MyHTML_TAG_PRE)){
230
+ // collapse whitespace to single character
231
+ string::iterator nend = unique (text.begin (), text.end (), [](char c1, char c2) -> bool {
232
+ return vec_has (collapsible, c1) && vec_has (collapsible, c2);
233
+ });
234
+ text.resize (static_cast <unsigned long >(nend-text.begin ()));
235
+
236
+ // replace whitespace with space
237
+ replace_if (text.begin (), text.end (), [](char c) -> bool {
238
+ return vec_has (collapsible, c);
239
+ }, ' ' );
240
+ }
153
241
154
242
rendered += text;
243
+ }else {
244
+ format_handlers.first (node_iter, rendered);
155
245
}
156
246
157
247
if (node_iter->child ) node_iter = node_iter->child ;
158
248
else {
159
- while (node_iter != node && node_iter->next == nullptr ) node_iter = node_iter-> parent ;
160
- if (node_iter == node) break ;
249
+ while (node_iter != node && node_iter->next == nullptr ){
250
+ format_handlers. second (node_iter, rendered) ;
161
251
162
- if (vec_has (breaking, node_iter->tag_id )){ // <br/>
163
- rendered += " \n " ;
252
+ node_iter = node_iter->parent ;
164
253
}
254
+ if (node_iter == node) break ;
165
255
256
+ format_handlers.second (node_iter, rendered);
166
257
node_iter = node_iter->next ;
167
258
}
168
259
}
@@ -228,7 +319,7 @@ void parseopts(int &argc, const char** &argv){
228
319
cerr << " invalid short option '-" << argv[1 ][0 ] << " '" << endl;
229
320
exit (EXIT_FAILURE);
230
321
}
231
- if (flags[" dirtyargs" ]){
322
+ if (flags[" dirtyargs" ]){ // option handler touched argv (args?); skip
232
323
flags[" dirtyargs" ] = false ;
233
324
break ;
234
325
}
0 commit comments