@@ -14,7 +14,7 @@ import TabItem from '@theme/TabItem';
14
14
15
15
---
16
16
17
- Now that we know how to execute scripts on a page, we're ready to learn a bit about [ data extraction] ( ../../scraping_basics_javascript/data_extraction/index.md ) . In this lesson, we'll be scraping all the on-sale products from our [ Fakestore ] ( https://demo-webstore.apify.org/search/on-sale ) website.
17
+ Now that we know how to execute scripts on a page, we're ready to learn a bit about [data extraction](../../scraping_basics_javascript/data_extraction/index.md). In this lesson, we'll be scraping all the on-sale products from [warehouse-theme-metal.myshopify.com](https://warehouse-theme-metal.myshopify.com/), a sample Shopify website.
18
18
19
19
> Most web data extraction cases involve looping through a list of items of some sort.
20
20
@@ -36,7 +36,7 @@ import { chromium } from 'playwright';
36
36
const browser = await chromium .launch ({ headless: false });
37
37
const page = await browser .newPage ();
38
38
39
- await page .goto (' https://demo-webstore.apify.org/search/on-sale ' );
39
+ await page .goto (' https://warehouse-theme-metal.myshopify.com/collections/sales ' );
40
40
41
41
// code will go here
42
42
@@ -54,7 +54,7 @@ import puppeteer from 'puppeteer';
54
54
const browser = await puppeteer .launch ({ headless: false });
55
55
const page = await browser .newPage ();
56
56
57
- await page .goto (' https://demo-webstore.apify.org/search/on-sale ' );
57
+ await page .goto (' https://warehouse-theme-metal.myshopify.com/collections/sales ' );
58
58
59
59
// code will go here
60
60
@@ -82,16 +82,12 @@ We'll be returning a bunch of product objects from this function, which will be
82
82
83
83
``` js
84
84
const products = await page .evaluate (() => {
85
- const productCards = Array .from (document .querySelectorAll (' a[class*="ProductCard_root"] ' ));
85
+ const productCards = Array .from (document .querySelectorAll (' .product-item ' ));
86
86
87
87
return productCards .map ((element ) => {
88
- const name = element .querySelector (' h3[class*="ProductCard_name"]' ).textContent ;
89
- const price = element .querySelector (' div[class*="ProductCard_price"]' ).textContent ;
90
-
91
- return {
92
- name,
93
- price,
94
- };
88
+ const name = element .querySelector (' .product-item__title' ).textContent ;
89
+ const price = element .querySelector (' .price' ).lastChild .textContent ;
90
+ return { name, price };
95
91
});
96
92
});
97
93
@@ -100,7 +96,20 @@ console.log(products);
100
96
101
97
When we run this code, we see this logged to our console:
102
98
103
- ![ Products logged to the console] ( ./images/log-products.png )
99
+ ``` text
100
+ $ node index.js
101
+ [
102
+ {
103
+ name: 'JBL Flip 4 Waterproof Portable Bluetooth Speaker',
104
+ price: '$74.95'
105
+ },
106
+ {
107
+ name: 'Sony XBR-950G BRAVIA 4K HDR Ultra HD TV',
108
+ price: 'From $1,398.00'
109
+ },
110
+ ...
111
+ ]
112
+ ```
104
113
105
114
## Using jQuery {#using-jquery}
106
115
@@ -118,19 +127,12 @@ Now, since we're able to use jQuery, let's translate our vanilla JavaScript code
118
127
await page .addScriptTag ({ url: ' https://code.jquery.com/jquery-3.6.0.min.js' });
119
128
120
129
const products = await page .evaluate (() => {
121
- const productCards = Array .from ($ (' a[class*="ProductCard_root"]' ));
122
-
123
- return productCards .map ((element ) => {
124
- const card = $ (element);
125
-
126
- const name = card .find (' h3[class*="ProductCard_name"]' ).text ();
127
- const price = card .find (' div[class*="ProductCard_price"]' ).text ();
128
-
129
- return {
130
- name,
131
- price,
132
- };
133
- });
130
+ return Array .from ($ (' .product-item' ).map (function () {
131
+ const card = $ (this );
132
+ const name = card .find (' .product-item__title' ).text ();
133
+ const price = card .find (' .price' ).contents ().last ().text ();
134
+ return { name, price };
135
+ }));
134
136
});
135
137
136
138
console .log (products);
@@ -178,7 +180,7 @@ import { load } from 'cheerio';
178
180
const browser = await chromium .launch ({ headless: false });
179
181
const page = await browser .newPage ();
180
182
181
- await page .goto (' https://demo-webstore.apify.org/search/on-sale ' );
183
+ await page .goto (' https://warehouse-theme-metal.myshopify.com/collections/sales ' );
182
184
183
185
const $ = load (await page .content ());
184
186
@@ -197,7 +199,7 @@ import { load } from 'cheerio';
197
199
const browser = await puppeteer .launch ({ headless: false });
198
200
const page = await browser .newPage ();
199
201
200
- await page .goto (' https://demo-webstore.apify.org/search/on-sale ' );
202
+ await page .goto (' https://warehouse-theme-metal.myshopify.com/collections/sales ' );
201
203
202
204
const $ = load (await page .content ());
203
205
@@ -214,19 +216,12 @@ Now, to loop through all of the products, we'll make use of the `$` object and l
214
216
``` js
215
217
const $ = load (await page .content ());
216
218
217
- const productCards = Array .from ($ (' a[class*="ProductCard_root"]' ));
218
-
219
- const products = productCards .map ((element ) => {
220
- const card = $ (element);
221
-
222
- const name = card .find (' h3[class*="ProductCard_name"]' ).text ();
223
- const price = card .find (' div[class*="ProductCard_price"]' ).text ();
224
-
225
- return {
226
- name,
227
- price,
228
- };
229
- });
219
+ const products = Array .from ($ (' .product-item' ).map (function () {
220
+ const card = $ (this );
221
+ const name = card .find (' .product-item__title' ).text ();
222
+ const price = card .find (' .price' ).contents ().last ().text ();
223
+ return { name, price };
224
+ }));
230
225
231
226
console .log (products);
232
227
```
@@ -245,23 +240,16 @@ import { load } from 'cheerio';
245
240
const browser = await chromium .launch ({ headless: false });
246
241
const page = await browser .newPage ();
247
242
248
- await page .goto (' https://demo-webstore.apify.org/search/on-sale ' );
243
+ await page .goto (' https://warehouse-theme-metal.myshopify.com/collections/sales ' );
249
244
250
245
const $ = load (await page .content ());
251
246
252
- const productCards = Array .from ($ (' a[class*="ProductCard_root"]' ));
253
-
254
- const products = productCards .map ((element ) => {
255
- const card = $ (element);
256
-
257
- const name = card .find (' h3[class*="ProductCard_name"]' ).text ();
258
- const price = card .find (' div[class*="ProductCard_price"]' ).text ();
259
-
260
- return {
261
- name,
262
- price,
263
- };
264
- });
247
+ const products = Array .from ($ (' .product-item' ).map (function () {
248
+ const card = $ (this );
249
+ const name = card .find (' .product-item__title' ).text ();
250
+ const price = card .find (' .price' ).contents ().last ().text ();
251
+ return { name, price };
252
+ }));
265
253
266
254
console .log (products);
267
255
@@ -278,23 +266,16 @@ import { load } from 'cheerio';
278
266
const browser = await puppeteer .launch ({ headless: false });
279
267
const page = await browser .newPage ();
280
268
281
- await page .goto (' https://demo-webstore.apify.org/search/on-sale ' );
269
+ await page .goto (' https://warehouse-theme-metal.myshopify.com/collections/sales ' );
282
270
283
271
const $ = load (await page .content ());
284
272
285
- const productCards = Array .from ($ (' a[class*="ProductCard_root"]' ));
286
-
287
- const products = productCards .map ((element ) => {
288
- const card = $ (element);
289
-
290
- const name = card .find (' h3[class*="ProductCard_name"]' ).text ();
291
- const price = card .find (' div[class*="ProductCard_price"]' ).text ();
292
-
293
- return {
294
- name,
295
- price,
296
- };
297
- });
273
+ const products = Array .from ($ (' .product-item' ).map (function () {
274
+ const card = $ (this );
275
+ const name = card .find (' .product-item__title' ).text ();
276
+ const price = card .find (' .price' ).contents ().last ().text ();
277
+ return { name, price };
278
+ }));
298
279
299
280
console .log (products);
300
281
0 commit comments