|
27 | 27 | },
|
28 | 28 | {
|
29 | 29 | "cell_type": "code",
|
30 |
| - "execution_count": 1, |
| 30 | + "execution_count": 5, |
31 | 31 | "metadata": {},
|
32 | 32 | "outputs": [],
|
33 | 33 | "source": [
|
|
311 | 311 | "metadata": {},
|
312 | 312 | "source": [
|
313 | 313 | "### Working with JSON\n",
|
314 |
| - "Redis also supports native **JSON** objects. These can be multi-level (nested) objects, with full JSONPath support for updating/retrieving sub elements:\n", |
315 |
| - "\n", |
316 |
| - "```python\n", |
317 |
| - "{\n", |
318 |
| - " \"name\": \"bike\",\n", |
319 |
| - " \"metadata\": {\n", |
320 |
| - " \"model\": \"Deimos\",\n", |
321 |
| - " \"brand\": \"Ergonom\",\n", |
322 |
| - " \"type\": \"Enduro bikes\",\n", |
323 |
| - " \"price\": 4972,\n", |
324 |
| - " }\n", |
325 |
| - "}\n", |
326 |
| - "```\n", |
327 | 314 | "\n",
|
328 | 315 | "JSON is best suited for use cases with the following characteristics:\n",
|
329 | 316 | "- Ease of use and data model flexibility are top concerns\n",
|
330 | 317 | "- Application data is already native JSON\n",
|
331 | 318 | "- Replacing another document storage/db solution"
|
332 | 319 | ]
|
333 | 320 | },
|
334 |
| - { |
335 |
| - "cell_type": "markdown", |
336 |
| - "metadata": {}, |
337 |
| - "source": [ |
338 |
| - "#### Full JSON Path support\n", |
339 |
| - "Because Redis enables full JSON path support, when creating an index schema, elements need to be indexed and selected by their path with the desired `name` AND `path` that points to where the data is located within the objects.\n", |
340 |
| - "\n", |
341 |
| - "> By default, RedisVL will assume the path as `$.{name}` if not provided in JSON fields schema." |
342 |
| - ] |
343 |
| - }, |
344 | 321 | {
|
345 | 322 | "cell_type": "code",
|
346 | 323 | "execution_count": 11,
|
|
505 | 482 | "source": [
|
506 | 483 | "jindex.delete()"
|
507 | 484 | ]
|
| 485 | + }, |
| 486 | + { |
| 487 | + "cell_type": "markdown", |
| 488 | + "metadata": {}, |
| 489 | + "source": [ |
| 490 | + "# Working with nested data in JSON\n", |
| 491 | + "\n", |
| 492 | + "Redis also supports native **JSON** objects. These can be multi-level (nested) objects, with full JSONPath support for updating/retrieving sub elements:\n", |
| 493 | + "\n", |
| 494 | + "```json\n", |
| 495 | + "{\n", |
| 496 | + " \"name\": \"Specialized Stump jumper\",\n", |
| 497 | + " \"metadata\": {\n", |
| 498 | + " \"model\": \"Stumpjumper\",\n", |
| 499 | + " \"brand\": \"Specialized\",\n", |
| 500 | + " \"type\": \"Enduro bikes\",\n", |
| 501 | + " \"price\": 3000\n", |
| 502 | + " }\n", |
| 503 | + "}\n", |
| 504 | + "```\n", |
| 505 | + "\n", |
| 506 | + "#### Full JSON Path support\n", |
| 507 | + "Because Redis supports full JSONPath, each field in the index schema must be indexed and selected by its path: give every field the desired `name` AND a `path` that points to where the data is located within the object.\n", |
| 508 | + "\n", |
| 509 | + "> By default, RedisVL assumes the path is `$.{name}` if no path is provided in the JSON field schema. For nested fields, provide the path as `$.object.attribute`.\n", |
| 510 | + "\n", |
| 511 | + "### As an example:" |
| 512 | + ] |
| 513 | + }, |
| 514 | + { |
| 515 | + "cell_type": "code", |
| 516 | + "execution_count": 45, |
| 517 | + "metadata": {}, |
| 518 | + "outputs": [ |
| 519 | + { |
| 520 | + "name": "stderr", |
| 521 | + "output_type": "stream", |
| 522 | + "text": [ |
| 523 | + "/Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", |
| 524 | + " warnings.warn(\n" |
| 525 | + ] |
| 526 | + } |
| 527 | + ], |
| 528 | + "source": [ |
| 529 | + "from redisvl.utils.vectorize import HFTextVectorizer\n", |
| 530 | + "\n", |
| 531 | + "emb_model = HFTextVectorizer()\n", |
| 532 | + "\n", |
| 533 | + "bike_data = [\n", |
| 534 | + " {\n", |
| 535 | + " \"name\": \"Specialized Stump jumper\",\n", |
| 536 | + " \"metadata\": {\n", |
| 537 | + " \"model\": \"Stumpjumper\",\n", |
| 538 | + " \"brand\": \"Specialized\",\n", |
| 539 | + " \"type\": \"Enduro bikes\",\n", |
| 540 | + " \"price\": 3000\n", |
| 541 | + " },\n", |
| 542 | + " \"description\": \"The Specialized Stumpjumper is a versatile enduro bike that dominates both climbs and descents. Features a FACT 11m carbon fiber frame, FOX FLOAT suspension with 160mm travel, and SRAM X01 Eagle drivetrain. The asymmetric frame design and internal storage compartment make it a practical choice for all-day adventures.\"\n", |
| 543 | + " },\n", |
| 544 | + " {\n", |
| 545 | + " \"name\": \"bike_2\",\n", |
| 546 | + " \"metadata\": {\n", |
| 547 | + " \"model\": \"Slash\",\n", |
| 548 | + " \"brand\": \"Trek\",\n", |
| 549 | + " \"type\": \"Enduro bikes\",\n", |
| 550 | + " \"price\": 5000\n", |
| 551 | + " },\n", |
| 552 | + " \"description\": \"Trek's Slash is built for aggressive enduro riding and racing. Featuring Trek's Alpha Aluminum frame with RE:aktiv suspension technology, 160mm travel, and Knock Block frame protection. Equipped with Bontrager components and a Shimano XT drivetrain, this bike excels on technical trails and enduro race courses.\"\n", |
| 553 | + " }\n", |
| 554 | + "]\n", |
| 555 | + "\n", |
| 556 | + "bike_data = [{**d, \"bike_embedding\": emb_model.embed(d[\"description\"])} for d in bike_data]\n", |
| 557 | + "\n", |
| 558 | + "bike_schema = {\n", |
| 559 | + " \"index\": {\n", |
| 560 | + " \"name\": \"bike-json\",\n", |
| 561 | + " \"prefix\": \"bike-json\",\n", |
| 562 | + " \"storage_type\": \"json\", # JSON storage type\n", |
| 563 | + " },\n", |
| 564 | + " \"fields\": [\n", |
| 565 | + " {\n", |
| 566 | + " \"name\": \"model\",\n", |
| 567 | + " \"type\": \"tag\",\n", |
| 568 | + " \"path\": \"$.metadata.model\" # note the '$'\n", |
| 569 | + " },\n", |
| 570 | + " {\n", |
| 571 | + " \"name\": \"brand\",\n", |
| 572 | + " \"type\": \"tag\",\n", |
| 573 | + " \"path\": \"$.metadata.brand\"\n", |
| 574 | + " },\n", |
| 575 | + " {\n", |
| 576 | + " \"name\": \"price\",\n", |
| 577 | + " \"type\": \"numeric\",\n", |
| 578 | + " \"path\": \"$.metadata.price\"\n", |
| 579 | + " },\n", |
| 580 | + " {\n", |
| 581 | + " \"name\": \"bike_embedding\",\n", |
| 582 | + " \"type\": \"vector\",\n", |
| 583 | + " \"attrs\": {\n", |
| 584 | + " \"dims\": len(bike_data[0][\"bike_embedding\"]),\n", |
| 585 | + " \"distance_metric\": \"cosine\",\n", |
| 586 | + " \"algorithm\": \"flat\",\n", |
| 587 | + " \"datatype\": \"float32\"\n", |
| 588 | + " }\n", |
| 589 | + "\n", |
| 590 | + " }\n", |
| 591 | + " ],\n", |
| 592 | + "}" |
| 593 | + ] |
| 594 | + }, |
| 595 | + { |
| 596 | + "cell_type": "code", |
| 597 | + "execution_count": 46, |
| 598 | + "metadata": {}, |
| 599 | + "outputs": [], |
| 600 | + "source": [ |
| 601 | + "# construct a search index from the json schema\n", |
| 602 | + "bike_index = SearchIndex.from_dict(bike_schema)\n", |
| 603 | + "\n", |
| 604 | + "# connect to local redis instance\n", |
| 605 | + "bike_index.connect(\"redis://localhost:6379\")\n", |
| 606 | + "\n", |
| 607 | + "# create the index (no data yet)\n", |
| 608 | + "bike_index.create(overwrite=True)" |
| 609 | + ] |
| 610 | + }, |
| 611 | + { |
| 612 | + "cell_type": "code", |
| 613 | + "execution_count": 47, |
| 614 | + "metadata": {}, |
| 615 | + "outputs": [ |
| 616 | + { |
| 617 | + "data": { |
| 618 | + "text/plain": [ |
| 619 | + "['bike-json:de92cb9955434575b20f4e87a30b03d5',\n", |
| 620 | + " 'bike-json:054ab3718b984532b924946fa5ce00c6']" |
| 621 | + ] |
| 622 | + }, |
| 623 | + "execution_count": 47, |
| 624 | + "metadata": {}, |
| 625 | + "output_type": "execute_result" |
| 626 | + } |
| 627 | + ], |
| 628 | + "source": [ |
| 629 | + "bike_index.load(bike_data)" |
| 630 | + ] |
| 631 | + }, |
| 632 | + { |
| 633 | + "cell_type": "code", |
| 634 | + "execution_count": 48, |
| 635 | + "metadata": {}, |
| 636 | + "outputs": [], |
| 637 | + "source": [ |
| 638 | + "from redisvl.query import VectorQuery\n", |
| 639 | + "\n", |
| 640 | + "vec = emb_model.embed(\"I'd like a bike for aggressive riding\")\n", |
| 641 | + "\n", |
| 642 | + "v = VectorQuery(vector=vec,\n", |
| 643 | + " vector_field_name=\"bike_embedding\",\n", |
| 644 | + " return_fields=[\n", |
| 645 | + " \"brand\",\n", |
| 646 | + " \"name\",\n", |
| 647 | + " \"$.metadata.type\"\n", |
| 648 | + " ]\n", |
| 649 | + " )\n", |
| 650 | + "\n", |
| 651 | + "\n", |
| 652 | + "results = bike_index.query(v)" |
| 653 | + ] |
| 654 | + }, |
| 655 | + { |
| 656 | + "cell_type": "markdown", |
| 657 | + "metadata": {}, |
| 658 | + "source": [ |
| 659 | + "**Note:** As shown in the example above, if you want to retrieve a field from a JSON object that was not indexed, you also need to supply its full path, as with `$.metadata.type`." |
| 660 | + ] |
| 661 | + }, |
| 662 | + { |
| 663 | + "cell_type": "code", |
| 664 | + "execution_count": 49, |
| 665 | + "metadata": {}, |
| 666 | + "outputs": [ |
| 667 | + { |
| 668 | + "data": { |
| 669 | + "text/plain": [ |
| 670 | + "[{'id': 'bike-json:054ab3718b984532b924946fa5ce00c6',\n", |
| 671 | + " 'vector_distance': '0.519989073277',\n", |
| 672 | + " 'brand': 'Trek',\n", |
| 673 | + " '$.metadata.type': 'Enduro bikes'},\n", |
| 674 | + " {'id': 'bike-json:de92cb9955434575b20f4e87a30b03d5',\n", |
| 675 | + " 'vector_distance': '0.657624483109',\n", |
| 676 | + " 'brand': 'Specialized',\n", |
| 677 | + " '$.metadata.type': 'Enduro bikes'}]" |
| 678 | + ] |
| 679 | + }, |
| 680 | + "execution_count": 49, |
| 681 | + "metadata": {}, |
| 682 | + "output_type": "execute_result" |
| 683 | + } |
| 684 | + ], |
| 685 | + "source": [ |
| 686 | + "results" |
| 687 | + ] |
| 688 | + }, |
| 689 | + { |
| 690 | + "cell_type": "markdown", |
| 691 | + "metadata": {}, |
| 692 | + "source": [ |
| 693 | + "# Cleanup" |
| 694 | + ] |
| 695 | + }, |
| 696 | + { |
| 697 | + "cell_type": "code", |
| 698 | + "execution_count": 44, |
| 699 | + "metadata": {}, |
| 700 | + "outputs": [], |
| 701 | + "source": [ |
| 702 | + "bike_index.delete()" |
| 703 | + ] |
508 | 704 | }
|
509 | 705 | ],
|
510 | 706 | "metadata": {
|
511 | 707 | "kernelspec": {
|
512 |
| - "display_name": "Python 3.8.13 ('redisvl2')", |
| 708 | + "display_name": "Python 3", |
513 | 709 | "language": "python",
|
514 | 710 | "name": "python3"
|
515 | 711 | },
|
|
523 | 719 | "name": "python",
|
524 | 720 | "nbconvert_exporter": "python",
|
525 | 721 | "pygments_lexer": "ipython3",
|
526 |
| - "version": "3.9.12" |
| 722 | + "version": "3.11.9" |
527 | 723 | },
|
528 |
| - "orig_nbformat": 4, |
529 |
| - "vscode": { |
530 |
| - "interpreter": { |
531 |
| - "hash": "9b1e6e9c2967143209c2f955cb869d1d3234f92dc4787f49f155f3abbdfb1316" |
532 |
| - } |
533 |
| - } |
| 724 | + "orig_nbformat": 4 |
534 | 725 | },
|
535 | 726 | "nbformat": 4,
|
536 | 727 | "nbformat_minor": 2
|
|
0 commit comments