Is it possible to have an example of upload_system with RAG view data?

Hi @mikelonestone,

upload_system with rag view about zeno-client HOT 2 CLOSED

mikelonestone commented on August 22, 2024

upload_system with rag view

from zeno-client.

Comments (2)

Sparkier commented on August 22, 2024 1

Hi @mikelonestone, the attached example shows how you can use the rag view and add data. Let me know if you have additional questions.
document-qa-results.json

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from zeno_client import ZenoClient, ZenoMetric\n",
    "import pandas as pd\n",
    "import json\n",
    "import os\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv(override=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the exported QA results. JSON is UTF-8 by specification, so the encoding is\n",
    "# pinned here instead of relying on the platform default.\n",
    "with open(\"document-qa-results.json\", encoding=\"utf-8\") as f:\n",
    "    data = json.load(f)\n",
    "\n",
    "# One row per record's \"data\" field, with the row index exposed as an explicit id column.\n",
    "data_df = pd.DataFrame({\"question\": [d[\"data\"] for d in data]})\n",
    "data_df[\"id\"] = data_df.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The API key is read from the ZENO_API_KEY environment variable rather than hard-coded.\n",
    "client = ZenoClient(\n",
    "    os.environ[\"ZENO_API_KEY\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Successfully updated project.\n",
      "Access your project at  https://hub.zenoml.com/project/cabreraalex/Document%20QA\n"
     ]
    }
   ],
   "source": [
    "# View spec for a single entry of the \"retrieved\" list.\n",
    "retrieved_item_view = {\n",
    "    \"type\": \"vstack\",\n",
    "    \"keys\": {\n",
    "        \"score\": {\"type\": \"text\", \"label\": \"score: \"},\n",
    "        \"reference\": {\"type\": \"markdown\"},\n",
    "        \"text\": {\"type\": \"text\", \"label\": \"text: \"},\n",
    "    },\n",
    "}\n",
    "\n",
    "# View spec for the system output: the answer plus the bordered list of retrieved entries.\n",
    "output_view = {\n",
    "    \"type\": \"vstack\",\n",
    "    \"keys\": {\n",
    "        \"answer\": {\"type\": \"text\"},\n",
    "        \"retrieved\": {\n",
    "            \"type\": \"list\",\n",
    "            \"elements\": retrieved_item_view,\n",
    "            \"border\": True,\n",
    "        },\n",
    "    },\n",
    "}\n",
    "\n",
    "# Every metric uses type \"mean\" over the column of the same name.\n",
    "metric_names = [\"accuracy\", \"exact_match\", \"substring_match\", \"f1\", \"rougel\"]\n",
    "\n",
    "project = client.create_project(\n",
    "    name=\"Document QA\",\n",
    "    view={\n",
    "        \"data\": {\"type\": \"text\"},\n",
    "        \"label\": {\"type\": \"text\"},\n",
    "        \"output\": output_view,\n",
    "    },\n",
    "    description=\"Document-grounded question answering with Wikipedia\",\n",
    "    metrics=[\n",
    "        ZenoMetric(name=metric, type=\"mean\", columns=[metric])\n",
    "        for metric in metric_names\n",
    "    ],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload the question DataFrame as the project's dataset, keyed by the \"id\" column.\n",
    "project.upload_dataset(\n",
    "    data_df,\n",
    "    id_column=\"id\",\n",
    "    data_column=\"question\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def format_system_output(record):\n",
    "    \"\"\"Return the JSON string uploaded as one row's output.\n",
    "\n",
    "    Uses the first entry of record[\"output\"] and the first entry of its\n",
    "    \"retrieved\" list; the reference is turned into a Markdown link.\n",
    "    \"\"\"\n",
    "    first_output = record[\"output\"][0]\n",
    "    answer = first_output[\"answer\"]\n",
    "    top_hit = first_output[\"retrieved\"][0]\n",
    "    page_ref = top_hit[\"reference\"]\n",
    "    link = \"[{idx}]({url})\".format(\n",
    "        idx=page_ref,\n",
    "        url=\"https://en.wikipedia.org/?curid=\" + page_ref,\n",
    "    )\n",
    "    return json.dumps(\n",
    "        {\n",
    "            \"answer\": answer,\n",
    "            \"retrieved\": [\n",
    "                {\n",
    "                    \"reference\": link,\n",
    "                    \"text\": top_hit[\"text\"],\n",
    "                    \"score\": top_hit[\"score\"],\n",
    "                }\n",
    "            ],\n",
    "        }\n",
    "    )\n",
    "\n",
    "\n",
    "# Evaluation fields copied verbatim from each record's \"answer_evaluation\" dict.\n",
    "evaluation_columns = [\"accuracy\", \"exact_match\", \"substring_match\", \"f1\", \"rougel\"]\n",
    "\n",
    "output_df = pd.DataFrame(\n",
    "    {\n",
    "        \"output\": [format_system_output(record) for record in data],\n",
    "        **{\n",
    "            column: [\n",
    "                record[\"output\"][0][\"answer_evaluation\"][column] for record in data\n",
    "            ]\n",
    "            for column in evaluation_columns\n",
    "        },\n",
    "    }\n",
    ")\n",
    "output_df[\"id\"] = output_df.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload the system's outputs and per-row evaluation columns under the name \"Llama-2 BM25\".\n",
    "# Rows carry the same index-based \"id\" column that was used for the dataset upload.\n",
    "project.upload_system(\n",
    "    output_df, name=\"Llama-2 BM25\", id_column=\"id\", output_column=\"output\"\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "compare",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

from zeno-client.

mikelonestone commented on August 22, 2024

Sorry, I forgot to close the issue. I just had to delete and recreate the project (before that, the data view specification wasn't being used, and the data was displayed as plain text instead of being formatted).

from zeno-client.

upload_system with rag view about zeno-client HOT 2 CLOSED

Comments (2)

Related Issues (7)

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent