Giter Club home page Giter Club logo

Comments (2)

Sparkier avatar Sparkier commented on August 22, 2024 1

Hi @mikelonestone, the attached example shows how you can use the RAG view and add data. Let me know if you have additional questions.
document-qa-results.json

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from zeno_client import ZenoClient, ZenoMetric\n",
    "import pandas as pd\n",
    "import json\n",
    "import os\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv(override=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"document-qa-results.json\", \"r\") as f:\n",
    "    data = json.load(f)\n",
    "data_df = pd.DataFrame({\"question\": [d[\"data\"] for d in data]})\n",
    "data_df[\"id\"] = data_df.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "client = ZenoClient(os.environ[\"ZENO_API_KEY\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Successfully updated project.\n",
      "Access your project at  https://hub.zenoml.com/project/cabreraalex/Document%20QA\n"
     ]
    }
   ],
   "source": [
    "project = client.create_project(\n",
    "    name=\"Document QA\",\n",
    "    view={\n",
    "        \"data\": {\"type\": \"text\"},\n",
    "        \"label\": {\"type\": \"text\"},\n",
    "        \"output\": {\n",
    "            \"type\": \"vstack\",\n",
    "            \"keys\": {\n",
    "                \"answer\": {\"type\": \"text\"},\n",
    "                \"retrieved\": {\n",
    "                    \"type\": \"list\",\n",
    "                    \"elements\": {\n",
    "                        \"type\": \"vstack\",\n",
    "                        \"keys\": {\n",
    "                            \"score\": {\"type\": \"text\", \"label\": \"score: \"},\n",
    "                            \"reference\": {\"type\": \"markdown\"},\n",
    "                            \"text\": {\"type\": \"text\", \"label\": \"text: \"},\n",
    "                        },\n",
    "                    },\n",
    "                    \"border\": True,\n",
    "                },\n",
    "            },\n",
    "        },\n",
    "    },\n",
    "    description=\"Document-grounded question answering with Wikipedia\",\n",
    "    metrics=[\n",
    "        ZenoMetric(name=\"accuracy\", type=\"mean\", columns=[\"accuracy\"]),\n",
    "        ZenoMetric(name=\"exact_match\", type=\"mean\", columns=[\"exact_match\"]),\n",
    "        ZenoMetric(name=\"substring_match\", type=\"mean\", columns=[\"substring_match\"]),\n",
    "        ZenoMetric(name=\"f1\", type=\"mean\", columns=[\"f1\"]),\n",
    "        ZenoMetric(name=\"rougel\", type=\"mean\", columns=[\"rougel\"]),\n",
    "    ],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "project.upload_dataset(data_df, id_column=\"id\", data_column=\"question\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "output_df = pd.DataFrame(\n",
    "    {\n",
    "        \"output\": [\n",
    "            json.dumps(\n",
    "                {\n",
    "                    \"answer\": d[\"output\"][0][\"answer\"],\n",
    "                    \"retrieved\": [\n",
    "                        {\n",
    "                            \"reference\": \"[{idx}]({url})\".format(\n",
    "                                idx=d[\"output\"][0][\"retrieved\"][0][\"reference\"],\n",
    "                                url=\"https://en.wikipedia.org/?curid=\"\n",
    "                                + d[\"output\"][0][\"retrieved\"][0][\"reference\"],\n",
    "                            ),\n",
    "                            \"text\": d[\"output\"][0][\"retrieved\"][0][\"text\"],\n",
    "                            \"score\": d[\"output\"][0][\"retrieved\"][0][\"score\"],\n",
    "                        }\n",
    "                    ],\n",
    "                }\n",
    "            )\n",
    "            for d in data\n",
    "        ],\n",
    "        \"accuracy\": [d[\"output\"][0][\"answer_evaluation\"][\"accuracy\"] for d in data],\n",
    "        \"exact_match\": [\n",
    "            d[\"output\"][0][\"answer_evaluation\"][\"exact_match\"] for d in data\n",
    "        ],\n",
    "        \"substring_match\": [\n",
    "            d[\"output\"][0][\"answer_evaluation\"][\"substring_match\"] for d in data\n",
    "        ],\n",
    "        \"f1\": [d[\"output\"][0][\"answer_evaluation\"][\"f1\"] for d in data],\n",
    "        \"rougel\": [d[\"output\"][0][\"answer_evaluation\"][\"rougel\"] for d in data],\n",
    "    }\n",
    ")\n",
    "output_df[\"id\"] = output_df.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "project.upload_system(\n",
    "    output_df, name=\"Llama-2 BM25\", id_column=\"id\", output_column=\"output\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "compare",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

from zeno-client.

mikelonestone avatar mikelonestone commented on August 22, 2024

Sorry, I forgot to close the issue. I just had to delete and recreate the project (before that, the data view specification wasn't used and the data was displayed as text instead of being formatted).

from zeno-client.

Related Issues (7)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Something interesting about the web. A new door to the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Something interesting about games, to make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.