Setup Superlinked

In a notebook

Install the superlinked library:

%pip install superlinked

As a script

Ensure your Python version is at least 3.10 and no newer than 3.12.

$> python -V
Python 3.10.9

If your Python version is outside the 3.10–3.12 range, you can use pyenv to install a supported version.

Upgrade pip and install the superlinked library.

$> python -m pip install --upgrade pip
$> python -m pip install superlinked

Run the example

The first run will take slightly longer because it has to download the embedding model.

import json

from superlinked.framework.common.embedding.number_embedding import Mode
from superlinked.framework.common.nlq.open_ai import OpenAIClientConfig
from superlinked.framework.common.parser.dataframe_parser import DataFrameParser
from superlinked.framework.common.schema.schema import schema
from superlinked.framework.common.schema.schema_object import Integer, String
from superlinked.framework.common.schema.id_schema_object import IdField
from superlinked.framework.dsl.space.number_space import NumberSpace
from superlinked.framework.dsl.space.text_similarity_space import TextSimilaritySpace
from superlinked.framework.dsl.index.index import Index
from superlinked.framework.dsl.query.param import Param
from superlinked.framework.dsl.query.query import Query
from superlinked.framework.dsl.source.in_memory_source import InMemorySource
from superlinked.framework.dsl.executor.in_memory.in_memory_executor import (
    InMemoryExecutor,
)


# Schema of a single product review. The fields declared here are what the
# spaces, the index and the ingested records below refer to.
@schema
class Review:
    id: IdField  # identifier of the review
    review_text: String  # free-text body; embedded by the text similarity space
    rating: Integer  # numeric rating; the example data uses values from 1 to 5


# Instantiate the schema; its fields are referenced when building the spaces
# and the index.
review = Review()

# Text similarity space over the review body, using the named embedding model.
review_text_space = TextSimilaritySpace(
    text=review.review_text,
    model="Alibaba-NLP/gte-large-en-v1.5",
)

# Number space over the rating, bounded to the 1-5 range.
# NOTE(review): Mode.MAXIMUM presumably favours higher ratings (the example
# expects the 5-star review to come first) - confirm against the library docs.
rating_maximizer_space = NumberSpace(
    number=review.rating,
    min_value=1,
    max_value=5,
    mode=Mode.MAXIMUM,
)

# Index built from both spaces; the rating is additionally passed as a field.
index = Index(
    [review_text_space, rating_maximizer_space],
    fields=[review.rating],
)

# Fill this in with your OpenAI API key - the LLM configured here drives the
# extraction of query parameters from the natural language query.
openai_config = OpenAIClientConfig(api_key="YOUR_OPENAI_API_KEY", model="gpt-4o")

# A `Param` can carry a description that helps the LLM parse information out of
# natural language queries. This one should receive only the free-text part of
# the query, i.e. whatever is NOT already covered by the other spaces or params
# (such as the rating preference).
text_similar_param = Param(
    "query_text",
    description=(
        "The text in the user's query that is used to search in the reviews' body. "
        "Extract info that does not apply to other spaces or params."
    ),
)

# The query is fully parameterised: the search text, the per-space weights and
# the result limit are all `Param`s, so they can be supplied at query time or
# filled in by the LLM from the natural language query.
space_weights = {
    review_text_space: Param("review_text_weight"),
    rating_maximizer_space: Param("rating_maximizer_weight"),
}

# Build the query step by step: select reviews, ...
query = Query(index, weights=space_weights).find(review)
# ... rank them by similarity to the extracted query text, ...
query = query.similar(review_text_space.text, text_similar_param)
# ... cap the number of results, ...
query = query.limit(Param("limit"))
# ... and let the configured OpenAI client fill the params from natural language.
query = query.with_natural_query(Param("natural_query"), openai_config)

# Start an in-memory app that serves the review source and the index.
source: InMemorySource = InMemorySource(review)
executor = InMemoryExecutor(sources=[source], indices=[index])
app = executor.run()

# Example reviews, defined inline (nothing is downloaded here).
data = [
    {"id": 1, "review_text": "Useless product", "rating": 1},
    {"id": 2, "review_text": "Great product I am so happy!", "rating": 5},
    {"id": 3, "review_text": "Mediocre stuff fits the purpose", "rating": 3},
]

# Ingest the reviews into the framework through the source.
source.put(data)

# Only the natural language query and the limit are passed explicitly here.
result = app.query(
    query,
    natural_query="Show me the best product",
    limit=1,
)

# Examine the parameters extracted from the natural language query.
extracted_params = json.dumps(result.knn_params, indent=2)
print(extracted_params)

# The result is expected to be the 5-star rated product.
# In a notebook this expression is displayed as the cell output; in a script
# wrap it in print(...) to see the DataFrame.
result.to_pandas()

Last updated