Install the superlinked library:
Copy
%pip install superlinked
Run the example
The first run will take slightly longer because it has to download the embedding model.
Example.py
Copy
import json
import os
from superlinked import framework as sl
class Product(sl.Schema):
    # Schema of one product record as it is ingested and queried below.
    id: sl.IdField  # record identifier
    description: sl.String  # free text, used by the text similarity space
    rating: sl.Integer  # integer score; the rating space below expects 1..5
# Schema instance whose fields are referenced by the spaces, index and query.
product = Product()

# Text similarity over the product description, using the named embedding model.
description_space = sl.TextSimilaritySpace(
    text=product.description,
    model="Alibaba-NLP/gte-large-en-v1.5",
)

# Numeric space over the rating, bounded to 1..5 and configured with MAXIMUM mode.
rating_space = sl.NumberSpace(
    number=product.rating,
    min_value=1,
    max_value=5,
    mode=sl.Mode.MAXIMUM,
)

# One index combining both spaces; the rating field is additionally passed via `fields`.
index = sl.Index(
    [description_space, rating_space],
    fields=[product.rating],
)
# Every tunable part of the query is an `sl.Param`, so it can either be passed
# explicitly at query time or be filled in by an LLM from the text supplied as
# `natural_language_query`. The LLM path needs an OpenAI API key.

# Start from the index; the weight of each space is a query-time parameter.
query = sl.Query(
    index,
    weights={
        description_space: sl.Param("description_weight"),
        rating_space: sl.Param("rating_weight"),
    },
)

# Search over Product records.
query = query.find(product)

# Compare against the description space using the `description_query` parameter;
# its description text accompanies the parameter definition.
query = query.similar(
    description_space,
    sl.Param(
        "description_query",
        description="The text in the user's query that refers to product descriptions.",
    ),
)

# Select all fields and make the result count configurable.
query = query.select_all()
query = query.limit(sl.Param("limit"))

# Enable the natural-language path. The API key is read from the environment;
# os.environ[...] raises KeyError if OPEN_AI_API_KEY is not set.
query = query.with_natural_query(
    sl.Param("natural_language_query"),
    sl.OpenAIClientConfig(
        api_key=os.environ["OPEN_AI_API_KEY"],
        model="gpt-4o",
    ),
)
# Everything runs in memory here (server and Apache Spark executors are
# available too).
source = sl.InMemorySource(product)  # entry point for Product records
executor = sl.InMemoryExecutor(
    sources=[source],
    indices=[index],
)
app = executor.run()  # the running app that queries are issued against
# Ingest data into the system - index updates and other processing happen
# automatically. Each tuple below is (id, description, rating).
source.put(
    [
        {"id": product_id, "description": text, "rating": stars}
        for product_id, text, stars in (
            (1, "Budget toothbrush in black color. Just what you need.", 1),
            (2, "High-end toothbrush created with no compromises.", 5),
            (3, "A toothbrush created for the smart 21st century man.", 3),
        )
    ]
)
# Only the natural-language text and the limit are supplied here; the other
# parameters are left for the LLM to fill in.
result = app.query(
    query,
    natural_language_query="best toothbrushes",
    limit=1,
)

# Examine the parameters that were extracted from the natural-language query.
print(json.dumps(result.metadata, indent=2))

# The result is expected to be the 5-star rated product.
# NOTE: as a bare expression this is only displayed when it is the last
# statement of a notebook cell; in a plain script, wrap it in print().
sl.PandasConverter.to_pandas(result)