# Install dependencies if needed
%pip install openai
%pip install scikit-learn
from IPython.display import Image, display
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from openai import OpenAI
# Create the OpenAI API client shared by the helper functions in this file.
# No credentials are passed here; see the quickstart for how the client is
# configured: https://platform.openai.com/docs/quickstart?context=python
client = OpenAI()
# Load the furniture dataset into a DataFrame. The path is relative, so it is
# resolved against the current working directory.
dataset_path = "data/amazon_furniture_dataset.csv"
df = pd.read_csv(dataset_path)
# System prompt for the keyword-tagging step: given an image and the item
# title, the model is asked to return search keywords (item type, material,
# style, color) for the titled item only, as an array of lower-case strings.
system_prompt = '''
You are an agent specialized in tagging images of furniture items, decorative items, or furnishings with relevant keywords that could be used to search for these items on a marketplace.
You will be provided with an image and the title of the item that is depicted in the image, and your goal is to extract keywords for only the item specified.
Keywords should be concise and in lower case.
Keywords can describe things like:
- Item type e.g. 'sofa bed', 'chair', 'desk', 'plant'
- Item material e.g. 'wood', 'metal', 'fabric'
- Item style e.g. 'scandinavian', 'vintage', 'industrial'
- Item color e.g. 'red', 'blue', 'white'
Only deduce material, style or color keywords when it is obvious that they make the item depicted in the image stand out.
Return keywords in the format of an array of strings, like this:
['desk', 'industrial', 'metal']
'''
def compare_keyword(keyword):
    """Return the row of ``df_keywords`` that is most similar to ``keyword``.

    The keyword is embedded with ``get_embedding`` and scored against every
    stored embedding using cosine similarity. As a side effect the scores are
    written to the ``similarity`` column of the module-level ``df_keywords``
    frame, overwriting any scores from a previous call.

    Args:
        keyword: Text to compare against the existing keywords.

    Returns:
        The highest-scoring ``df_keywords`` row as a ``pandas.Series``. Its
        ``similarity`` entry is a plain float.
    """
    # cosine_similarity works on 2-D inputs, so the query becomes a single row.
    # It does not change per stored keyword, so build it once.
    query = np.array(get_embedding(keyword)).reshape(1, -1)

    def _score(stored_embedding):
        # The pairwise call returns a (1, 1) matrix; unwrap it so the column
        # holds scalars that sort and compare like ordinary numbers.
        candidate = np.array(stored_embedding).reshape(1, -1)
        return float(cosine_similarity(candidate, query)[0][0])

    df_keywords['similarity'] = df_keywords['embedding'].apply(_score)
    return df_keywords.sort_values('similarity', ascending=False).iloc[0]
def replace_keyword(keyword, threshold = 0.6):
    """Map ``keyword`` onto an already-known keyword when one is close enough.

    Args:
        keyword: Candidate keyword to normalise.
        threshold: Similarity that the best match must exceed for the
            replacement to happen.

    Returns:
        The best-matching existing keyword if its similarity is above
        ``threshold``; otherwise ``keyword`` unchanged.
    """
    best_match = compare_keyword(keyword)
    # Guard clause: keep the keyword as-is unless the match clears the bar.
    if not (best_match['similarity'] > threshold):
        return keyword
    existing = best_match['keyword']
    print(f"Replacing '{keyword}' with existing keyword: '{existing}'")
    return existing
# System prompt for the description step: given an image and a title, the
# model describes the main item concisely, using the title to decide which
# item to describe when several are shown.
describe_system_prompt = '''
You are a system generating descriptions for furniture items, decorative items, or furnishings on an e-commerce website.
Provided with an image and a title, you will describe the main item that you see in the image, giving details but staying concise.
You can describe unambiguously what the item is and its material, color, and style if clearly identifiable.
If there are multiple items depicted, refer to the title to understand which item you should describe.
'''
# NOTE(review): bare string literal that is not assigned to any name, so it has
# no runtime effect. It looks like a sample generated description kept for
# reference - confirm before removing.
'''
This is a free-standing shoe rack featuring a multi-layer design, constructed from metal for durability. The rack is finished in a clean white color, which gives it a modern and versatile look, suitable for various home decor styles. It includes several horizontal shelves dedicated to organizing shoes, providing ample space for multiple pairs.
Additionally, the rack is equipped with 8 double hooks, which are integrated into the frame above the shoe shelves. These hooks offer extra functionality, allowing for the hanging of accessories such as hats, scarves, or bags. The design is space-efficient and ideal for placement in living rooms, bathrooms, hallways, or entryways, where it can serve as a practical storage solution while contributing to the tidiness and aesthetic of the space.
'''
# System prompt for the captioning step: the model receives a textual item
# description (not the image) and returns a one-sentence caption.
# caption_image sends this as the system message.
caption_system_prompt = '''
Your goal is to generate short, descriptive captions for images of furniture items, decorative items, or furnishings based on an image description.
You will be provided with a description of an item image and you will output a caption that captures the most important information about the item.
Your generated caption should be short (1 sentence), and include the most relevant information about the item.
The most important information could be: the type of the item, the style (if mentioned), the material if especially relevant and any distinctive features.
'''
# Description -> caption pairs used as few-shot examples for captioning.
# Each entry has a long "description" and the short "caption" expected for it;
# they are turned into alternating user/assistant chat messages further down.
few_shot_examples = [
{
"description": "This is a multi-layer metal shoe rack featuring a free-standing design. It has a clean, white finish that gives it a modern and versatile look, suitable for various home decors. The rack includes several horizontal shelves dedicated to organizing shoes, providing ample space for multiple pairs. Above the shoe storage area, there are 8 double hooks arranged in two rows, offering additional functionality for hanging items such as hats, scarves, or bags. The overall structure is sleek and space-saving, making it an ideal choice for placement in living rooms, bathrooms, hallways, or entryways where efficient use of space is essential.",
"caption": "White metal free-standing shoe rack"
},
{
"description": "The image shows a set of two dining chairs in black. These chairs are upholstered in a leather-like material, giving them a sleek and sophisticated appearance. The design features straight lines with a slight curve at the top of the high backrest, which adds a touch of elegance. The chairs have a simple, vertical stitching detail on the backrest, providing a subtle decorative element. The legs are also black, creating a uniform look that would complement a contemporary dining room setting. The chairs appear to be designed for comfort and style, suitable for both casual and formal dining environments.",
"caption": "Set of 2 modern black leather dining chairs"
},
{
"description": "This is a square plant repotting mat designed for indoor gardening tasks such as transplanting and changing soil for plants. It measures 26.8 inches by 26.8 inches and is made from a waterproof material, which appears to be a durable, easy-to-clean fabric in a vibrant green color. The edges of the mat are raised with integrated corner loops, likely to keep soil and water contained during gardening activities. The mat is foldable, enhancing its portability, and can be used as a protective surface for various gardening projects, including working with succulents. It's a practical accessory for garden enthusiasts and makes for a thoughtful gift for those who enjoy indoor plant care.",
"caption": "Waterproof square plant repotting mat"
}
]
# Flatten the few-shot pairs into one chat transcript: for every example, a
# "user" message carrying the description followed by an "assistant" message
# carrying the expected caption.
formatted_examples = [
    {"role": speaker, "content": example[field]}
    for example in few_shot_examples
    for speaker, field in (("user", "description"), ("assistant", "caption"))
]
def caption_image(description, model="gpt-4-turbo-preview"):
    """Generate a short caption for an item from its textual description.

    The request is made of the captioning system prompt, the few-shot
    user/assistant examples, and finally ``description`` as the user message.

    Args:
        description: Text description of the item to caption.
        model: Name of the chat completion model to call.

    Returns:
        The content of the first choice returned by the chat completion call.
    """
    # Build a fresh list on every call. Aliasing ``formatted_examples`` and
    # inserting/appending in place would grow the shared few-shot list, so
    # later calls would carry duplicate system prompts and the descriptions
    # of earlier items.
    messages = [
        {"role": "system", "content": caption_system_prompt},
        *formatted_examples,
        {"role": "user", "content": description},
    ]
    response = client.chat.completions.create(
        model=model,
        temperature=0.2,
        messages=messages
    )
    return response.choices[0].message.content