Imports
from huggingface_hub import notebook_login, login
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset
from random import randrange
from string import Template
from IPython.display import display, HTML
import pandas as pd
import tabulate
Model and Dataset
dataset_id = "samsum"
# dataset_id2 = "knkarthick/dialogsum"
model_id = "google/flan-t5-base"
dataset = load_dataset(dataset_id); dataset
DatasetDict({
train: Dataset({
features: ['id', 'dialogue', 'summary'],
num_rows: 14732
})
test: Dataset({
features: ['id', 'dialogue', 'summary'],
num_rows: 819
})
validation: Dataset({
features: ['id', 'dialogue', 'summary'],
num_rows: 818
})
})
dataset['train'], dataset['test']
(Dataset({
features: ['id', 'dialogue', 'summary'],
num_rows: 14732
}),
Dataset({
features: ['id', 'dialogue', 'summary'],
num_rows: 819
}))
sample_id = randrange(len(dataset['train']))
sample = dataset['train'][sample_id]; sample
{'id': '13681753',
'dialogue': "Catherine: hi! what's up? \r\nKevin: Hi, sorry, who are you?\r\nCatherine: oh, I am sorry, I thought you saved my number. We were a match on tinder some time ago:P\r\nKevin: oh, I see. Nice!\r\nCatherine: How are you doing? \r\nKevin: quite good... you?\r\nCatherine: not bad, I'm trying to make some real bonds with people from the internet or delete them :P\r\nKevin: how bold!\r\nCatherine: So maybe we should just meet?\r\nKevin: I would love to, but I don't have tinder any more.\r\nCatherine: Do you have a girlfriend?\r\nKevin: Kind of, it's very fresh and I don't want to mess it up.\r\nCatherine: I see. So you'll be deleted in a moment. hahah. I wish you good luck, hot blonde!\r\nKevin: HAHAHHA, thanks!\r\nCatherine: :*",
'summary': "Catherine wants to meet with Kevin. They met on a tinder date. Kevin declines her, as he's in a fresh relationship. "}
print(f"Dialogue: \n{sample['dialogue']}")
print("------------")
print(f"Summary: \n{sample['summary']}")
Dialogue:
Catherine: hi! what's up?
Kevin: Hi, sorry, who are you?
Catherine: oh, I am sorry, I thought you saved my number. We were a match on tinder some time ago:P
Kevin: oh, I see. Nice!
Catherine: How are you doing?
Kevin: quite good... you?
Catherine: not bad, I'm trying to make some real bonds with people from the internet or delete them :P
Kevin: how bold!
Catherine: So maybe we should just meet?
Kevin: I would love to, but I don't have tinder any more.
Catherine: Do you have a girlfriend?
Kevin: Kind of, it's very fresh and I don't want to mess it up.
Catherine: I see. So you'll be deleted in a moment. hahah. I wish you good luck, hot blonde!
Kevin: HAHAHHA, thanks!
Catherine: :*
------------
Summary:
Catherine wants to meet with Kevin. They met on a tinder date. Kevin declines her, as he's in a fresh relationship.
Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True); tokenizer
T5TokenizerFast(name_or_path='google/flan-t5-base', vocab_size=32100, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>', 'additional_special_tokens': ['<extra_id_0>', '<extra_id_1>', '<extra_id_2>', '<extra_id_3>', '<extra_id_4>', '<extra_id_5>', '<extra_id_6>', '<extra_id_7>', '<extra_id_8>', '<extra_id_9>', '<extra_id_10>', '<extra_id_11>', '<extra_id_12>', '<extra_id_13>', '<extra_id_14>', '<extra_id_15>', '<extra_id_16>', '<extra_id_17>', '<extra_id_18>', '<extra_id_19>', '<extra_id_20>', '<extra_id_21>', '<extra_id_22>', '<extra_id_23>', '<extra_id_24>', '<extra_id_25>', '<extra_id_26>', '<extra_id_27>', '<extra_id_28>', '<extra_id_29>', '<extra_id_30>', '<extra_id_31>', '<extra_id_32>', '<extra_id_33>', '<extra_id_34>', '<extra_id_35>', '<extra_id_36>', '<extra_id_37>', '<extra_id_38>', '<extra_id_39>', '<extra_id_40>', '<extra_id_41>', '<extra_id_42>', '<extra_id_43>', '<extra_id_44>', '<extra_id_45>', '<extra_id_46>', '<extra_id_47>', '<extra_id_48>', '<extra_id_49>', '<extra_id_50>', '<extra_id_51>', '<extra_id_52>', '<extra_id_53>', '<extra_id_54>', '<extra_id_55>', '<extra_id_56>', '<extra_id_57>', '<extra_id_58>', '<extra_id_59>', '<extra_id_60>', '<extra_id_61>', '<extra_id_62>', '<extra_id_63>', '<extra_id_64>', '<extra_id_65>', '<extra_id_66>', '<extra_id_67>', '<extra_id_68>', '<extra_id_69>', '<extra_id_70>', '<extra_id_71>', '<extra_id_72>', '<extra_id_73>', '<extra_id_74>', '<extra_id_75>', '<extra_id_76>', '<extra_id_77>', '<extra_id_78>', '<extra_id_79>', '<extra_id_80>', '<extra_id_81>', '<extra_id_82>', '<extra_id_83>', '<extra_id_84>', '<extra_id_85>', '<extra_id_86>', '<extra_id_87>', '<extra_id_88>', '<extra_id_89>', '<extra_id_90>', '<extra_id_91>', '<extra_id_92>', '<extra_id_93>', '<extra_id_94>', '<extra_id_95>', '<extra_id_96>', '<extra_id_97>', '<extra_id_98>', '<extra_id_99>']}, clean_up_tokenization_spaces=True), added_tokens_decoder={
0: AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
1: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
2: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32000: AddedToken("<extra_id_99>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32001: AddedToken("<extra_id_98>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32002: AddedToken("<extra_id_97>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32003: AddedToken("<extra_id_96>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32004: AddedToken("<extra_id_95>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32005: AddedToken("<extra_id_94>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32006: AddedToken("<extra_id_93>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32007: AddedToken("<extra_id_92>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32008: AddedToken("<extra_id_91>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32009: AddedToken("<extra_id_90>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32010: AddedToken("<extra_id_89>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32011: AddedToken("<extra_id_88>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32012: AddedToken("<extra_id_87>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32013: AddedToken("<extra_id_86>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32014: AddedToken("<extra_id_85>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32015: AddedToken("<extra_id_84>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32016: AddedToken("<extra_id_83>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32017: AddedToken("<extra_id_82>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32018: AddedToken("<extra_id_81>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32019: AddedToken("<extra_id_80>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32020: AddedToken("<extra_id_79>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32021: AddedToken("<extra_id_78>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32022: AddedToken("<extra_id_77>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32023: AddedToken("<extra_id_76>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32024: AddedToken("<extra_id_75>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32025: AddedToken("<extra_id_74>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32026: AddedToken("<extra_id_73>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32027: AddedToken("<extra_id_72>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32028: AddedToken("<extra_id_71>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32029: AddedToken("<extra_id_70>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32030: AddedToken("<extra_id_69>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32031: AddedToken("<extra_id_68>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32032: AddedToken("<extra_id_67>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32033: AddedToken("<extra_id_66>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32034: AddedToken("<extra_id_65>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32035: AddedToken("<extra_id_64>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32036: AddedToken("<extra_id_63>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32037: AddedToken("<extra_id_62>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32038: AddedToken("<extra_id_61>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32039: AddedToken("<extra_id_60>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32040: AddedToken("<extra_id_59>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32041: AddedToken("<extra_id_58>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32042: AddedToken("<extra_id_57>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32043: AddedToken("<extra_id_56>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32044: AddedToken("<extra_id_55>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32045: AddedToken("<extra_id_54>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32046: AddedToken("<extra_id_53>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32047: AddedToken("<extra_id_52>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32048: AddedToken("<extra_id_51>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32049: AddedToken("<extra_id_50>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32050: AddedToken("<extra_id_49>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32051: AddedToken("<extra_id_48>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32052: AddedToken("<extra_id_47>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32053: AddedToken("<extra_id_46>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32054: AddedToken("<extra_id_45>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32055: AddedToken("<extra_id_44>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32056: AddedToken("<extra_id_43>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32057: AddedToken("<extra_id_42>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32058: AddedToken("<extra_id_41>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32059: AddedToken("<extra_id_40>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32060: AddedToken("<extra_id_39>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32061: AddedToken("<extra_id_38>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32062: AddedToken("<extra_id_37>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32063: AddedToken("<extra_id_36>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32064: AddedToken("<extra_id_35>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32065: AddedToken("<extra_id_34>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32066: AddedToken("<extra_id_33>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32067: AddedToken("<extra_id_32>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32068: AddedToken("<extra_id_31>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32069: AddedToken("<extra_id_30>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32070: AddedToken("<extra_id_29>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32071: AddedToken("<extra_id_28>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32072: AddedToken("<extra_id_27>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32073: AddedToken("<extra_id_26>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32074: AddedToken("<extra_id_25>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32075: AddedToken("<extra_id_24>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32076: AddedToken("<extra_id_23>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32077: AddedToken("<extra_id_22>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32078: AddedToken("<extra_id_21>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32079: AddedToken("<extra_id_20>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32080: AddedToken("<extra_id_19>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32081: AddedToken("<extra_id_18>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32082: AddedToken("<extra_id_17>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32083: AddedToken("<extra_id_16>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32084: AddedToken("<extra_id_15>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32085: AddedToken("<extra_id_14>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32086: AddedToken("<extra_id_13>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32087: AddedToken("<extra_id_12>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32088: AddedToken("<extra_id_11>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32089: AddedToken("<extra_id_10>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32090: AddedToken("<extra_id_9>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32091: AddedToken("<extra_id_8>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32092: AddedToken("<extra_id_7>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32093: AddedToken("<extra_id_6>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32094: AddedToken("<extra_id_5>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32095: AddedToken("<extra_id_4>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32096: AddedToken("<extra_id_3>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32097: AddedToken("<extra_id_2>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32098: AddedToken("<extra_id_1>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
32099: AddedToken("<extra_id_0>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
sentence = "What time is it, Tom?"
sentence_encoded = tokenizer(sentence, return_tensors='pt'); sentence_encoded
{'input_ids': tensor([[ 363, 97, 19, 34, 6, 3059, 58, 1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1]])}
sentence_decoded = tokenizer.decode(sentence_encoded['input_ids'][0], skip_special_tokens=True); sentence_decoded
'What time is it, Tom?'
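To see what the encoder actually receives, the ids can be mapped back to their SentencePiece pieces. A quick sanity check (convert_ids_to_tokens is a standard tokenizer method; the pieces in the comment are what flan-t5-base should produce, with '▁' marking word starts):
# Inspect the subword pieces behind the ids above
tokens = tokenizer.convert_ids_to_tokens(sentence_encoded['input_ids'][0])
print(tokens)  # expected roughly: ['▁What', '▁time', '▁is', '▁it', ',', '▁Tom', '?', '</s>']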
Model
model = AutoModelForSeq2SeqLM.from_pretrained(model_id); model
T5ForConditionalGeneration(
(shared): Embedding(32128, 768)
(encoder): T5Stack(
(embed_tokens): Embedding(32128, 768)
(block): ModuleList(
(0): T5Block(
(layer): ModuleList(
(0): T5LayerSelfAttention(
(SelfAttention): T5Attention(
(q): Linear(in_features=768, out_features=768, bias=False)
(k): Linear(in_features=768, out_features=768, bias=False)
(v): Linear(in_features=768, out_features=768, bias=False)
(o): Linear(in_features=768, out_features=768, bias=False)
(relative_attention_bias): Embedding(32, 12)
)
(layer_norm): T5LayerNorm()
(dropout): Dropout(p=0.1, inplace=False)
)
(1): T5LayerFF(
(DenseReluDense): T5DenseGatedActDense(
(wi_0): Linear(in_features=768, out_features=2048, bias=False)
(wi_1): Linear(in_features=768, out_features=2048, bias=False)
(wo): Linear(in_features=2048, out_features=768, bias=False)
(dropout): Dropout(p=0.1, inplace=False)
(act): NewGELUActivation()
)
(layer_norm): T5LayerNorm()
(dropout): Dropout(p=0.1, inplace=False)
)
)
)
(1-11): 11 x T5Block(
(layer): ModuleList(
(0): T5LayerSelfAttention(
(SelfAttention): T5Attention(
(q): Linear(in_features=768, out_features=768, bias=False)
(k): Linear(in_features=768, out_features=768, bias=False)
(v): Linear(in_features=768, out_features=768, bias=False)
(o): Linear(in_features=768, out_features=768, bias=False)
)
(layer_norm): T5LayerNorm()
(dropout): Dropout(p=0.1, inplace=False)
)
(1): T5LayerFF(
(DenseReluDense): T5DenseGatedActDense(
(wi_0): Linear(in_features=768, out_features=2048, bias=False)
(wi_1): Linear(in_features=768, out_features=2048, bias=False)
(wo): Linear(in_features=2048, out_features=768, bias=False)
(dropout): Dropout(p=0.1, inplace=False)
(act): NewGELUActivation()
)
(layer_norm): T5LayerNorm()
(dropout): Dropout(p=0.1, inplace=False)
)
)
)
)
(final_layer_norm): T5LayerNorm()
(dropout): Dropout(p=0.1, inplace=False)
)
(decoder): T5Stack(
(embed_tokens): Embedding(32128, 768)
(block): ModuleList(
(0): T5Block(
(layer): ModuleList(
(0): T5LayerSelfAttention(
(SelfAttention): T5Attention(
(q): Linear(in_features=768, out_features=768, bias=False)
(k): Linear(in_features=768, out_features=768, bias=False)
(v): Linear(in_features=768, out_features=768, bias=False)
(o): Linear(in_features=768, out_features=768, bias=False)
(relative_attention_bias): Embedding(32, 12)
)
(layer_norm): T5LayerNorm()
(dropout): Dropout(p=0.1, inplace=False)
)
(1): T5LayerCrossAttention(
(EncDecAttention): T5Attention(
(q): Linear(in_features=768, out_features=768, bias=False)
(k): Linear(in_features=768, out_features=768, bias=False)
(v): Linear(in_features=768, out_features=768, bias=False)
(o): Linear(in_features=768, out_features=768, bias=False)
)
(layer_norm): T5LayerNorm()
(dropout): Dropout(p=0.1, inplace=False)
)
(2): T5LayerFF(
(DenseReluDense): T5DenseGatedActDense(
(wi_0): Linear(in_features=768, out_features=2048, bias=False)
(wi_1): Linear(in_features=768, out_features=2048, bias=False)
(wo): Linear(in_features=2048, out_features=768, bias=False)
(dropout): Dropout(p=0.1, inplace=False)
(act): NewGELUActivation()
)
(layer_norm): T5LayerNorm()
(dropout): Dropout(p=0.1, inplace=False)
)
)
)
(1-11): 11 x T5Block(
(layer): ModuleList(
(0): T5LayerSelfAttention(
(SelfAttention): T5Attention(
(q): Linear(in_features=768, out_features=768, bias=False)
(k): Linear(in_features=768, out_features=768, bias=False)
(v): Linear(in_features=768, out_features=768, bias=False)
(o): Linear(in_features=768, out_features=768, bias=False)
)
(layer_norm): T5LayerNorm()
(dropout): Dropout(p=0.1, inplace=False)
)
(1): T5LayerCrossAttention(
(EncDecAttention): T5Attention(
(q): Linear(in_features=768, out_features=768, bias=False)
(k): Linear(in_features=768, out_features=768, bias=False)
(v): Linear(in_features=768, out_features=768, bias=False)
(o): Linear(in_features=768, out_features=768, bias=False)
)
(layer_norm): T5LayerNorm()
(dropout): Dropout(p=0.1, inplace=False)
)
(2): T5LayerFF(
(DenseReluDense): T5DenseGatedActDense(
(wi_0): Linear(in_features=768, out_features=2048, bias=False)
(wi_1): Linear(in_features=768, out_features=2048, bias=False)
(wo): Linear(in_features=2048, out_features=768, bias=False)
(dropout): Dropout(p=0.1, inplace=False)
(act): NewGELUActivation()
)
(layer_norm): T5LayerNorm()
(dropout): Dropout(p=0.1, inplace=False)
)
)
)
)
(final_layer_norm): T5LayerNorm()
(dropout): Dropout(p=0.1, inplace=False)
)
(lm_head): Linear(in_features=768, out_features=32128, bias=False)
)
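A quick parameter count helps confirm what was loaded; this is plain PyTorch, and flan-t5-base should come out at roughly 250M parameters:
# Count the model's parameters (flan-t5-base is roughly 248M)
n_params = sum(p.numel() for p in model.parameters())
print(f"{n_params / 1e6:.0f}M parameters")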
Summarization
Prompt Engineering and Zero-Shot Learning
print(f"Dialogue: \n{sample['dialogue']}")
print("------------")
print(f"Baseline Human Summary: \n{sample['summary']}")
Dialogue:
Catherine: hi! what's up?
Kevin: Hi, sorry, who are you?
Catherine: oh, I am sorry, I thought you saved my number. We were a match on tinder some time ago:P
Kevin: oh, I see. Nice!
Catherine: How are you doing?
Kevin: quite good... you?
Catherine: not bad, I'm trying to make some real bonds with people from the internet or delete them :P
Kevin: how bold!
Catherine: So maybe we should just meet?
Kevin: I would love to, but I don't have tinder any more.
Catherine: Do you have a girlfriend?
Kevin: Kind of, it's very fresh and I don't want to mess it up.
Catherine: I see. So you'll be deleted in a moment. hahah. I wish you good luck, hot blonde!
Kevin: HAHAHHA, thanks!
Catherine: :*
------------
Baseline Human Summary:
Catherine wants to meet with Kevin. They met on a tinder date. Kevin declines her, as he's in a fresh relationship.
dialogue = sample['dialogue']
human_summary = sample['summary']
inputs = tokenizer(dialogue, return_tensors='pt'); inputs
{'input_ids': tensor([[17006, 10, 7102, 55, 125, 31, 7, 95, 58, 8595,
10, 2018, 6, 8032, 6, 113, 33, 25, 58, 17006,
10, 3, 32, 107, 6, 27, 183, 8032, 6, 27,
816, 25, 6024, 82, 381, 5, 101, 130, 3, 9,
1588, 30, 3, 16151, 52, 128, 97, 977, 10, 345,
8595, 10, 3, 32, 107, 6, 27, 217, 5, 8012,
55, 17006, 10, 571, 33, 25, 692, 58, 8595, 10,
882, 207, 233, 25, 58, 17006, 10, 59, 1282, 6,
27, 31, 51, 1119, 12, 143, 128, 490, 13237, 28,
151, 45, 8, 1396, 42, 9268, 135, 3, 10, 345,
8595, 10, 149, 8197, 55, 17006, 10, 264, 2087, 62,
225, 131, 942, 58, 8595, 10, 27, 133, 333, 12,
6, 68, 27, 278, 31, 17, 43, 3, 16151, 52,
136, 72, 5, 17006, 10, 531, 25, 43, 3, 9,
17442, 58, 8595, 10, 6557, 13, 6, 34, 31, 7,
182, 1434, 11, 27, 278, 31, 17, 241, 12, 8090,
34, 95, 5, 17006, 10, 27, 217, 5, 264, 25,
31, 195, 36, 16355, 16, 3, 9, 798, 5, 4244,
1024, 107, 5, 27, 1663, 25, 207, 5851, 6, 1312,
27363, 55, 8595, 10, 454, 14084, 14084, 5478, 6, 2049,
55, 17006, 10, 3, 10, 1935, 1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}
tokenizer.decode(model.generate(inputs['input_ids'], max_new_tokens=50)[0], skip_special_tokens=True)
"Catherine is trying to make some real bonds with people from the internet or delete them. Kevin doesn't have tinder anymore. Kevin has a fresh girlfriend."
def get_prompt(sample):
    dialogue = sample['dialogue']
    p = f"""Dialogue:\n{dialogue}\nSummary:"""
    return p
print(get_prompt(sample))
Dialogue:
Catherine: hi! what's up?
Kevin: Hi, sorry, who are you?
Catherine: oh, I am sorry, I thought you saved my number. We were a match on tinder some time ago:P
Kevin: oh, I see. Nice!
Catherine: How are you doing?
Kevin: quite good... you?
Catherine: not bad, I'm trying to make some real bonds with people from the internet or delete them :P
Kevin: how bold!
Catherine: So maybe we should just meet?
Kevin: I would love to, but I don't have tinder any more.
Catherine: Do you have a girlfriend?
Kevin: Kind of, it's very fresh and I don't want to mess it up.
Catherine: I see. So you'll be deleted in a moment. hahah. I wish you good luck, hot blonde!
Kevin: HAHAHHA, thanks!
Catherine: :*
Summary:
def get_prompt2(sample):
    dialogue = sample['dialogue']
    p = f"""What is happening in the conversation below? \n{dialogue}\nPlease tell:"""
    return p
def gen_from_prompt(sample, make_prompt=get_prompt, check=False):
    prompt = make_prompt(sample)
    human_summary = sample['summary']
    inputs = tokenizer(prompt, return_tensors='pt')  # print(inputs)
    if not check:
        outputs = tokenizer.decode(model.generate(inputs['input_ids'], max_new_tokens=50)[0], skip_special_tokens=True)
    else:
        outputs = tokenizer.decode(inputs['input_ids'][0], skip_special_tokens=True)
    return prompt, human_summary, outputs
sample
{'id': '13681753',
'dialogue': "Catherine: hi! what's up? \r\nKevin: Hi, sorry, who are you?\r\nCatherine: oh, I am sorry, I thought you saved my number. We were a match on tinder some time ago:P\r\nKevin: oh, I see. Nice!\r\nCatherine: How are you doing? \r\nKevin: quite good... you?\r\nCatherine: not bad, I'm trying to make some real bonds with people from the internet or delete them :P\r\nKevin: how bold!\r\nCatherine: So maybe we should just meet?\r\nKevin: I would love to, but I don't have tinder any more.\r\nCatherine: Do you have a girlfriend?\r\nKevin: Kind of, it's very fresh and I don't want to mess it up.\r\nCatherine: I see. So you'll be deleted in a moment. hahah. I wish you good luck, hot blonde!\r\nKevin: HAHAHHA, thanks!\r\nCatherine: :*",
'summary': "Catherine wants to meet with Kevin. They met on a tinder date. Kevin declines her, as he's in a fresh relationship. "}
print(gen_from_prompt(sample, check=False, make_prompt=get_prompt2)[2])
Catherine is trying to make some real bonds with people from the internet. Kevin doesn't have tinder anymore. Kevin has a fresh girlfriend.
print(gen_from_prompt(sample, check=False, make_prompt=get_prompt)[2])
Jody is a big fan of cards and presents. She's going to miss everyone. Ellie is first in line for baby cuddles. Kelly wants a cuddle too.
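All the generations above use generate()'s default greedy decoding. Beam search is a standard variant worth comparing; a minimal sketch (num_beams and early_stopping are regular generate() arguments, and the output will generally differ from the greedy summaries shown here):
# Beam-search variant of the same call, reusing the inputs encoded above
output_ids = model.generate(inputs['input_ids'], max_new_tokens=50, num_beams=4, early_stopping=True)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))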
prompt, human_summary, outputs = gen_from_prompt(sample, check=False, make_prompt=get_prompt)
# dict(zip(['Prompt', 'human_summary', 'outputs'], [prompt, human_summary, outputs]))
df = pd.DataFrame([dict(zip(['Prompt', 'human_summary', 'outputs'], [prompt, human_summary, outputs]))])
df['Summary'] = 'Human_summary:\n' + df['human_summary'] + "\nModel:\n" + df['outputs']
print(display(df[['Prompt', 'Summary']].to_markdown(index=False)))
"| Prompt | Summary |\n|:-------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------|\n| Dialogue: | Human_summary: |\n| Catherine: hi! what's up? | Catherine wants to meet with Kevin. They met on a tinder date. Kevin declines her, as he's in a fresh relationship. |\n| Kevin: Hi, sorry, who are you? | Model: |\n| Catherine: oh, I am sorry, I thought you saved my number. We were a match on tinder some time ago:P | Catherine is trying to make some real bonds with people from the internet. Kevin doesn't have tinder anymore. Kevin has a girlfriend. |\n| Kevin: oh, I see. Nice! | |\n| Catherine: How are you doing? | |\n| Kevin: quite good... you? | |\n| Catherine: not bad, I'm trying to make some real bonds with people from the internet or delete them :P | |\n| Kevin: how bold! | |\n| Catherine: So maybe we should just meet? | |\n| Kevin: I would love to, but I don't have tinder any more. | |\n| Catherine: Do you have a girlfriend? | |\n| Kevin: Kind of, it's very fresh and I don't want to mess it up. | |\n| Catherine: I see. So you'll be deleted in a moment. hahah. I wish you good luck, hot blonde! | |\n| Kevin: HAHAHHA, thanks! | |\n| Catherine: :* | |\n| Summary: | |"
None
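The trailing None is printed because display() returns None, and display() of a raw string only shows its repr. To render the table as actual markdown, one option (assuming an IPython frontend; Markdown is the standard IPython.display class, not imported above) is:
from IPython.display import Markdown
# Render the table instead of echoing the raw string and printing None
display(Markdown(df[['Prompt', 'Summary']].to_markdown(index=False)))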
def display_sample_summary(sample_ids, ds_type='test', gen=lambda sample: gen_from_prompt(sample, make_prompt=get_prompt)):
    d = []
    for i, sample_id in enumerate(sample_ids):
        sample = dataset[ds_type][sample_id]
        prompt, human_summary, outputs = gen(sample)
        d.append(dict(zip(['Prompt', 'human_summary', 'outputs'], [prompt, human_summary, outputs])))
    df = pd.DataFrame(d)
    df['Results'] = '\nPROMPT:-\n' + df['Prompt'] + '\n\nHUMAN_SUMMARY:-\n' + df['human_summary'] + "\n\nMODEL_SUMMARY:-\n" + df['outputs'] + "\n"
    print(df[['Results']].to_markdown())
display_sample_summary(sample_ids=[sample_id], ds_type='train',
                       gen=lambda sample: gen_from_prompt(sample, check=False, make_prompt=get_prompt2))
| | Results |
|---:|:-------------------------------------------------------------------------------------------------------------------------------------------------|
| 0 | PROMPT:- |
| | What is happening in the conversation below? |
| | Jody: Would like to say a massive thank you to everyone for cards and presents! It was a lovely send off! :) I'm gonna miss you! |
| | Kevin: We're all gonna miss you too! |
| | Sarah: You're very welcome! |
| | Sarah: Glad you liked it! |
| | Gina: can't wait for your baby first pictures! |
| | Jody: you can get some picture of me too if you want! x |
| | Ellie: i'm first in line for baby cuddles! |
| | Jody: can't believe i won't be there for a whole year! |
| | Emma: see you when you have a little baby! xxx |
| | Jody: a bit scared! |
| | Kelly: i want a cuddle too! lots of love |
| | Kevin: assuming you mean the baby! :D |
| | Jody: promise to visit for lots of cuddles! |
| | Please tell: |
| | |
| | HUMAN_SUMMARY:- |
| | Jody is thanking everyone, she will miss them. Others are saying by and wishing good her and a baby she is expecting. |
| | |
| | MODEL_SUMMARY:- |
| | Jody is a big fan of cards and presents. She's going to miss everyone. Ellie is first in line for baby cuddles. Kelly and Emma want to see Jody. |
# display_sample_summary(sample_ids=[10, 20, 45, 58, 1000], ds_type='train')
display_sample_summary(sample_ids=[10, 20, 45, 58, 1000], ds_type='train',
                       gen=lambda sample: gen_from_prompt(sample, check=False, make_prompt=get_prompt2))
| | Results |
|---:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 0 | PROMPT:- |
| | What is happening in the conversation below? |
| | Lucas: Hey! How was your day? |
| | Demi: Hey there! |
| | Demi: It was pretty fine, actually, thank you! |
| | Demi: I just got promoted! :D |
| | Lucas: Whoa! Great news! |
| | Lucas: Congratulations! |
| | Lucas: Such a success has to be celebrated. |
| | Demi: I agree! :D |
| | Demi: Tonight at Death & Co.? |
| | Lucas: Sure! |
| | Lucas: See you there at 10pm? |
| | Demi: Yeah! See you there! :D |
| | Please tell: |
| | |
| | HUMAN_SUMMARY:- |
| | Demi got promoted. She will celebrate that with Lucas at Death & Co at 10 pm. |
| | |
| | MODEL_SUMMARY:- |
| | Demi got promoted. She will meet Lucas at Death & Co. at 10 pm. |
| 1 | PROMPT:- |
| | What is happening in the conversation below? |
| | Ashley: Guys, you have to read this book! <file_photo> |
| | Marcus: Why, what's so special about it? |
| | Erin: I think I've already heard about it from someone. Is it really that good? |
| | Ashley: It's the best thing I've ever read! Completely life-changing! It's opened my eyes to a lot of things. |
| | Seamus: Sorry, but I don't like books that are written to change my life. I prefer books that are simply fun to read :P |
| | Marcus: I get what you mean. I feel like some authors are so concentrated on making their books full of wisdom that they completely forget that they should also be readable. |
| | Erin: Do you mean Coelho? XD |
| | Marcus: No, while I'm not a fan of his, at least I've never fallen asleep while reading his books. I meant this one for example: <file_other> |
| | Ashley: Erm, I quite like his books. |
| | Seamus: Did they change your life too? :D |
| | Ashley: Wait, I meant Coelho. I've never read the other guy. |
| | Marcus: Trust me, don't. There are lots of better ways of wasting your time. |
| | Ashley: LOL, okay, I trust you. But the one I posted at the beginning is really good. It's not just some philosophical gibberish, it's actually a crime novel, so there's a lot of action too. |
| | Erin: Does it have a cute detective? ;) |
| | Ashley: Even two of them, actually. Believe me, you won't be able to decide which one to love more! |
| | Erin: Okay, I'm already sold :D |
| | Please tell: |
| | |
| | HUMAN_SUMMARY:- |
| | Erin is convinced by Ashley's book recommendations, while Seamus and Marcus aren't. |
| | |
| | MODEL_SUMMARY:- |
| | Ashley recommends Coelho's books to Marcus and Erin. |
| 2 | PROMPT:- |
| | What is happening in the conversation below? |
| | Juliette: So what? Tell me |
| | Jimmy: One minute |
| | Juliette: What did the doctor say? |
| | Jimmy: It's almost finished, wait a second |
| | Juliette: Is it so serious? Should I be afraid? |
| | Jimmy: OK, just finished |
| | Juliette: So, tell me, I can't wait longer |
| | Jimmy: Acute gastritis |
| | Juliette: What's that? |
| | Jimmy: Acid attacks in the stomach |
| | Juliette: It's not a cancer? |
| | Jimmy: No, it's not |
| | Juliette: Is this sure? |
| | Jimmy: Yes, darling |
| | Juliette: You're not lying to me ? |
| | Jimmy: No, darling, please believe me |
| | Juliette: And how can we cure this? |
| | Jimmy: Just a few tablets for a month and that should be enough |
| | Juliette: I'm so glad it's not a cancer |
| | Jimmy: :000 |
| | Juliette: I love you |
| | Jimmy: Me too |
| | Please tell: |
| | |
| | HUMAN_SUMMARY:- |
| | Jimmy is going to take medication for a month to cure his acute gastritis. |
| | |
| | MODEL_SUMMARY:- |
| | Jimmy has Acute gastritis. He has acid attacks in the stomach. He will take a few tablets for a month. |
| 3 | PROMPT:- |
| | What is happening in the conversation below? |
| | Joyce: Guys, sorry I'm running late today! Will be there soon |
| | Andrew: I’m a wee bit late too but will be there before 8! Leaving soon |
| | Carla: On my way, with sipke😀 |
| | Anette: On the way kids |
| | Anette: In the first here wtf |
| | Helen: Where are yall |
| | Helen: Annette are you here? |
| | Helen: I'm at a table by one of the windows at the front! |
| | Please tell: |
| | |
| | HUMAN_SUMMARY:- |
| | Joyce and Andrew are running late. Carla and Anette are on their way. Helen is at a table by one of the windows at the front. |
| | |
| | MODEL_SUMMARY:- |
| | Joyce is running late. Andrew is a bit late. Carla is on her way. Helen is at a table by one of the windows at the front. |
| 4 | PROMPT:- |
| | What is happening in the conversation below? |
| | Ann: Do you have plans for the holidays this year? |
| | Mary: Well we thought of going for a hike somewhere beautiful |
| | Mary: and you? |
| | Ann: We talked about it last night and we thought maybe you would like to do sth together? |
| | Mary: Why not |
| | Mary: just remember that we're outdoorsy people :D |
| | Ann: I know, but you're not even considering a nice beach somewhere? |
| | Mary: weeeeell, that's not really us, you know? |
| | Mary: we prefer tents and hiking boots to bikinis and sunscreen ;) |
| | Ann: I see. |
| | Ann: I'm not sure I'm ready for a longer hike but maybe we could go somewhere like that for a weekend so I can try it? |
| | Mary: That would be great! |
| | Mary: I'm sure you'll love it just like we do :) |
| | Ann: the idea of spending you time in the woods sure is romantic, but i'm not sure I'll remember that when bugs qill be eating me alive or sth |
| | Mary: Yeah, it's not for everyone :D |
| | Ann: Would like yo try though. |
| | Mary: There are a few great spots nearby |
| | Mary: Perfect for a weekend. |
| | Ann: Great, let me know and I'll talk to Henry. |
| | Mary: OK :) |
| | Please tell: |
| | |
| | HUMAN_SUMMARY:- |
| | Mary, Ann and their partners will spend the weekend together hiking. |
| | |
| | MODEL_SUMMARY:- |
| | Mary and Ann are going for a hike for the holidays. Ann is not sure if she's ready for a longer hike. Ann will let Mary know about the possibility. |
# display_sample_summary(sample_ids=[10, 20, 45, 58, 1000], ds_type='train')
display_sample_summary(sample_ids=[10, 20, 45, 58, 1000], ds_type='train',
                       gen=lambda sample: gen_from_prompt(sample, check=False, make_prompt=get_prompt))
| | Results |
|---:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 0 | PROMPT:- |
| | Dialogue: |
| | Lucas: Hey! How was your day? |
| | Demi: Hey there! |
| | Demi: It was pretty fine, actually, thank you! |
| | Demi: I just got promoted! :D |
| | Lucas: Whoa! Great news! |
| | Lucas: Congratulations! |
| | Lucas: Such a success has to be celebrated. |
| | Demi: I agree! :D |
| | Demi: Tonight at Death & Co.? |
| | Lucas: Sure! |
| | Lucas: See you there at 10pm? |
| | Demi: Yeah! See you there! :D |
| | Summary: |
| | |
| | HUMAN_SUMMARY:- |
| | Demi got promoted. She will celebrate that with Lucas at Death & Co at 10 pm. |
| | |
| | MODEL_SUMMARY:- |
| | Demi got promoted. She will meet Lucas at Death & Co. at 10 pm. |
| 1 | PROMPT:- |
| | Dialogue: |
| | Ashley: Guys, you have to read this book! <file_photo> |
| | Marcus: Why, what's so special about it? |
| | Erin: I think I've already heard about it from someone. Is it really that good? |
| | Ashley: It's the best thing I've ever read! Completely life-changing! It's opened my eyes to a lot of things. |
| | Seamus: Sorry, but I don't like books that are written to change my life. I prefer books that are simply fun to read :P |
| | Marcus: I get what you mean. I feel like some authors are so concentrated on making their books full of wisdom that they completely forget that they should also be readable. |
| | Erin: Do you mean Coelho? XD |
| | Marcus: No, while I'm not a fan of his, at least I've never fallen asleep while reading his books. I meant this one for example: <file_other> |
| | Ashley: Erm, I quite like his books. |
| | Seamus: Did they change your life too? :D |
| | Ashley: Wait, I meant Coelho. I've never read the other guy. |
| | Marcus: Trust me, don't. There are lots of better ways of wasting your time. |
| | Ashley: LOL, okay, I trust you. But the one I posted at the beginning is really good. It's not just some philosophical gibberish, it's actually a crime novel, so there's a lot of action too. |
| | Erin: Does it have a cute detective? ;) |
| | Ashley: Even two of them, actually. Believe me, you won't be able to decide which one to love more! |
| | Erin: Okay, I'm already sold :D |
| | Summary: |
| | |
| | HUMAN_SUMMARY:- |
| | Erin is convinced by Ashley's book recommendations, while Seamus and Marcus aren't. |
| | |
| | MODEL_SUMMARY:- |
| | Ashley recommends Coelho's books. Seamus doesn't like books that are written to change his life. Marcus doesn't like books that are written to change his life. |
| 2 | PROMPT:- |
| | Dialogue: |
| | Juliette: So what? Tell me |
| | Jimmy: One minute |
| | Juliette: What did the doctor say? |
| | Jimmy: It's almost finished, wait a second |
| | Juliette: Is it so serious? Should I be afraid? |
| | Jimmy: OK, just finished |
| | Juliette: So, tell me, I can't wait longer |
| | Jimmy: Acute gastritis |
| | Juliette: What's that? |
| | Jimmy: Acid attacks in the stomach |
| | Juliette: It's not a cancer? |
| | Jimmy: No, it's not |
| | Juliette: Is this sure? |
| | Jimmy: Yes, darling |
| | Juliette: You're not lying to me ? |
| | Jimmy: No, darling, please believe me |
| | Juliette: And how can we cure this? |
| | Jimmy: Just a few tablets for a month and that should be enough |
| | Juliette: I'm so glad it's not a cancer |
| | Jimmy: :000 |
| | Juliette: I love you |
| | Jimmy: Me too |
| | Summary: |
| | |
| | HUMAN_SUMMARY:- |
| | Jimmy is going to take medication for a month to cure his acute gastritis. |
| | |
| | MODEL_SUMMARY:- |
| | Jimmy has Acute gastritis. He has acid attacks in the stomach. He's taking a few tablets for a month. |
| 3 | PROMPT:- |
| | Dialogue: |
| | Joyce: Guys, sorry I'm running late today! Will be there soon |
| | Andrew: I’m a wee bit late too but will be there before 8! Leaving soon |
| | Carla: On my way, with sipke😀 |
| | Anette: On the way kids |
| | Anette: In the first here wtf |
| | Helen: Where are yall |
| | Helen: Annette are you here? |
| | Helen: I'm at a table by one of the windows at the front! |
| | Summary: |
| | |
| | HUMAN_SUMMARY:- |
| | Joyce and Andrew are running late. Carla and Anette are on their way. Helen is at a table by one of the windows at the front. |
| | |
| | MODEL_SUMMARY:- |
| | Joyce is running late today. Andrew is a bit late. Carla is on her way with sipke. Helen is at a table by one of the windows at the front. |
| 4 | PROMPT:- |
| | Dialogue: |
| | Ann: Do you have plans for the holidays this year? |
| | Mary: Well we thought of going for a hike somewhere beautiful |
| | Mary: and you? |
| | Ann: We talked about it last night and we thought maybe you would like to do sth together? |
| | Mary: Why not |
| | Mary: just remember that we're outdoorsy people :D |
| | Ann: I know, but you're not even considering a nice beach somewhere? |
| | Mary: weeeeell, that's not really us, you know? |
| | Mary: we prefer tents and hiking boots to bikinis and sunscreen ;) |
| | Ann: I see. |
| | Ann: I'm not sure I'm ready for a longer hike but maybe we could go somewhere like that for a weekend so I can try it? |
| | Mary: That would be great! |
| | Mary: I'm sure you'll love it just like we do :) |
| | Ann: the idea of spending you time in the woods sure is romantic, but i'm not sure I'll remember that when bugs qill be eating me alive or sth |
| | Mary: Yeah, it's not for everyone :D |
| | Ann: Would like yo try though. |
| | Mary: There are a few great spots nearby |
| | Mary: Perfect for a weekend. |
| | Ann: Great, let me know and I'll talk to Henry. |
| | Mary: OK :) |
| | Summary: |
| | |
| | HUMAN_SUMMARY:- |
| | Mary, Ann and their partners will spend the weekend together hiking. |
| | |
| | MODEL_SUMMARY:- |
| | Mary and Ann are going for a hike for the holidays. Ann is not sure if she's ready for a longer hike. Ann will let Mary know about the possibility. |
One-Shot Learning
sample
{'id': '13681753',
'dialogue': "Catherine: hi! what's up? \r\nKevin: Hi, sorry, who are you?\r\nCatherine: oh, I am sorry, I thought you saved my number. We were a match on tinder some time ago:P\r\nKevin: oh, I see. Nice!\r\nCatherine: How are you doing? \r\nKevin: quite good... you?\r\nCatherine: not bad, I'm trying to make some real bonds with people from the internet or delete them :P\r\nKevin: how bold!\r\nCatherine: So maybe we should just meet?\r\nKevin: I would love to, but I don't have tinder any more.\r\nCatherine: Do you have a girlfriend?\r\nKevin: Kind of, it's very fresh and I don't want to mess it up.\r\nCatherine: I see. So you'll be deleted in a moment. hahah. I wish you good luck, hot blonde!\r\nKevin: HAHAHHA, thanks!\r\nCatherine: :*",
'summary': "Catherine wants to meet with Kevin. They met on a tinder date. Kevin declines her, as he's in a fresh relationship. "}
def n_shot_prompt(example_ds, example_indices, sample, prompt_func=get_prompt):
    examples = []
    for idx in example_indices:
        examples.append(prompt_func(example_ds[idx]) + "\n" + example_ds[idx]['summary'])
    example_prefix = "\n\n".join(examples)
    p = "\n\n".join([example_prefix, prompt_func(sample)])
    return p
example_ds = dataset['train']
example_indices = [1, 2, 3]

prompt_func = get_prompt2

p = n_shot_prompt(example_ds, example_indices, sample, prompt_func)
print(p)
example_ds[1], sample
What is happening in the conversation below?
Olivia: Who are you voting for in this election?
Oliver: Liberals as always.
Olivia: Me too!!
Oliver: Great
Please tell:
Olivia and Olivier are voting for liberals in this election.
What is happening in the conversation below?
Tim: Hi, what's up?
Kim: Bad mood tbh, I was going to do lots of stuff but ended up procrastinating
Tim: What did you plan on doing?
Kim: Oh you know, uni stuff and unfucking my room
Kim: Maybe tomorrow I'll move my ass and do everything
Kim: We were going to defrost a fridge so instead of shopping I'll eat some defrosted veggies
Tim: For doing stuff I recommend Pomodoro technique where u use breaks for doing chores
Tim: It really helps
Kim: thanks, maybe I'll do that
Tim: I also like using post-its in kaban style
Please tell:
Kim may try the pomodoro technique recommended by Tim to get more stuff done.
What is happening in the conversation below?
Edward: Rachel, I think I'm in ove with Bella..
rachel: Dont say anything else..
Edward: What do you mean??
rachel: Open your fu**ing door.. I'm outside
Please tell:
Edward thinks he is in love with Bella. Rachel wants Edward to open his door. Rachel is outside.
What is happening in the conversation below?
Catherine: hi! what's up?
Kevin: Hi, sorry, who are you?
Catherine: oh, I am sorry, I thought you saved my number. We were a match on tinder some time ago:P
Kevin: oh, I see. Nice!
Catherine: How are you doing?
Kevin: quite good... you?
Catherine: not bad, I'm trying to make some real bonds with people from the internet or delete them :P
Kevin: how bold!
Catherine: So maybe we should just meet?
Kevin: I would love to, but I don't have tinder any more.
Catherine: Do you have a girlfriend?
Kevin: Kind of, it's very fresh and I don't want to mess it up.
Catherine: I see. So you'll be deleted in a moment. hahah. I wish you good luck, hot blonde!
Kevin: HAHAHHA, thanks!
Catherine: :*
Please tell:
({'id': '13728867',
'dialogue': 'Olivia: Who are you voting for in this election? \r\nOliver: Liberals as always.\r\nOlivia: Me too!!\r\nOliver: Great',
'summary': 'Olivia and Olivier are voting for liberals in this election. '},
{'id': '13681753',
'dialogue': "Catherine: hi! what's up? \r\nKevin: Hi, sorry, who are you?\r\nCatherine: oh, I am sorry, I thought you saved my number. We were a match on tinder some time ago:P\r\nKevin: oh, I see. Nice!\r\nCatherine: How are you doing? \r\nKevin: quite good... you?\r\nCatherine: not bad, I'm trying to make some real bonds with people from the internet or delete them :P\r\nKevin: how bold!\r\nCatherine: So maybe we should just meet?\r\nKevin: I would love to, but I don't have tinder any more.\r\nCatherine: Do you have a girlfriend?\r\nKevin: Kind of, it's very fresh and I don't want to mess it up.\r\nCatherine: I see. So you'll be deleted in a moment. hahah. I wish you good luck, hot blonde!\r\nKevin: HAHAHHA, thanks!\r\nCatherine: :*",
'summary': "Catherine wants to meet with Kevin. They met on a tinder date. Kevin declines her, as he's in a fresh relationship. "})
example_ds = dataset['train']
example_indices = [1]

print(n_shot_prompt(example_ds, example_indices, sample, prompt_func=get_prompt))
Dialogue:
Olivia: Who are you voting for in this election?
Oliver: Liberals as always.
Olivia: Me too!!
Oliver: Great
Summary:
Olivia and Olivier are voting for liberals in this election.
Dialogue:
Catherine: hi! what's up?
Kevin: Hi, sorry, who are you?
Catherine: oh, I am sorry, I thought you saved my number. We were a match on tinder some time ago:P
Kevin: oh, I see. Nice!
Catherine: How are you doing?
Kevin: quite good... you?
Catherine: not bad, I'm trying to make some real bonds with people from the internet or delete them :P
Kevin: how bold!
Catherine: So maybe we should just meet?
Kevin: I would love to, but I don't have tinder any more.
Catherine: Do you have a girlfriend?
Kevin: Kind of, it's very fresh and I don't want to mess it up.
Catherine: I see. So you'll be deleted in a moment. hahah. I wish you good luck, hot blonde!
Kevin: HAHAHHA, thanks!
Catherine: :*
Summary:
display_sample_summary(sample_ids=[sample_id], ds_type='train',
                       gen=lambda sample: gen_from_prompt(sample, check=False,
                                                          make_prompt=lambda sample: n_shot_prompt(example_ds, example_indices, sample, prompt_func=get_prompt2)))
| | Results |
|---:|:--------------------------------------------------------------------------------------------------------------------------------------------|
| 0 | PROMPT:- |
| | What is happening in the conversation below? |
| | Olivia: Who are you voting for in this election? |
| | Oliver: Liberals as always. |
| | Olivia: Me too!! |
| | Oliver: Great |
| | Please tell: |
| | Olivia and Olivier are voting for liberals in this election. |
| | |
| | What is happening in the conversation below? |
| | Catherine: hi! what's up? |
| | Kevin: Hi, sorry, who are you? |
| | Catherine: oh, I am sorry, I thought you saved my number. We were a match on tinder some time ago:P |
| | Kevin: oh, I see. Nice! |
| | Catherine: How are you doing? |
| | Kevin: quite good... you? |
| | Catherine: not bad, I'm trying to make some real bonds with people from the internet or delete them :P |
| | Kevin: how bold! |
| | Catherine: So maybe we should just meet? |
| | Kevin: I would love to, but I don't have tinder any more. |
| | Catherine: Do you have a girlfriend? |
| | Kevin: Kind of, it's very fresh and I don't want to mess it up. |
| | Catherine: I see. So you'll be deleted in a moment. hahah. I wish you good luck, hot blonde! |
| | Kevin: HAHAHHA, thanks! |
| | Catherine: :* |
| | Please tell: |
| | |
| | HUMAN_SUMMARY:- |
| | Catherine wants to meet with Kevin. They met on a tinder date. Kevin declines her, as he's in a fresh relationship. |
| | |
| | MODEL_SUMMARY:- |
| | Catherine is trying to make some real bonds with people from the internet. Kevin doesn't have tinder anymore. Kevin has a fresh girlfriend. |
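A natural extension is few-shot prompting with several examples, but the tokenizer loaded above reports model_max_length=512, so stacked examples can push past the encoder's context and be silently truncated. A small guard sketch before generating (illustrative only; it reuses n_shot_prompt and the tokenizer defined above):
# Check the prompt length against the model's 512-token context before generating
p = n_shot_prompt(example_ds, [1, 2, 3], sample, prompt_func=get_prompt)
n_tokens = len(tokenizer(p)['input_ids'])
if n_tokens > tokenizer.model_max_length:
    print(f"Prompt is {n_tokens} tokens; anything past {tokenizer.model_max_length} may be truncated.")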