python generate questions for a given context
# for a given context, generate a question using an ai model
# https://pythonprogrammingsnippets.tumblr.com
import torch

device = torch.device("cpu")

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("voidful/context-only-question-generator")
model = AutoModelForSeq2SeqLM.from_pretrained("voidful/context-only-question-generator").to(device)

def get_questions_for_context(context, model, tokenizer, num_count=5):
    # beam search returns num_count candidate questions for the context
    inputs = tokenizer(context, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(**inputs, num_beams=num_count, num_return_sequences=num_count)
    return [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

def get_question_for_context(context, model, tokenizer):
    # the first beam is the highest-scoring question
    return get_questions_for_context(context, model, tokenizer)[0]

# split a multi-sentence context and return one question per sentence
def context_sentences_to_questions(context, model, tokenizer):
    questions = []
    for sentence in context.split("."):
        if len(sentence) < 1:
            continue  # skip blanks
        question = get_question_for_context(sentence, model, tokenizer)
        questions.append(question)
    return questions
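The last helper isn't exercised directly in the examples below, so here's a quick usage sketch (the two-sentence context is my own, not from the original snippet):

# one question per sentence in the context
sentences = "The sky is blue. Water boils at 100 degrees Celsius."
print(context_sentences_to_questions(sentences, model, tokenizer))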
example 1 (split a string by "." and process):
context = "The capital of France is Paris." context += "The capital of Germany is Berlin." context += "The capital of Spain is Madrid." context += "He is a dog named Robert." if len(context.split(".")) > 2: questions = [] for sentence in context.split("."): if len(sentence) < 1: continue # skip blanks question = get_question_for_context(sentence, model, tokenizer) questions.append(question) print(questions) else: question = get_question_for_context(context, model, tokenizer) print(question)
output:
['What is the capital of France?', 'What is the capital of Germany?', 'What is the capital of Spain?', 'Who is Robert?']
example 2 (generate multiple questions for a given context):
print("\r\n\r\n") context = "She walked to the store to buy a jug of milk." print("Context:\r\n", context) print("") questions = get_questions_for_context(context, model, tokenizer, num_count=15) # pretty print all the questions print("Generated Questions:") for question in questions: print(question) print("\r\n\r\n")
output:
Generated Questions:
Where did she go to buy milk?
What did she walk to the store to buy?
Why did she walk to the store to buy milk?
Why did she go to the store?
Why did she go to the grocery store?
What did she go to the store to buy?
Where did the woman go to buy milk?
Why did she go to the store to buy milk?
What did she buy at the grocery store?
Why did she walk to the store?
What kind of milk did she buy at the store?
Where did she walk to buy milk?
What kind of milk did she buy?
Where did she go to get milk?
What did she buy at the store?
and if we wanted to answer those questions (ez pz):
# now generate an answer for a given question
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

tokenizer = AutoTokenizer.from_pretrained("deepset/tinyroberta-squad2")
model = AutoModelForQuestionAnswering.from_pretrained("deepset/tinyroberta-squad2")

def get_answer_for_question(question, context, model, tokenizer):
    inputs = tokenizer(question, context, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # the most likely start and end tokens bound the answer span
    answer_start_index = outputs.start_logits.argmax()
    answer_end_index = outputs.end_logits.argmax()
    predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
    answer = tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)
    return answer

print("Context:\r\n", context, "\r\n")
for question in questions:
    # right pad the question to 50 characters so the answers line up
    question_text = question.ljust(50)
    answer = get_answer_for_question(question, context, model, tokenizer)
    print("Question: ", question_text, "Answer: ", answer)
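One caveat worth knowing: tinyroberta-squad2 is trained on SQuAD 2.0, which includes unanswerable questions, so the argmax can land on the [CLS] token and the decoded span comes back empty. A minimal guard using the functions above (the question and fallback string are my own illustration):

answer = get_answer_for_question("What color was the milk?", context, model, tokenizer)
if not answer.strip():
    answer = "(no answer found in context)"  # model predicted the empty "no answer" span
print(answer)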
So I'm back with machine learning games! This time I had two tasks: name generation and text generation. The first was funny, but I'll tell you more about that another time. As you might guess, I decided to train the text generation model on Leigh Bardugo's books. I chose "Six of Crows" and "Crooked Kingdom".
The model takes a looooong time to train. A very long time, and I'm terribly impatient. Still, I decided to show you what I have at the moment.
Text is generated until the number of words exceeds 60. As you can see from the passages below, the model was confused by punctuation marks at first, but then gradually began to add real words. It has turned out to be a very exciting experience. Later I want to increase the number of epochs from 21 to 100 and see what happens.
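For the curious, the stopping rule is nothing fancy: keep sampling words until the count passes 60. A minimal sketch of that loop (sample_next_word and its toy vocabulary are hypothetical stand-ins for my actual trained model):

import random

# toy stand-in for the trained language model's next-word sampler
def sample_next_word(words):
    return random.choice(["the", "Kaz", "said", ",", "."])

def generate_text(sample_next_word, seed_words, max_words=60):
    words = list(seed_words)
    # keep sampling until the word count exceeds max_words
    while len(words) <= max_words:
        words.append(sample_next_word(words))
    return " ".join(words)

print(generate_text(sample_next_word, ["Kaz", "Brekker"]))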
Epoch 1
train loss: 8.31648
Kaz Brekker Jesper , you of to , of . of . . to the , to , . of , . . of to of the , to the to . of . the , . the to the , the , the . . . to . . the . the . of to , the the . of the to . , to the , of the of of to . . , . the of . of the
------------------------------------------------------------------------------------------
Epoch 2
train loss: 6.48834
Kaz Brekker “ . “ , “ “ the the , ’ ’ ’ the ’ the ’ the , , ’ , ” ’ , , the . . “ ’ , the ” , ” , . , . the ’ . . . ’ the the . the . , , ’ , . “ the the ” “ the ’ . “ “ the “ . “ the . the ’ . “ the . ’ the .
------------------------------------------------------------------------------------------
Epoch 8
train loss: 6.01746
Kaz Brekker Jesper . He was . He . “ I . I to a to the ” The , . I . She the , but . The , ” He to the . I ’ s . I ’ t . The ’ . I to , and a . I the . He . She , . “ I “ I . She , but ” “ I “ I . “ the and ” The ’ s ’
------------------------------------------------------------------------------------------
Epoch 9
train loss: 5.93933
Kaz Brekker Jesper “ “ You ’ . “ The , the the , . The and ” “ “ “ “ “ The and and , the and the ” He . He . He was a the , but the and , the , ” The , ” The , . She to his to . The ’ s the and , ” “ You ” the and the the . He . “ “ The ’ s to ,
------------------------------------------------------------------------------------------
Epoch 12
train loss: 5.72819
Kaz Brekker can ’ ll he could his to her . She was ’ d , the , and a . “ The ” He , and a ’ t to her to the ” He said , but , but ” said to a ” “ “ It ’ ll his . The the . She had been . He . I said . He was was to his and the , and his , and ” “ “ It was
------------------------------------------------------------------------------------------
Epoch 13
train loss: 5.66486
Kaz Brekker and the but the Barrel of her to his ” Nina was was ’ re a ” The and and the the but and ” “ “ The he , the other to be . She , the the ” Kaz said to a , the and and ” “ It ’ ll a . “ I said . They , he was was was ’ t to her ” She ’ s a few . “ The and a
------------------------------------------------------------------------------------------
Epoch 16
train loss: 5.49819
Kaz Brekker can can said You ’ re a way . The other . The he had ’ s . “ What he was ’ s , ” Nina , but you said ” “ It said . She said , but the ” “ The ” “ The other , and you said the other . Kaz had the Barrel and his and ” said . She could his , the and he said ” Nina ’ re , but she
------------------------------------------------------------------------------------------
Epoch 17
train loss: 5.44922
Kaz Brekker t know Jesper . It ’ d been . “ The ” “ What ’ re the way ” Kaz ’ re his father ’ d a way , he could a few to the way to his , but the Ice the ” said ” Nina ’ ll her and his and his father , he could you , the ” Kaz said to her and the way . The he had to a little , the Ice ’
------------------------------------------------------------------------------------------
Epoch 19
train loss: 5.35786
Kaz Brekker was face and you have a little , he said , he said . Kaz said . She was had a , ” said , and a way . “ It could have , he could his hands , but he said , he could have been to his hands , ” said the the way to a , but the ” he had the other . She could be , the other and she had a little . She
------------------------------------------------------------------------------------------
Epoch 20
train loss: 5.31483
Kaz Brekker d “ The other ” “ The guards ? I ’ ll be a way of a few . I was to the door to be ” Nina ’ t a few . The boy ’ d ’ re the ” “ The boy , but she was ’ t be a few and his head of . “ It could the city and he was a little ” Wylan was . The other , but you said ” “
------------------------------------------------------------------------------------------
Epoch 21
train loss: 5.27195
Kaz Brekker d said ” “ You can had been , but she had been the ” he said . Kaz . He ’ t be a , and she ’ t the the city and he said . They could a long in the ” “ You can was . I had a hand , he was to a little ” Kaz was the way of his eyes of . He had ’ re her and the the other of his
...Then I tried to train the model on the Russian version of the books, and the results were much better even in the first epochs. The model is still training at the moment, and the texts still look a little silly, but here are the results anyway:
Epoch 2
train loss: 5.78698
Каз Бреккер . Он не был покрыт и он услышал его и , но он не мог понять в этом . – Это не было времени ? Он не хочу . Матиас почувствовал его за дверью на нее с Матиасом и в конце , и они все равно знал с ним – он с тобой , что ты хочешь . Когда она не было . – Что я не могу его ? Джеспер кивнул – и на них и не будет ждать
Epoch 3
train loss: 5.12968
Каз Бреккер , но не не не мог понять . – Это был готов , что я хочу сказать на него , чтобы я бы тебе , когда ты хочешь . Джеспер знал об этом , и все это напомнило Инеж с Матиасом в том месте . Он думал ? – Нет не не было назвать . Он знал ли они в том с пульсом ? Он взял бровь . Каз был не было бы быть . – Что , что ты
Epoch 4
train loss: 4.61884
Каз Бреккер услышал приближающийся топот глиной . « Я бы хотел , и все его лицо – Призрак . Он был весь количество , но и так не остановятся в том направлении и не так далеко . Джеспер присел рядом с Матиасом . Матиас почувствовал легкую на него , но все его пристрелить ее в воздухе с пульсом на песке . « Я бы предпочел забрать раз , когда я не мог перестать лифт , что я не могу заставить ее жизни
python iterative monte carlo search for text generation using nltk
You are playing a game and you want to win, but you don't know what move to make next, because you don't know what the other player will do. So you try different moves at random and see what happens, repeating the process again and again and learning from the result of each move. This is iterative Monte Carlo search: making random moves and learning from the outcome each time until you find the best move to win.
Iterative Monte Carlo search is a technique used in AI to explore a large space of possible solutions and keep the best ones it finds. Applied to synonym finding, that means randomly swapping in synonyms, scoring the resulting sentences, and keeping the highest-scoring candidate each round.
# an iterative monte carlo search example using nltk
# https://pythonprogrammingsnippets.tumblr.com
import random
from nltk.corpus import wordnet

# Define a function to get the synonyms of a word using wordnet
def get_synonyms(word):
    synonyms = []
    for syn in wordnet.synsets(word):
        for l in syn.lemmas():
            if '_' not in l.name():
                synonyms.append(l.name())
    return list(set(synonyms))

# Define a function to get a random variant of a word
def get_random_variant(word):
    synonyms = get_synonyms(word)
    if len(synonyms) == 0:
        return word
    else:
        return random.choice(synonyms)

# Define a function to get the score of a candidate sentence
def get_score(candidate):
    return len(candidate)

# Define a function to perform one iteration of the monte carlo search
def monte_carlo_search(candidate):
    variants = [get_random_variant(word) for word in candidate.split()]
    max_candidate = ' '.join(variants)
    max_score = get_score(max_candidate)
    for i in range(100):
        variants = [get_random_variant(word) for word in candidate.split()]
        candidate = ' '.join(variants)
        score = get_score(candidate)
        if score > max_score:
            max_score = score
            max_candidate = candidate
    return max_candidate

initial_candidate = "This is an example sentence."

# Perform 10 iterations of the monte carlo search
for i in range(10):
    initial_candidate = monte_carlo_search(initial_candidate)
    print(initial_candidate)
output:
This manufacture Associate_in_Nursing theoretical_account sentence.
This fabricate Associate_in_Nursing theoretical_account sentence.
This construct Associate_in_Nursing theoretical_account sentence.
This cathode-ray_oscilloscope Associate_in_Nursing counteract sentence.
This collapse Associate_in_Nursing computed_axial_tomography sentence.
This waste_one's_time Associate_in_Nursing gossip sentence.
This magnetic_inclination Associate_in_Nursing temptingness sentence.
This magnetic_inclination Associate_in_Nursing conjure sentence.
This magnetic_inclination Associate_in_Nursing controversy sentence.
This inclination Associate_in_Nursing magnetic_inclination sentence.
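Notice how the candidates drift toward WordNet's longest multiword lemmas: get_score rewards raw character length and nothing else. If you wanted fluent paraphrases instead, you could swap in a different scorer; a hedged sketch (the overlap heuristic below is my own illustration, not part of the original snippet):

# hypothetical alternative scorer: balance keeping original words
# against introducing new ones, instead of rewarding raw length
original_words = set("This is an example sentence.".split())

def get_score(candidate):
    words = set(candidate.split())
    kept = len(words & original_words)     # overlap with the original
    changed = len(words - original_words)  # novelty from synonym swaps
    return kept + 0.5 * changed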