Giter Club home page Giter Club logo

30days_leetcode_april_challenge's Introduction

LinkedIn Badge

hey there

👩‍💻  About Me :

I am a Machine Learning Engineer from India.

  • 🌱 I work at Micron Technology
  • ⚡ I work on ML/AI-based applications and research
  • 📫 I design AI/ML-based large-scale products/solutions

30days_leetcode_april_challenge's People

Contributors

uthambathoju avatar

Watchers

 avatar

30days_leetcode_april_challenge's Issues

aa

import matplotlib.pyplot as plt

# Sample data: the x positions 1..10 and one y reading (in minutes) for each.
x = range(1, 11)
y = [5, 6, 4, 8, 7, 9, 5, 6, 7, 4]

# Draw the series as a blue solid line with a circle marker on every point.
plt.plot(x, y, color='b', linestyle='-', marker='o')

# Title and axis labels.
plt.title('Time vs. X-axis')
plt.xlabel('X-axis')
plt.ylabel('Time (minutes)')

# One tick per x value; y ticks every 2 units from 0 through 10.
plt.xticks(x)
plt.yticks(range(0, 11, 2))

# Show grid lines, then display the figure.
plt.grid(True)
plt.show()

b

def generate_tf_idf_with_corpus(data , query):
    """Score every query term that occurs in each document of the corpus.

    Besides its parameters, this reads two module-level names: ``norm_tf``
    (indexed by document number, then by an integer word index) and
    ``idf_dict`` (indexed by word).

    Args:
        data: Sized sequence of document strings; each is split on whitespace.
        query: Query string; its whitespace-separated tokens become the score
            columns of the returned frame.

    Returns:
        A ``(tf_idf, df)`` tuple. ``tf_idf`` is the flat list of every score in
        the order it was computed. ``df`` has a ``doc`` column holding
        ``0 .. len(data) - 1`` and one column per query token; cells that never
        received a score are filled with 0.
    """
    tf_idf = []
    query_tokens = query.split()
    df = pd.DataFrame(columns=['doc'] + query_tokens)
    # Assigning the document numbers creates one row per document. The values
    # do not depend on the document being processed, so do it once up front;
    # the rows must exist before the positional ``iloc`` writes below.
    df['doc'] = np.arange(0 , len(data))
    for index, doc in enumerate(data):
        doc_num = norm_tf[index]
        sentence = doc.split()
        for word in sentence:
            for text in query_tokens:
                if text == word:
                    # NOTE(review): index() yields the word's FIRST position in
                    # the document; kept as-is because the layout of norm_tf is
                    # defined outside this block - confirm before changing.
                    idx = sentence.index(word)
                    tf_idf_score = doc_num[idx] * idf_dict[word]
                    tf_idf.append(tf_idf_score)
                    df.iloc[index, df.columns.get_loc(word)] = tf_idf_score
    # Query terms absent from a document are still NaN at this point.
    df.fillna(0 , axis=1, inplace=True)
    return tf_idf , df


tf_idf , df = generate_tf_idf_with_corpus(data , query)

a

def generate_tf_query(query):
    """Return one term-frequency value per query token, in token order.

    Args:
        query: Query string; split on whitespace into tokens.

    Returns:
        A list whose i-th element is ``termFrequency(token_i, query)``.
        Repeated tokens are not de-duplicated: each occurrence gets an entry.
    """
    return [termFrequency(word , query) for word in query.split()]


query_norm_tf = generate_tf_query(query)
print(query_norm_tf)

def generate_tf_idf_with_corpus(data , query):
    """Score every query term that occurs in each document of the corpus.

    Besides its parameters, this reads two module-level names: ``norm_tf``
    (indexed by document number, then by an integer word index) and
    ``idf_dict`` (indexed by word).

    Args:
        data: Sized sequence of document strings; each is split on whitespace.
        query: Query string; its whitespace-separated tokens become the score
            columns of the returned frame.

    Returns:
        A ``(tf_idf, df)`` tuple. ``tf_idf`` is the flat list of every score in
        the order it was computed. ``df`` has a ``doc`` column holding
        ``0 .. len(data) - 1`` and one column per query token; cells that never
        received a score are filled with 0.
    """
    tf_idf = []
    query_tokens = query.split()
    df = pd.DataFrame(columns=['doc'] + query_tokens)
    # Assigning the document numbers creates one row per document. The values
    # do not depend on the document being processed, so do it once up front;
    # the rows must exist before the positional ``iloc`` writes below.
    df['doc'] = np.arange(0 , len(data))
    for index, doc in enumerate(data):
        doc_num = norm_tf[index]
        sentence = doc.split()
        for word in sentence:
            for text in query_tokens:
                if text == word:
                    # NOTE(review): index() yields the word's FIRST position in
                    # the document; kept as-is because the layout of norm_tf is
                    # defined outside this block - confirm before changing.
                    idx = sentence.index(word)
                    tf_idf_score = doc_num[idx] * idf_dict[word]
                    tf_idf.append(tf_idf_score)
                    df.iloc[index, df.columns.get_loc(word)] = tf_idf_score
    # Query terms absent from a document are still NaN at this point.
    df.fillna(0 , axis=1, inplace=True)
    return tf_idf , df


tf_idf , df = generate_tf_idf_with_corpus(data , query)

def generate_idf_query(query):
    """Build a token -> IDF lookup for the query.

    Each whitespace-separated token of ``query`` is mapped to the value of
    ``inverseDocumentFrequency(token, data)``, where ``data`` is the
    module-level corpus. A token that occurs more than once keeps the value
    computed for its last occurrence.
    """
    return {
        token: inverseDocumentFrequency(token ,data)
        for token in query.split()
    }


idf_dict_qry = generate_idf_query(query)
print(idf_dict_qry)

def generate_tf_idf_for_query(query):
    """Compute the TF-IDF weight of every token in the query.

    Reads two module-level names: ``query_norm_tf`` (indexed by token
    position) and ``idf_dict_qry`` (indexed by token).

    Args:
        query: Query string; split on whitespace into tokens.

    Returns:
        A ``(tf_idf_qry, tfidf_dict_qry)`` tuple: the list of weights in token
        order, and a dict mapping each token to its weight (for a repeated
        token, the weight of its last occurrence).
    """
    tf_idf_qry = []
    tfidf_dict_qry = {}
    # enumerate() supplies each token's own position directly, instead of a
    # linear list.index() search per token. For a repeated token this reads
    # that occurrence's entry rather than the first occurrence's; the two are
    # the same value when query_norm_tf was produced by generate_tf_query on
    # this query and termFrequency is deterministic.
    for position, word in enumerate(query.split()):
        tfidf_score = query_norm_tf[position] * idf_dict_qry[word]
        tf_idf_qry.append(tfidf_score)
        tfidf_dict_qry[word] = tfidf_score
    return tf_idf_qry , tfidf_dict_qry


tf_idf_qry , tfidf_dict_qry = generate_tf_idf_for_query(query)
print(tfidf_dict_qry)

# Cosine similarity(Query, Document) =
#     dot(Query, Document) / (||Query|| * ||Document||)
def cosine_similarity(tfidf_dict_qry, df , query , doc_num):
    """Cosine similarity between the query vector and one document's vector.

    Only the query's own tokens are used as vector dimensions.

    Args:
        tfidf_dict_qry: Mapping of query token -> query-side weight.
        df: Frame with a ``doc`` column and one weight column per query token.
        query: Query string; split on whitespace into tokens.
        doc_num: Value of ``df['doc']`` identifying the document to compare.

    Returns:
        A pandas Series with one similarity value per row of ``df`` whose
        ``doc`` equals ``doc_num``. Where the query or document vector has
        zero length the similarity is reported as 0.0 rather than NaN. For a
        query with no tokens nothing is accumulated and the plain scalar
        division result is returned unchanged.
    """
    tokens = query.split()
    # Select this document's row(s) once instead of re-evaluating the mask
    # for every term of every keyword.
    doc_rows = df[df['doc'] == doc_num]

    dot_product = 0
    qry_sq_sum = 0
    doc_sq_sum = 0
    for keyword in tokens:
        qry_weight = tfidf_dict_qry[keyword]
        doc_weight = doc_rows[keyword]
        dot_product += qry_weight * doc_weight
        # Squared magnitudes of the query and document vectors.
        qry_sq_sum += qry_weight * qry_weight
        doc_sq_sum += doc_weight * doc_weight

    denominator = np.sqrt(qry_sq_sum) * np.sqrt(doc_sq_sum)
    cos_sim = dot_product / denominator
    if not tokens:
        # The accumulators are still plain scalars; there is no Series to fix.
        return cos_sim
    # A document that contains none of the query terms has a zero-length
    # vector, which makes the division 0/0 = NaN. NaN does not order
    # consistently when the scores are sorted, so report "no similarity".
    return cos_sim.where(denominator != 0, 0.0)

def rank_similarity_docs(data):
    """Collect the cosine similarity of the query against every document.

    Document numbers run from 0 to ``len(data) - 1``. For each one,
    ``cosine_similarity`` is called with the module-level ``tfidf_dict_qry``,
    ``df`` and ``query``, and its result is converted with ``.tolist()``.

    Returns:
        A list with one converted result per document, in document order.
    """
    return [
        cosine_similarity(tfidf_dict_qry, df , query , doc_num).tolist()
        for doc_num in range(len(data))
    ]


cos_sim = rank_similarity_docs(data)

import itertools
import operator  # used by the sort below; no import of it is visible in this snippet

# rank_similarity_docs returns one list per document; flatten them into a
# single list of scores.
cos_sim = list(itertools.chain.from_iterable(cos_sim))

# Key each score by its position in the flattened list so the mapping covers
# every score, not just the first three.
result_dict = dict(enumerate(cos_sim))
print(result_dict)
# Highest similarity first.
similar_docs = sorted(result_dict.items(), key=operator.itemgetter(1) , reverse=True)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Something interesting about the web: a new door to the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Something interesting about games, to make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.