ملف واحد
وأحتاج إلى كتابة سكربت بايثون لهذا العمل
import pandas as pd
from sentence_transformers import SentenceTransformer, util
# Find pairs of service names whose multilingual sentence embeddings have a
# cosine similarity at or above `threshold`, and save the pairs to Excel.

# Load data
df = pd.read_excel("files/similar_services.xlsx")

# Drop missing/blank names before converting to text: astype(str) alone turns
# every empty cell into the literal string "nan", and all of those rows would
# then be reported as perfect matches of one another.
names = df["name"].dropna().astype(str).str.strip()
texts = names[names != ""].tolist()

threshold = 0.80  # Adjust threshold as needed

# Collect similar pairs
matches = []

# A pair needs at least two names; skip the model load when there is
# nothing to compare.
if len(texts) >= 2:
    # Load multilingual model
    model = SentenceTransformer(
        "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )

    # Encode into embeddings
    embeddings = model.encode(texts, convert_to_tensor=True, show_progress_bar=True)

    # Compute cosine similarity matrix
    cosine_scores = util.cos_sim(embeddings, embeddings)

    for i in range(len(texts) - 1):
        # Only the part of the row above the diagonal: each unordered pair is
        # visited once and a name is never compared with itself. Converting
        # the slice in one call avoids a separate .item() call per pair.
        row_scores = cosine_scores[i, i + 1:].tolist()
        for j, score in enumerate(row_scores, start=i + 1):
            if score >= threshold:
                matches.append({
                    "service_1": texts[i],
                    "service_2": texts[j],
                    "similarity": round(score, 3),
                })

# Explicit columns keep the header row in the output even with zero matches.
similar_df = pd.DataFrame(matches, columns=["service_1", "service_2", "similarity"])
similar_df.to_excel("similar_services_output.xlsx", index=False)
print("Done. Saved to similar_services_output.xlsx")
سويت هذا الكود بس الأمور ما ضبطت