基于Python实现爬虫+协同过滤算法的招聘信息推荐系统
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Module-level accumulators filled in by get_job_info().
# The three lists are appended to together, so index i in each one refers to
# the same scraped posting.
jobs = []              # one {'title': ..., 'description': ...} dict per posting
job_titles = []        # title text of each posting
job_descriptions = []  # description text of each posting
def get_job_info(url):
    """Scrape one listing page and append its postings to the module lists.

    Downloads ``url``, parses the HTML, and for every
    ``<div class="job-card-container">`` element records the text of its
    ``<h2 class="title">`` and ``<div class="description">`` children in the
    module-level ``job_titles``, ``job_descriptions`` and ``jobs`` lists.

    Args:
        url: Address of the listing page to download.

    Returns:
        None. Results are appended to the module-level lists.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it
    # contained job cards.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    for card in soup.find_all('div', class_='job-card-container'):
        title_tag = card.find('h2', class_='title')
        description_tag = card.find('div', class_='description')
        # find() returns None when the element is absent; skip incomplete
        # cards rather than crash with AttributeError on ``.text``.
        if title_tag is None or description_tag is None:
            continue
        # Strip surrounding whitespace so that later exact-title lookups are
        # not defeated by markup indentation or newlines.
        job_title = title_tag.text.strip()
        job_description = description_tag.text.strip()
        job_descriptions.append(job_description)
        job_titles.append(job_title)
        jobs.append({'title': job_title, 'description': job_description})
# Scrape the first results page to populate the job lists.
get_job_info('https://www.seek.com.au/jobs/in-all-jobs?page=1')
# With nothing scraped, the vectorizer below would fail with an error that
# does not mention the real cause, so report it explicitly here.
if not job_descriptions:
    raise ValueError(
        'no job postings were scraped; cannot build the TF-IDF matrix'
    )
# Convert the job descriptions into TF-IDF vectors (titles are not vectorized).
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(job_descriptions)
# Pairwise cosine similarity between all description vectors; row/column i
# corresponds to job_descriptions[i].
cosine_sim = cosine_similarity(tfidf_matrix)
def get_similar_jobs(job_title, cosine_sim, top_n=5):
    """Return the postings most similar to the one titled ``job_title``.

    Args:
        job_title: Title to look up in the module-level ``job_titles`` list;
            the first exact match is used.
        cosine_sim: Similarity matrix whose row ``i`` holds the similarity of
            posting ``i`` to every posting, indexed in the same order as
            ``job_titles`` and ``jobs``.
        top_n: Maximum number of postings to return. Defaults to 5.

    Returns:
        Up to ``top_n`` entries of ``jobs``, most similar first. The queried
        posting itself is never included.

    Raises:
        ValueError: If ``job_title`` is not present in ``job_titles``.
    """
    try:
        idx = job_titles.index(job_title)
    except ValueError:
        raise ValueError(
            'job title not found: {!r}'.format(job_title)
        ) from None
    # Indices ordered from most to least similar.
    ranked = cosine_sim[idx].argsort()[::-1]
    # Drop the queried posting by index rather than by position: when another
    # posting ties with it (e.g. an identical description), the sort does not
    # guarantee that the queried posting comes first.
    similar_job_indices = [i for i in ranked if i != idx][:top_n]
    return [jobs[i] for i in similar_job_indices]
# Look up the postings most similar to the one titled 'Data Scientist'.
try:
    similar_jobs = get_similar_jobs('Data Scientist', cosine_sim)
except ValueError:
    # Raised when no scraped posting carries exactly this title; report it
    # instead of ending the script with a traceback.
    similar_jobs = []
    print("No posting titled 'Data Scientist' was found.")
# Print the titles of the similar postings.
for job in similar_jobs:
    print(job['title'])
这个简化的代码示例展示了如何使用Python爬取招聘信息，并使用TF-IDF和余弦相似度找出描述相似的职位。该系统可以作为职位相关性研究和推荐的基础。
评论已关闭