File size: 5,055 Bytes
4ede186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""Core resume analysis logic"""
import streamlit as st
import re
from models.llm_client import LLMClient
from agents.resume_extractor import ResumeExtractor
from agents.jd_summarizer import JobDescriptionSummarizer
from agents.matcher import ResumeJDMatcher
from agents.shortlister import Shortlister
from db.database import ResumeMatchDB

def extract_candidate_info(resume_text):
    """Pull the candidate's name and email address out of raw resume text.

    The first email-looking token found anywhere in the text is used.
    The name is taken from the first non-empty, non-email line that looks
    like a two- or three-word capitalized personal name (a middle initial
    such as "J." is also accepted). Either value falls back to the string
    "Not found" when nothing matches.

    Returns:
        tuple[str, str]: (name, email)
    """
    email_match = re.search(r'[\w\.-]+@[\w\.-]+\.\w+', resume_text)
    email = email_match.group(0) if email_match else "Not found"

    # Simple heuristics for "Firstname Lastname", "Firstname M. Lastname",
    # and "Firstname Middlename Lastname" on a line of their own.
    name_patterns = (
        r'^[A-Z][a-z]+\s+[A-Z][a-z]+$',
        r'^[A-Z][a-z]+\s+[A-Z]\.\s+[A-Z][a-z]+$',
        r'^[A-Z][a-z]+\s+[A-Z][a-z]+\s+[A-Z][a-z]+$'
    )

    name = "Not found"
    for raw_line in resume_text.split('\n'):
        candidate = raw_line.strip()
        # Skip blank lines and anything containing an email address.
        if not candidate or '@' in candidate:
            continue
        if any(re.match(pattern, candidate) for pattern in name_patterns):
            name = candidate
            break

    return name, email

def analyze_resumes(uploaded_files, job_descriptions, api_key, model_name, base_url):
    """Analyze resumes and store results in session state"""
    progress_bar = st.progress(0)
    status_text = st.empty()
    
    with st.spinner("Analyzing resumes..."):
        results = []
        total_steps = len(uploaded_files) * len(job_descriptions)
        current_step = 0
        
        db = ResumeMatchDB()
        
        for uploaded_file in uploaded_files:
            status_text.text(f"πŸ“„ Processing {uploaded_file.name}...")
            extractor = ResumeExtractor(uploaded_file)
            resume_text = extractor.get_resume_text()
            
            if not resume_text or len(resume_text.strip()) < 10:
                st.error(f"❌ Could not extract text from {uploaded_file.name}. File may be corrupted or empty.")
                continue
            
            candidate_name, candidate_email = extract_candidate_info(resume_text)
            
            candidate_id = db.insert_candidate(
                name=candidate_name,
                email=candidate_email,
                resume_path=uploaded_file.name
            )
            
            resume_results = []
            for jd in job_descriptions:
                current_step += 1
                progress = current_step / total_steps
                progress_bar.progress(progress)
                
                status_text.text(f"πŸ” Matching with {jd['title']}...")
                jd_agent = JobDescriptionSummarizer(jd['content'])
                jd_summary = jd_agent.get_summary()
                
                if not jd_summary or len(jd_summary.strip()) < 10:
                    st.error(f"❌ Could not process job description: {jd['title']}")
                    continue
                
                llm = LLMClient(api_key=api_key, model_name=model_name, base_url=base_url)
                matcher = ResumeJDMatcher(llm)
                shortlister = Shortlister(threshold=70.0)
                
                match_result = matcher.match_resume_to_job(resume_text, jd_summary)
                match_percent = shortlister.compute_final_score(match_result)
                is_shortlisted = shortlister.is_shortlisted(match_percent)
                
                job_id = db.insert_job_description(
                    title=jd['title'],
                    description=jd['content']
                )
                
                match_data = {
                    'match_score': match_percent,
                    'skills_match': match_result['skills_match'],
                    'experience_match': match_result['experience_match'],
                    'education_match': match_result['education_match'],
                    'certifications_match': match_result['certifications_match'],
                    'summary': match_result['summary'],
                    'is_shortlisted': is_shortlisted
                }
                db.insert_match_result(candidate_id, job_id, match_data)
                
                resume_results.append({
                    "job_title": jd['title'],
                    "match_score": match_percent,
                    "is_shortlisted": is_shortlisted,
                    "details": match_result,
                    "job_id": job_id
                })
            
            if resume_results:
                best_match = max(resume_results, key=lambda x: x['match_score'])
                
                results.append({
                    "candidate_name": candidate_name,
                    "candidate_email": candidate_email,
                    "resume_name": uploaded_file.name,
                    "best_match": best_match,
                    "candidate_id": candidate_id
                })
        
        progress_bar.empty()
        status_text.empty()
        st.session_state.results = results