import os
import shutil
import tempfile

import gradio as gr
from dotenv import load_dotenv
from PyPDF2 import PdfReader

from langchain.text_splitter import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.memory import ConversationBufferMemory
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.schema.output_parser import StrOutputParser
from google.colab import userdata

# Load variables from a local .env file into the environment (no-op if absent).
load_dotenv()


class ResumeAnalyzer:
    """Resume analysis via a RAG pipeline: PDF text -> chunks -> FAISS -> Gemini."""

    def __init__(self):
        # FAISS vector store over resume chunks (set by process_resumes).
        self.vectorstore = None
        # Callable answering queries with memory (set by process_resumes).
        self.conversation_chain = None
        # ConversationBufferMemory holding chat turns (set per conversation chain).
        self.memory = None
        # Basenames of the PDFs most recently processed.
        self.processed_files = []
def extract_pdf_text(self, pdf_files):
"""Extract text from uploaded PDF files"""
if not pdf_files:
return ""
text = ""
self.processed_files = []
for pdf_file in pdf_files:
try:
# Handle file path (Gradio returns file paths as strings)
pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
self.processed_files.append(os.path.basename(pdf_path))
pdf_reader = PdfReader(pdf_path)
for page in pdf_reader.pages:
page_text = page.extract_text()
if page_text:
text += page_text + "\n"
except Exception as e:
print(f"Error processing {pdf_path}: {str(e)}")
continue
return textFunction Breakdown:
def create_text_chunks(self, text):
"""Split text into chunks for processing"""
if not text.strip():
return []
text_splitter = CharacterTextSplitter(
separator="\n",
chunk_size=1000,
chunk_overlap=200,
length_function=len
)
chunks = text_splitter.split_text(text)
return chunksText Chunking Parameters:
def create_vectorstore(self, text_chunks):
"""Create FAISS vector store from text chunks"""
if not text_chunks:
return None
try:
embeddings = HuggingFaceEmbeddings(
model_name="hkunlp/instructor-xl",
model_kwargs={"device": "cpu"}
# model_kwargs={"device": "cuda"} # For GPU acceleration
)
vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
return vectorstore
except Exception as e:
print(f"Error creating vectorstore: {str(e)}")
return NoneVector Store Setup:
def setup_conversation_chain(self, vectorstore):
"""Setup the conversation chain with Gemini LLM"""
if not vectorstore:
return None
try:
# Initialize Gemini
llm = ChatGoogleGenerativeAI(
model="gemini-2.5-flash",
temperature=0.7,
)
# Memory
self.memory = ConversationBufferMemory(
memory_key="chat_history",
return_messages=True
)LLM Configuration:
prompt = ChatPromptTemplate.from_messages([
("system", """You are an AI assistant for a resume analyzer system.
You MUST ONLY answer questions related to resume analysis, job profiling, and candidate evaluation based on the uploaded resumes.
STRICT RULES:
1. ONLY respond to queries about:
- Finding candidates for specific job roles
- Analyzing skills and qualifications from resumes
- Comparing candidates for positions
- Extracting contact information from resumes
- Summarizing candidate profiles
- Job-related questions about the uploaded resumes
2. If asked about ANYTHING else (history, general knowledge, unrelated topics, etc.), respond with:
"I can only help with resume analysis and job profiling based on the uploaded resumes. Please ask questions about finding candidates, analyzing skills, or job-related queries."
3. IMPORTANT: If the context shows "No relevant resume information found", it means no candidates in the database match the query. In this case, respond with:
"❌ No candidates found matching your criteria. This could mean:
• No resumes in the database match the specified skills/role
• The job title or skills mentioned aren't present in the uploaded resumes
• Try broadening your search criteria or using different keywords
Consider rephrasing your query or checking if the relevant resumes were properly uploaded."
4. For valid resume-related queries with relevant context, provide:
- Full name
- Email address (if available)
- LinkedIn profile link (if available)
- Phone number (if available)
- A concise summary of their qualifications and experience
- Key skills that match the job requirements
- Years of experience (if mentioned)
5. Present information in a clear, organized format. If contact information is not available, mention "Not provided" for those fields.
6. Never make up or hallucinate information about candidates. Only use information explicitly provided in the context.
Context from uploaded resumes: {context}"""),
MessagesPlaceholder(variable_name="chat_history"),
("human", "{question}")
])Prompt Engineering:
def format_docs(docs):
if not docs:
return "No relevant resume information found."
# Check if documents have meaningful content
meaningful_docs = []
for doc in docs:
if doc.page_content and len(doc.page_content.strip()) > 10:
meaningful_docs.append(doc)
if not meaningful_docs:
return "No relevant resume information found."
return "\n\n".join(doc.page_content for doc in meaningful_docs)Document Formatting:
def get_chat_history(inputs):
return self.memory.chat_memory.messages if self.memory else []Chat History Retrieval:
def enhanced_retriever(query):
"""Enhanced retriever with similarity threshold checking"""
try:
# Perform similarity search with scores
docs_with_scores = vectorstore.similarity_search_with_score(query, k=5)
# Filter documents based on similarity threshold
# Lower scores indicate higher similarity in FAISS
similarity_threshold = 1.5 # Adjust based on your needs
relevant_docs = []
for doc, score in docs_with_scores:
if score < similarity_threshold: # Lower score = more similar
relevant_docs.append(doc)
# If no documents meet the threshold, return empty list
if not relevant_docs:
return []
return relevant_docs
except Exception as e:
print(f"Retrieval error: {e}")
return []Enhanced Retrieval Logic:
# Create the chain
rag_chain = (
{
"context": RunnableLambda(enhanced_retriever) | format_docs,
"question": RunnablePassthrough(),
"chat_history": RunnableLambda(get_chat_history)
}
| prompt
| llm
| StrOutputParser()
)RAG Pipeline:
def conversation_with_memory(question):
try:
response = rag_chain.invoke(question)
# Save to memory
if self.memory:
self.memory.chat_memory.add_user_message(question)
self.memory.chat_memory.add_ai_message(response)
return response
except Exception as e:
return f"Error processing query: {str(e)}"
return conversation_with_memoryMemory Integration:
def process_resumes(self, pdf_files, progress=gr.Progress()):
"""Process uploaded resume PDFs"""
if not pdf_files:
return "❌ No files uploaded. Please upload PDF resumes.", ""
try:
progress(0.1, desc="Extracting text from PDFs...")
# Extract text from PDFs
raw_text = self.extract_pdf_text(pdf_files)
if not raw_text.strip():
return "❌ No text could be extracted from the uploaded PDFs.", ""
progress(0.3, desc="Creating text chunks...")
# Create text chunks
text_chunks = self.create_text_chunks(raw_text)
if not text_chunks:
return "❌ Could not create text chunks from the extracted text.", ""
progress(0.6, desc="Creating vector database...")
# Create vector store
self.vectorstore = self.create_vectorstore(text_chunks)
if not self.vectorstore:
return "❌ Failed to create vector database.", ""
progress(0.8, desc="Setting up AI conversation chain...")
# Setup conversation chain
self.conversation_chain = self.setup_conversation_chain(self.vectorstore)
if not self.conversation_chain:
return "❌ Failed to setup AI conversation chain.", ""
progress(1.0, desc="Processing complete!")
success_msg = f"""✅ **Processing Complete!**
📄 **Files Processed:** {len(self.processed_files)}
📝 **Text Chunks Created:** {len(text_chunks)}
🔍 **Vector Database:** Ready
🤖 **AI System:** Initialized
**Processed Files:**
{chr(10).join(f"• {file}" for file in self.processed_files)}
You can now query for job profiles using the chat interface below."""
return success_msg, ""
except Exception as e:
return f"❌ Error processing resumes: {str(e)}", ""Processing Pipeline:
def chat_with_system(self, message, history):
"""Handle chat interactions with context validation"""
if not self.conversation_chain:
return history + [(message, "❌ Please upload and process resumes first.")], ""
if not message.strip():
return history, ""
# Check if the question is resume/job-related
if not self._is_resume_related_query(message):
response = "I can only help with resume analysis and job profiling based on the uploaded resumes. Please ask questions about finding candidates, analyzing skills, or job-related queries."
history.append((message, response))
return history, ""
try:
# Get response from the conversation chain
response = self.conversation_chain(message)
# Update chat history
history.append((message, response))
return history, ""
except Exception as e:
error_msg = f"Error: {str(e)}"
history.append((message, error_msg))
return history, ""Chat Logic:
def _is_resume_related_query(self, query):
"""Check if the query is related to resume analysis or job profiling"""
query_lower = query.lower()
# Keywords that indicate resume/job-related queries
resume_keywords = [
'candidate', 'candidates', 'resume', 'resumes', 'job', 'position', 'role',
'skill', 'skills', 'experience', 'qualification', 'qualifications',
'developer', 'engineer', 'manager', 'analyst', 'designer', 'consultant',
'hire', 'hiring', 'recruit', 'recruitment', 'interview', 'profile',
'background', 'expertise', 'competency', 'competencies', 'ability',
'python', 'java', 'javascript', 'react', 'node', 'sql', 'database',
'frontend', 'backend', 'fullstack', 'devops', 'data science', 'machine learning',
'project management', 'leadership', 'team', 'work', 'employment',
'education', 'degree', 'certification', 'portfolio', 'github',
'linkedin', 'contact', 'email', 'phone', 'name', 'find', 'search',
'best', 'suitable', 'match', 'fit', 'senior', 'junior', 'entry level',
'years of experience', 'cv', 'curriculum vitae'
]
# Check if any resume-related keywords are present
return any(keyword in query_lower for keyword in resume_keywords)Keyword Validation:
# Global analyzer instance shared by all Gradio event handlers.
analyzer = ResumeAnalyzer()
def create_interface():
    """Build and return the Gradio Blocks UI wired to the global ``analyzer``."""
    with gr.Blocks(
        title="Resume Analyzer & Job Profiler",
        theme=gr.themes.Soft(),
        css="""
        .header { text-align: center; margin-bottom: 20px; }
        .status-box { padding: 15px; border-radius: 10px; margin: 10px 0; }
        .upload-area { border: 2px dashed #ccc; padding: 20px; border-radius: 10px; }
        """
    ) as demo:
        # Page header.
        gr.HTML("""
        <div class="header">
            <h1>🎯 Resume Analyzer & Job Profiler</h1>
            <p>Upload resumes and find the best candidates for any job profile using AI</p>
        </div>
        """)

        # --- Tab 1: upload PDFs and build the vector database ---
        with gr.Tab("📤 Upload & Process Resumes"):
            gr.HTML("""
            <div style="padding: 15px; border-radius: 10px; margin-bottom: 20px;">
                <h3>Step 1: Upload Resume PDFs</h3>
                <p>Upload multiple PDF resumes to build your candidate database. The system will extract text and create a searchable vector database.</p>
            </div>
            """)
            with gr.Row():
                with gr.Column(scale=2):
                    file_upload = gr.File(
                        label="Upload Resume PDFs",
                        file_count="multiple",
                        file_types=[".pdf"],
                        interactive=True
                    )
                    process_btn = gr.Button(
                        "🚀 Process Resumes",
                        variant="primary",
                        size="lg"
                    )
                with gr.Column(scale=1):
                    gr.HTML("""
                    <div style="padding: 15px; border-radius: 10px;">
                        <h4>📋 Requirements</h4>
                        <ul>
                            <li>PDF format only</li>
                            <li>Text-based PDFs (not scanned images)</li>
                            <li>Multiple files supported</li>
                            <li>Processing may take a few minutes</li>
                        </ul>
                    </div>
                    """)
            status_output = gr.HTML(label="Processing Status")

        # --- Tab 2: chat with the candidate database ---
        with gr.Tab("💬 Query Candidates"):
            gr.HTML("""
            <div style="padding: 15px; border-radius: 10px; margin-bottom: 20px;">
                <h3>Step 2: Find the Best Candidates</h3>
                <p>Ask questions about job profiles to find the most suitable candidates from your uploaded resumes.</p>
            </div>
            """)
            chatbot = gr.Chatbot(
                label="AI Resume Analyzer",
                height=500,
                placeholder="Process resumes first, then start chatting..."
            )
            with gr.Row():
                msg_input = gr.Textbox(
                    label="Your Query",
                    placeholder="e.g., 'Who are the best candidates for a senior Python developer position?'",
                    lines=2,
                    scale=4
                )
                send_btn = gr.Button("Send", variant="primary", scale=1)
            gr.Examples(
                examples=[
                    "Who are the best candidates for a software engineer position?",
                    "Find candidates with React.js and Node.js experience",
                    "Show me candidates suitable for a data scientist role",
                    "Who has the most experience in machine learning?",
                    "Find candidates with project management experience",
                    "Show me candidates with both frontend and backend skills"
                ],
                inputs=msg_input,
                label="Example Queries"
            )

        # --- Tab 3: static documentation ---
        with gr.Tab("ℹ️ About"):
            gr.HTML("""
            <div style="padding: 20px;">
                <h2>About Resume Analyzer & Job Profiler</h2>
                <h3>🔧 Technology Stack</h3>
                <ul>
                    <li><strong>LangChain:</strong> Framework for building AI applications</li>
                    <li><strong>FAISS:</strong> Vector database for similarity search</li>
                    <li><strong>Google Gemini AI:</strong> Advanced language model</li>
                    <li><strong>HuggingFace Embeddings:</strong> Text embedding generation</li>
                    <li><strong>Gradio:</strong> Web interface framework</li>
                </ul>
                <h3>📋 How It Works</h3>
                <ol>
                    <li><strong>Upload:</strong> Upload multiple PDF resumes</li>
                    <li><strong>Process:</strong> System extracts text and creates vector embeddings</li>
                    <li><strong>Query:</strong> Ask for candidates matching specific job profiles</li>
                    <li><strong>Results:</strong> AI analyzes and returns best matching candidates</li>
                </ol>
                <h3>🎯 Use Cases</h3>
                <ul>
                    <li>HR recruitment and candidate screening</li>
                    <li>Talent acquisition for specific roles</li>
                    <li>Resume database management</li>
                    <li>Quick candidate profiling</li>
                </ul>
                <h3>⚙️ Setup Requirements</h3>
                <p>Make sure you have the following API keys configured:</p>
                <ul>
                    <li><code>GOOGLE_API_KEY</code> - For Gemini AI</li>
                    <li><code>HUGGINGFACEHUB_API_TOKEN</code> - For embeddings</li>
                </ul>
            </div>
            """)

        # Event handlers: processing updates the status and clears the query
        # box; both chat triggers update the chatbot and clear the input.
        process_btn.click(
            fn=analyzer.process_resumes,
            inputs=[file_upload],
            outputs=[status_output, msg_input],
            show_progress=True
        )
        send_btn.click(
            fn=analyzer.chat_with_system,
            inputs=[msg_input, chatbot],
            outputs=[chatbot, msg_input]
        )
        msg_input.submit(
            fn=analyzer.chat_with_system,
            inputs=[msg_input, chatbot],
            outputs=[chatbot, msg_input]
        )

    # The main guard assigns `demo = create_interface()`, so the Blocks app
    # must be returned (the return was lost in the pasted original).
    return demo
if __name__ == "__main__":
    # Pull API keys from Colab secrets. Guarded: userdata.get raises for a
    # missing secret (and assigning None to os.environ raises TypeError), so
    # fall back to values already set via the environment or .env file.
    for _var in ("HUGGINGFACEHUB_API_TOKEN", "GOOGLE_API_KEY"):
        try:
            _value = userdata.get(_var)
        except Exception:
            _value = None
        if _value:
            os.environ[_var] = _value

    # Warn (but do not abort) when required keys are still missing.
    required_vars = ["GOOGLE_API_KEY", "HUGGINGFACEHUB_API_TOKEN"]
    missing_vars = [var for var in required_vars if not os.getenv(var)]
    if missing_vars:
        print(f"⚠️ Missing environment variables: {', '.join(missing_vars)}")
        print("Please set these variables in your .env file or environment")

    demo = create_interface()
    # share=True publishes a temporary public URL (needed when running in
    # Colab, where localhost is not reachable from the browser).
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        debug=True
    )

# This application combines modern AI technologies to create a resume
# analysis and candidate matching system.