Hello. I've run into a problem when trying to save to the database; it raises the following error:

NOT NULL constraint failed: scrapping_vacancy.language_id

Could you please tell me how to fix it?
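For context, this error means the INSERT into the vacancy table arrived with language_id = NULL. A quick way to confirm what the slug lookup actually returns, in a Django shell (python manage.py shell) — this check is mine, not part of the project:

from scrapping.models import Language

# .first() returns None when no Language row with this slug exists
print(Language.objects.filter(slug='python').first())

Here is my models.py: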
from django.db import models

from scrapping.utilis import from_cyrillic_to_eng

# Create your models here.


class City(models.Model):
    name = models.CharField(max_length=50, verbose_name='Название населенного пункта')
    slug = models.CharField(max_length=50, blank=True, unique=True)

    class Meta:
        verbose_name = 'Название населенного пункта'
        verbose_name_plural = 'Названия населенных пунктов'

    def __str__(self):
        return self.name

    def save(self, *args, **kwargs):
        # Generate the slug from the transliterated name on first save
        if not self.slug:
            self.slug = from_cyrillic_to_eng(self.name)
        super().save(*args, **kwargs)


class Language(models.Model):
    name = models.CharField(max_length=50, verbose_name='Язык программирования')
    slug = models.CharField(max_length=50, blank=True, unique=True)

    class Meta:
        verbose_name = 'Язык программирования'
        verbose_name_plural = 'Языки программирования'

    def __str__(self):
        return self.name

    def save(self, *args, **kwargs):
        if not self.slug:
            self.slug = from_cyrillic_to_eng(str(self.name))
        super().save(*args, **kwargs)


class Vacancy(models.Model):
    url = models.URLField(unique=True)
    title = models.CharField(max_length=250, verbose_name='Заголовок вакансии')
    company = models.CharField(max_length=250, verbose_name='Компания')
    description = models.TextField(verbose_name='Описание вакансии', null=True)
    city = models.ForeignKey('City', on_delete=models.CASCADE,
                             verbose_name='Город', related_name='vacancies')
    language = models.ForeignKey('Language', on_delete=models.CASCADE,
                                 verbose_name='Язык программирования')
    timestamp = models.DateField(auto_now_add=True)

    class Meta:
        verbose_name = 'Вакансия'
        verbose_name_plural = 'Вакансии'

    def __str__(self):
        return self.title
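For reference, from_cyrillic_to_eng comes from scrapping/utilis.py, which is not shown in the question. A minimal sketch of what such a transliteration helper typically looks like; the mapping table here is my assumption, not the original code:

# Hypothetical stand-in for scrapping/utilis.py; the real mapping is not shown above
TRANSLIT = {
    'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ж': 'zh',
    'з': 'z', 'и': 'i', 'й': 'y', 'к': 'k', 'л': 'l', 'м': 'm', 'н': 'n',
    'о': 'o', 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u', 'ф': 'f',
    'х': 'h', 'ц': 'c', 'ч': 'ch', 'ш': 'sh', 'щ': 'shch', 'ы': 'y',
    'э': 'e', 'ю': 'yu', 'я': 'ya', 'ъ': '', 'ь': '', ' ': '_',
}


def from_cyrillic_to_eng(text):
    # Lower-case the input and map each character through the table
    return ''.join(TRANSLIT.get(ch, ch) for ch in str(text).lower())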
The file I take the parsing functions from:
import requests
from random import choice

from bs4 import BeautifulSoup

__all__ = ('extract_max_page_hh', 'extract_hh', 'extract_max_page_stack', 'extract_stackoverflow')


def random_headers():
    # Rotate the User-Agent so consecutive requests look less uniform
    headers = [
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0',
    ]
    return {'User-Agent': choice(headers),
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'}


def extract_max_page_hh():
    # Read the last page number out of the hh.ru paginator
    url = 'https://hh.ru/search/vacancy?L_is_autosearch=false&clusters=true&enable_snippets=true&items_on_page=100&no_magic=true&text=python&'
    hh_request = requests.get(url, headers=random_headers())
    hh_soup = BeautifulSoup(hh_request.text, 'lxml')
    pages = []
    paginator = hh_soup.find_all('span', {'class': 'pager-item-not-in-short-range'})
    for page in paginator:
        pages.append(int(page.find('a').text))
    return pages[-1]


def extract_hh(url):
    jobs = []
    numbers = extract_max_page_hh()
    for page in range(int(numbers)):
        # url = 'https://hh.ru/search/vacancy?L_is_autosearch=false&area=113&clusters=true&enable_snippets=true&text=python&page='
        hh_request = requests.get(f'{url}page={page}', headers=random_headers())
        soup = BeautifulSoup(hh_request.text, 'lxml')
        results = soup.find_all('div', {'class': 'vacancy-serp-item'})
        for result in results:
            title = result.find('a').text
            link = result.find('a')['href']
            company = result.find('div', {'class': 'vacancy-serp-item__meta-info-company'}).find('a').text
            company = company.strip()
            content = result.find('div', {'class': 'g-user-content'}).text
            jobs.append({'title': title, 'description': content, 'company': company, 'url': link})
    return jobs


def extract_max_page_stack():
    # Read the last page number out of the Stack Overflow pagination block
    url = 'https://stackoverflow.com/jobs?q=python'
    request = requests.get(url, headers=random_headers())
    soup = BeautifulSoup(request.text, 'lxml')
    pages = soup.find('div', {'class': 's-pagination'}).find_all('a')
    last_page = pages[-2].get_text(strip=True)
    return last_page


def extract_stackoverflow(url):
    jobs = []
    numbers = extract_max_page_stack()
    for number in range(int(numbers)):
        # url = 'https://stackoverflow.com/jobs?q=python'
        request = requests.get(f'{url}&pg={number + 1}', headers=random_headers())
        soup = BeautifulSoup(request.text, 'lxml')
        results = soup.find_all('div', {'class': 'grid--cell fl1'})
        for result in results:
            title = result.find('h2', {'class': 'mb4 fc-black-800 fs-body3'}).find('a').text
            company = result.find('h3', {'class': 'fc-black-700 fs-body1 mb4'}).find('span').text
            href = result.find('h2', {'class': 'mb4 fc-black-800 fs-body3'}).find('a').get('href')
            link = f'https://stackoverflow.com{href}/'
            jobs.append({'title': title, 'description': '', 'company': company, 'url': link})
    return jobs
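As a side note, every requests.get call above runs without a timeout or status check, so one slow or dead page can hang the whole scrape. A hedged variant of the fetch step; the helper name is mine, not from the original:

import requests


def fetch_html(url, headers):
    # Hypothetical helper: fail fast instead of hanging on a dead page
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()  # surface HTTP errors instead of parsing an error page
    return response.text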
And here is the script that runs the parsers and writes the results to the DB:

import os
import sys

# Point Python at the project and configure Django settings before importing models
proj = os.path.dirname(os.path.abspath('manage.py'))
sys.path.append(proj)
os.environ['DJANGO_SETTINGS_MODULE'] = 'scrapping_service2.settings'

import django

django.setup()

from scrapping.hh import *
from scrapping.models import Vacancy, City, Language
from django.db import DatabaseError

# .first() returns the matching row or None if no row with that slug exists
city = City.objects.filter(slug='kiev').first()
language = Language.objects.filter(slug='python').first()

parsers = (
    (extract_hh,
     'https://hh.ru/search/vacancy?L_is_autosearch=false&area=113&clusters=true&enable_snippets=true&text=python&page='),
    (extract_stackoverflow, 'https://stackoverflow.com/jobs?q=python'),
)

jobs = []
for func, url in parsers:
    j = func(url)
    jobs += j

for job in jobs:
    v = Vacancy(**job, city=city, language=language)
    try:
        v.save()
    except DatabaseError as e:
        print(e)
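For what it's worth, .filter(slug='python').first() returns None when no matching row exists, and Vacancy is then saved with language_id = NULL, which matches the error above. One hedged way to make the script fail early, using Django's standard get_or_create; the defaults values below are my guesses, not from the original:

# Create the lookup rows if they are missing instead of silently passing None
city, _ = City.objects.get_or_create(slug='kiev', defaults={'name': 'Киев'})
language, _ = Language.objects.get_or_create(slug='python', defaults={'name': 'Python'})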